Prevent pushing down INSERT .. SELECT queries that we shouldn't (and allow some more) (#6752)

Previously, the INSERT .. SELECT planner was pushing down some queries that should not be pushed down, due to wrong colocation checks. It only checked whether one of the tables in the SELECT part and the target table were colocated. Now we check colocation for all tables in the SELECT part and the target table.

Another problem with the INSERT .. SELECT planner was that some queries that are valid to push down were not pushed down, because of unnecessary checks for constructs that are now supported, e.g. the UNION check. As a solution, we reuse the pushdown planner checks in the INSERT .. SELECT planner.


DESCRIPTION: Fixes a bug that causes incorrectly pushing down some
INSERT .. SELECT queries that we shouldn't
DESCRIPTION: Prevents unnecessarily pulling the data into coordinator
for some INSERT .. SELECT queries
DESCRIPTION: Drops support for pushing down INSERT .. SELECT with append
table as target

Fixes #6749.
Fixes #1428.
Fixes #6920.

---------

Co-authored-by: aykutbozkurt <aykut.bozkurt1995@gmail.com>
pull/6931/head
Onur Tirtir 2023-05-17 15:05:08 +03:00 committed by GitHub
parent 56d217b108
commit 8ff9dde4b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 1036 additions and 589 deletions

View File

@ -73,9 +73,9 @@ static List * CreateTargetListForCombineQuery(List *targetList);
static DeferredErrorMessage * DistributedInsertSelectSupported(Query *queryTree,
RangeTblEntry *insertRte,
RangeTblEntry *subqueryRte,
bool allReferenceTables);
static DeferredErrorMessage * MultiTaskRouterSelectQuerySupported(Query *query);
static bool HasUnsupportedDistinctOn(Query *query);
bool allReferenceTables,
PlannerRestrictionContext *
plannerRestrictionContext);
static DeferredErrorMessage * InsertPartitionColumnMatchesSelect(Query *query,
RangeTblEntry *insertRte,
RangeTblEntry *
@ -292,7 +292,8 @@ CreateDistributedInsertSelectPlan(Query *originalQuery,
distributedPlan->planningError = DistributedInsertSelectSupported(originalQuery,
insertRte,
subqueryRte,
allReferenceTables);
allReferenceTables,
plannerRestrictionContext);
if (distributedPlan->planningError)
{
return distributedPlan;
@ -613,7 +614,8 @@ CreateTargetListForCombineQuery(List *targetList)
*/
static DeferredErrorMessage *
DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
RangeTblEntry *subqueryRte, bool allReferenceTables)
RangeTblEntry *subqueryRte, bool allReferenceTables,
PlannerRestrictionContext *plannerRestrictionContext)
{
Oid selectPartitionColumnTableId = InvalidOid;
Oid targetRelationId = insertRte->relid;
@ -687,8 +689,16 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
NULL, NULL);
}
/* we don't support LIMIT, OFFSET and WINDOW functions */
DeferredErrorMessage *error = MultiTaskRouterSelectQuerySupported(subquery);
/* first apply toplevel pushdown checks to SELECT query */
DeferredErrorMessage *error = DeferErrorIfUnsupportedSubqueryPushdown(subquery,
plannerRestrictionContext);
if (error)
{
return error;
}
/* then apply subquery pushdown checks to SELECT query */
error = DeferErrorIfCannotPushdownSubquery(subquery, false);
if (error)
{
return error;
@ -730,27 +740,6 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
"table", NULL, NULL);
}
if (!HasDistributionKey(targetRelationId) ||
subqueryRteListProperties->hasSingleShardDistTable)
{
/*
* XXX: Better to check this regardless of the fact that the target table
* has a distribution column or not.
*/
List *distributedRelationIdList = DistributedRelationIdList(subquery);
distributedRelationIdList = lappend_oid(distributedRelationIdList,
targetRelationId);
if (!AllDistributedRelationsInListColocated(distributedRelationIdList))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"distributed INSERT ... SELECT cannot reference a "
"distributed table without a shard key together "
"with non-colocated distributed tables",
NULL, NULL);
}
}
if (HasDistributionKey(targetRelationId))
{
/* ensure that INSERT's partition column comes from SELECT's partition column */
@ -760,22 +749,22 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
{
return error;
}
/*
* We expect partition column values come from colocated tables. Note that we
* skip this check from the reference table case given that all reference tables
* are already (and by default) co-located.
*/
if (!TablesColocated(insertRte->relid, selectPartitionColumnTableId))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"INSERT target table and the source relation of the SELECT partition "
"column value must be colocated in distributed INSERT ... SELECT",
NULL, NULL);
}
}
}
/* All tables in source list and target table should be colocated. */
List *distributedRelationIdList = DistributedRelationIdList(subquery);
distributedRelationIdList = lappend_oid(distributedRelationIdList,
targetRelationId);
if (!AllDistributedRelationsInListColocated(distributedRelationIdList))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"INSERT target relation and all source relations of the "
"SELECT must be colocated in distributed INSERT ... SELECT",
NULL, NULL);
}
return NULL;
}
@ -1131,152 +1120,6 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
}
/*
* MultiTaskRouterSelectQuerySupported returns NULL if the query may be used
* as the source for an INSERT ... SELECT or returns a description why not.
*
* Every query collected by ExtractQueryWalker() is inspected in turn and the
* first violation found is returned as a deferred error. The constructs that
* are rejected are: an empty join tree, LIMIT, OFFSET, a GROUP BY that is not
* on the partition column, a window function that is not safe to push down,
* set operations, grouping sets, and DISTINCT ON without a partition column.
*/
static DeferredErrorMessage *
MultiTaskRouterSelectQuerySupported(Query *query)
{
List *queryList = NIL;
ListCell *queryCell = NULL;
StringInfo errorDetail = NULL;
bool hasUnsupportedDistinctOn = false;
/*
* Collect the queries to validate into queryList.
* NOTE(review): presumably this is the top-level query plus all nested
* subqueries; confirm against ExtractQueryWalker().
*/
ExtractQueryWalker((Node *) query, &queryList);
foreach(queryCell, queryList)
{
Query *subquery = (Query *) lfirst(queryCell);
Assert(subquery->commandType == CMD_SELECT);
/* pushing down RTEs without relations yields (shardCount * expectedRows) rows */
if (HasEmptyJoinTree(subquery))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"Subqueries without relations are not allowed in "
"distributed INSERT ... SELECT queries",
NULL, NULL);
}
/* pushing down LIMIT per shard would yield wrong results */
if (subquery->limitCount != NULL)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"LIMIT clauses are not allowed in distributed INSERT "
"... SELECT queries",
NULL, NULL);
}
/* pushing down OFFSET per shard would yield wrong results */
if (subquery->limitOffset != NULL)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"OFFSET clauses are not allowed in distributed "
"INSERT ... SELECT queries",
NULL, NULL);
}
/* group clause list must include partition column */
if (subquery->groupClause)
{
List *groupClauseList = subquery->groupClause;
List *targetEntryList = subquery->targetList;
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
targetEntryList);
bool groupOnPartitionColumn = TargetListOnPartitionColumn(subquery,
groupTargetEntryList);
if (!groupOnPartitionColumn)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"Group by list without distribution column is "
"not allowed in distributed INSERT ... "
"SELECT queries",
NULL, NULL);
}
}
/*
* We support window functions when the window function
* is partitioned on distribution column.
*
* NOTE(review): errorDetail starts out NULL and is dereferenced below, so
* SafeToPushdownWindowFunction() is presumably expected to fill it in
* whenever it returns false; confirm.
*/
if (subquery->windowClause && !SafeToPushdownWindowFunction(subquery,
&errorDetail))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorDetail->data, NULL,
NULL);
}
/* set operations are rejected outright */
if (subquery->setOperations != NULL)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"Set operations are not allowed in distributed "
"INSERT ... SELECT queries",
NULL, NULL);
}
/*
* We currently do not support grouping sets since it could generate NULL
* results even after the restrictions are applied to the query. A solution
* would be to add the whole query into a subquery and add the restrictions
* on that subquery.
*/
if (subquery->groupingSets != NULL)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"grouping sets are not allowed in distributed "
"INSERT ... SELECT queries",
NULL, NULL);
}
/*
* We don't support DISTINCT ON clauses on non-partition columns.
*/
hasUnsupportedDistinctOn = HasUnsupportedDistinctOn(subquery);
if (hasUnsupportedDistinctOn)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"DISTINCT ON (non-partition column) clauses are not "
"allowed in distributed INSERT ... SELECT queries",
NULL, NULL);
}
}
/* none of the inspected queries uses an unsupported construct */
return NULL;
}
/*
* HasUnsupportedDistinctOn returns true if the query has distinct on and
* distinct targets do not contain partition column.
*
* It returns false when the query has no DISTINCT ON clause, or as soon as
* one of the distinct clause's target entries is a partition column.
*/
static bool
HasUnsupportedDistinctOn(Query *query)
{
ListCell *distinctCell = NULL;
/* without DISTINCT ON there is nothing to reject */
if (!query->hasDistinctOn)
{
return false;
}
foreach(distinctCell, query->distinctClause)
{
SortGroupClause *distinctClause = lfirst(distinctCell);
/* look up the target list entry this distinct clause refers to */
TargetEntry *distinctEntry = get_sortgroupclause_tle(distinctClause,
query->targetList);
bool skipOuterVars = true;
/* a single partition column among the distinct targets is sufficient */
if (IsPartitionColumn(distinctEntry->expr, query, skipOuterVars))
{
return false;
}
}
/* DISTINCT ON is present but none of its targets is a partition column */
return true;
}
/*
* InsertPartitionColumnMatchesSelect returns NULL the partition column in the
* table targeted by INSERTed matches with the any of the SELECTed table's

View File

@ -908,7 +908,7 @@ key
FROM a JOIN table_2 USING (key)
GROUP BY key
HAVING (max(table_2.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -939,7 +939,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM coordinator_shouldhaveshards.table_1
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO coordinator_shouldhaveshards.table_2 (key, value) SELECT table_1.key, count(*) AS count FROM coordinator_shouldhaveshards.table_1 WHERE (table_1.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file

View File

@ -908,7 +908,7 @@ key
FROM a JOIN table_2 USING (key)
GROUP BY key
HAVING (max(table_2.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -939,7 +939,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM coordinator_shouldhaveshards.table_1
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO coordinator_shouldhaveshards.table_2 (key, value) SELECT key, count(*) AS count FROM coordinator_shouldhaveshards.table_1 WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file

View File

@ -549,7 +549,7 @@ SELECT create_distributed_table('target_table', 'a');
INSERT INTO source_table SELECT floor(i/4), i*i FROM generate_series(1, 20) i;
SET client_min_messages TO DEBUG1;
INSERT INTO target_table SELECT a, max(b) FROM source_table GROUP BY a;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
RESET client_min_messages;
SELECT * FROM target_table ORDER BY a;
@ -622,40 +622,40 @@ INSERT INTO target_table
WHERE a BETWEEN $1 AND $2 GROUP BY a;
SET client_min_messages TO DEBUG1;
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
RESET client_min_messages;
SELECT a, count(*), count(distinct b) distinct_values FROM target_table GROUP BY a ORDER BY a;
@ -680,25 +680,25 @@ INSERT INTO target_table
WHERE a=$1 GROUP BY a;
SET client_min_messages TO DEBUG1;
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
RESET client_min_messages;
SELECT a, count(*), count(distinct b) distinct_values FROM target_table GROUP BY a ORDER BY a;
@ -761,10 +761,10 @@ WITH r AS (
INSERT INTO target_table SELECT * FROM source_table RETURNING *
)
INSERT INTO target_table SELECT source_table.a, max(source_table.b) FROM source_table NATURAL JOIN r GROUP BY source_table.a;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: only SELECT, UPDATE, or DELETE common table expressions may be router planned
DEBUG: generating subplan XXX_1 for CTE r: INSERT INTO insert_select_repartition.target_table (a, b) SELECT source_table.a, source_table.b FROM insert_select_repartition.source_table RETURNING target_table.a, target_table.b
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, max AS b FROM (SELECT source_table.a, max(source_table.b) AS max FROM (insert_select_repartition.source_table JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) r USING (a, b)) GROUP BY source_table.a) citus_insert_select_subquery
DEBUG: Router planner cannot handle multi-shard select queries
@ -1015,7 +1015,7 @@ SELECT create_distributed_table('target_table', 'a');
INSERT INTO source_table SELECT i, i * i FROM generate_series(1, 10) i;
SET client_min_messages TO DEBUG2;
INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 AS aa FROM source_table;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'a'
@ -1049,7 +1049,7 @@ EXPLAIN (costs off) INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2
SET client_min_messages TO DEBUG2;
INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 AS aa FROM source_table;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
RESET client_min_messages;
@ -1104,7 +1104,7 @@ EXPLAIN (costs off) INSERT INTO test(y, x) SELECT a.x, b.y FROM test a JOIN test
SET client_min_messages TO DEBUG1;
INSERT INTO test(y, x) SELECT a.x, b.y FROM test a JOIN test b USING (y);
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Collecting INSERT ... SELECT results on coordinator
RESET client_min_messages;
SELECT count(*) FROM test;
@ -1133,7 +1133,7 @@ EXPLAIN (costs off) INSERT INTO test SELECT a.* FROM test a JOIN test b USING (y
SET client_min_messages TO DEBUG1;
INSERT INTO test SELECT a.* FROM test a JOIN test b USING (y);
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Collecting INSERT ... SELECT results on coordinator
RESET client_min_messages;
SELECT count(*) FROM test;
@ -1219,7 +1219,7 @@ ON CONFLICT(c1, c2, c3, c4, c5, c6)
DO UPDATE SET
cardinality = enriched.cardinality + excluded.cardinality,
sum = enriched.sum + excluded.sum;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'c1'
@ -1302,6 +1302,33 @@ explain (costs off) insert into table_with_user_sequences select y, x from table
-> Seq Scan on table_with_user_sequences_4213652 table_with_user_sequences
(8 rows)
CREATE TABLE dist_table_1(id int);
SELECT create_distributed_table('dist_table_1','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist_table_2(id int);
SELECT create_distributed_table('dist_table_2','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- verify that insert select with union can be repartitioned. We cannot push down the query
-- since UNION clause has no FROM clause at top level query.
SELECT public.coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
Task Count: 4
(4 rows)
-- clean-up
SET client_min_messages TO WARNING;
DROP SCHEMA insert_select_repartition CASCADE;

View File

@ -549,7 +549,7 @@ SELECT create_distributed_table('target_table', 'a');
INSERT INTO source_table SELECT floor(i/4), i*i FROM generate_series(1, 20) i;
SET client_min_messages TO DEBUG1;
INSERT INTO target_table SELECT a, max(b) FROM source_table GROUP BY a;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
RESET client_min_messages;
SELECT * FROM target_table ORDER BY a;
@ -622,40 +622,40 @@ INSERT INTO target_table
WHERE a BETWEEN $1 AND $2 GROUP BY a;
SET client_min_messages TO DEBUG1;
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(0, 2);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
EXECUTE insert_plan(2, 4);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: performing repartitioned INSERT ... SELECT
RESET client_min_messages;
SELECT a, count(*), count(distinct b) distinct_values FROM target_table GROUP BY a ORDER BY a;
@ -680,25 +680,25 @@ INSERT INTO target_table
WHERE a=$1 GROUP BY a;
SET client_min_messages TO DEBUG1;
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_plan(0);
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
RESET client_min_messages;
SELECT a, count(*), count(distinct b) distinct_values FROM target_table GROUP BY a ORDER BY a;
@ -761,10 +761,10 @@ WITH r AS (
INSERT INTO target_table SELECT * FROM source_table RETURNING *
)
INSERT INTO target_table SELECT source_table.a, max(source_table.b) FROM source_table NATURAL JOIN r GROUP BY source_table.a;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: only SELECT, UPDATE, or DELETE common table expressions may be router planned
DEBUG: generating subplan XXX_1 for CTE r: INSERT INTO insert_select_repartition.target_table (a, b) SELECT a, b FROM insert_select_repartition.source_table RETURNING target_table.a, target_table.b
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, max AS b FROM (SELECT source_table.a, max(source_table.b) AS max FROM (insert_select_repartition.source_table JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) r USING (a, b)) GROUP BY source_table.a) citus_insert_select_subquery
DEBUG: Router planner cannot handle multi-shard select queries
@ -1015,7 +1015,7 @@ SELECT create_distributed_table('target_table', 'a');
INSERT INTO source_table SELECT i, i * i FROM generate_series(1, 10) i;
SET client_min_messages TO DEBUG2;
INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 AS aa FROM source_table;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'a'
@ -1049,7 +1049,7 @@ EXPLAIN (costs off) INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2
SET client_min_messages TO DEBUG2;
INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 AS aa FROM source_table;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
RESET client_min_messages;
@ -1104,7 +1104,7 @@ EXPLAIN (costs off) INSERT INTO test(y, x) SELECT a.x, b.y FROM test a JOIN test
SET client_min_messages TO DEBUG1;
INSERT INTO test(y, x) SELECT a.x, b.y FROM test a JOIN test b USING (y);
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Collecting INSERT ... SELECT results on coordinator
RESET client_min_messages;
SELECT count(*) FROM test;
@ -1133,7 +1133,7 @@ EXPLAIN (costs off) INSERT INTO test SELECT a.* FROM test a JOIN test b USING (y
SET client_min_messages TO DEBUG1;
INSERT INTO test SELECT a.* FROM test a JOIN test b USING (y);
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Collecting INSERT ... SELECT results on coordinator
RESET client_min_messages;
SELECT count(*) FROM test;
@ -1219,7 +1219,7 @@ ON CONFLICT(c1, c2, c3, c4, c5, c6)
DO UPDATE SET
cardinality = enriched.cardinality + excluded.cardinality,
sum = enriched.sum + excluded.sum;
DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'c1'
@ -1302,6 +1302,33 @@ explain (costs off) insert into table_with_user_sequences select y, x from table
-> Seq Scan on table_with_user_sequences_4213652 table_with_user_sequences
(8 rows)
CREATE TABLE dist_table_1(id int);
SELECT create_distributed_table('dist_table_1','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist_table_2(id int);
SELECT create_distributed_table('dist_table_2','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Verify that INSERT .. SELECT with UNION can be repartitioned. We cannot push down the
-- query since the UNION clause has no FROM clause at the top level of the query.
SELECT public.coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
Task Count: 4
(4 rows)
-- clean-up
SET client_min_messages TO WARNING;
DROP SCHEMA insert_select_repartition CASCADE;

View File

@ -118,73 +118,82 @@ SET client_min_messages TO DEBUG2;
-- different table types together with single-shard tables.
-- use a single-shard table
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- use a reference table
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 JOIN reference_table USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 RIGHT JOIN reference_table USING (b) WHERE reference_table.a >= 1 AND reference_table.a <= 5;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t2.a, nullkey_c1_t2.b FROM nullkey_c1_t2 LEFT JOIN reference_table USING (b);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 INTERSECT SELECT * FROM reference_table;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Intersect and Except are currently unsupported
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- use a colocated single-shard table
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 JOIN nullkey_c1_t2 USING (b);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 FULL JOIN nullkey_c1_t2 USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 FULL JOIN matview USING (a);
DEBUG: distributed INSERT ... SELECT cannot select from a local relation when inserting into a distributed table
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: Local tables cannot be used in distributed queries.
INSERT INTO distributed_table_c1_t1 SELECT * FROM nullkey_c1_t1 UNION SELECT * FROM nullkey_c1_t2;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- use a non-colocated single-shard table
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t2.a, nullkey_c1_t2.b FROM nullkey_c1_t2 LEFT JOIN nullkey_c2_t1 USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
INSERT INTO distributed_table_c1_t1 SELECT * FROM nullkey_c1_t1 UNION SELECT * FROM nullkey_c2_t1;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
-- use a distributed table that is colocated with the target table
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
INSERT INTO distributed_table_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
INSERT INTO distributed_table_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (b);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
INSERT INTO distributed_table_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a) WHERE distributed_table_c1_t2.a = 1;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
-- use a distributed table that is not colocated with the target table
INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t2.a, nullkey_c1_t2.b FROM nullkey_c1_t2 JOIN distributed_table_c2_t1 USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
-- use a citus local table
@ -199,12 +208,14 @@ ERROR: queries that reference a distributed table without a shard key can only
DETAIL: Local tables cannot be used in distributed queries.
-- use append / range distributed tables
INSERT INTO range_table SELECT * FROM nullkey_c1_t1;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO append_table SELECT * FROM nullkey_c1_t1;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: INSERT ... SELECT into an append-distributed table is not supported
SELECT avg(a), avg(b) FROM distributed_table_c1_t1 ORDER BY 1, 2;
@ -236,11 +247,12 @@ DEBUG: only reference tables may be queried when targeting a reference table wi
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO reference_table SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 UNION SELECT * FROM reference_table;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Reference tables are not supported with union operator
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO reference_table SELECT nullkey_c1_t2.a, nullkey_c1_t2.b FROM nullkey_c1_t2 LEFT JOIN reference_table USING (b) WHERE b IN (SELECT b FROM matview);
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: Local tables cannot be used in distributed queries.
-- use a colocated single-shard table
@ -259,19 +271,19 @@ ERROR: queries that reference a distributed table without a shard key can only
DETAIL: router planner does not support queries that reference non-colocated distributed tables
-- use a distributed table
INSERT INTO reference_table SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a);
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
INSERT INTO reference_table SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a);
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
INSERT INTO reference_table SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (b);
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
INSERT INTO reference_table SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a) WHERE distributed_table_c1_t2.a = 1;
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
-- use a citus local table
@ -371,20 +383,20 @@ ERROR: queries that reference a distributed table without a shard key can only
DETAIL: Local tables cannot be used in distributed queries.
-- use a distributed table
INSERT INTO nullkey_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM distributed_table_c1_t2;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO nullkey_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM distributed_table_c1_t2 JOIN reference_table USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO nullkey_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM distributed_table_c1_t2 JOIN nullkey_c1_t1 USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
-- use a non-colocated single-shard table
INSERT INTO nullkey_c2_t1 SELECT q.* FROM (SELECT reference_table.* FROM reference_table LEFT JOIN nullkey_c1_t1 USING (a)) q JOIN nullkey_c1_t2 USING (a);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- use a materialized view
@ -392,11 +404,11 @@ INSERT INTO nullkey_c1_t1 SELECT * FROM matview;
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO nullkey_c1_t1 SELECT reference_table.a, reference_table.b FROM reference_table JOIN matview ON (reference_table.a = matview.a);
DEBUG: distributed INSERT ... SELECT cannot select from a local relation when inserting into a distributed table
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO nullkey_c1_t1 SELECT q.* FROM (SELECT reference_table.* FROM reference_table JOIN nullkey_c1_t1 USING (a)) q JOIN matview USING (a);
DEBUG: distributed INSERT ... SELECT cannot select from a local relation when inserting into a distributed table
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: Local tables cannot be used in distributed queries.
-- use append / range distributed tables
@ -405,7 +417,7 @@ DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO nullkey_c1_t1 SELECT * FROM append_table;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner does not support append-partitioned tables.
DEBUG: Collecting INSERT ... SELECT results on coordinator
SELECT avg(a), avg(b) FROM nullkey_c1_t1 ORDER BY 1, 2;
@ -462,8 +474,7 @@ cte_2 AS (
)
INSERT INTO distributed_table_c1_t1
SELECT cte_1.* FROM cte_1 JOIN cte_2 USING (a) JOIN distributed_table_c1_t2 USING (a) ORDER BY 1,2;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: CTE cte_1 is going to be inlined via distributed planning
DEBUG: CTE cte_2 is going to be inlined via distributed planning
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
@ -479,7 +490,8 @@ cte_2 AS (
)
INSERT INTO distributed_table_c1_t1
SELECT * FROM cte_1 UNION SELECT * FROM cte_2 EXCEPT SELECT * FROM reference_table;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: CTEs in subqueries are currently unsupported
DEBUG: CTE cte_1 is going to be inlined via distributed planning
DEBUG: CTE cte_2 is going to be inlined via distributed planning
DEBUG: Creating router plan
@ -491,7 +503,8 @@ JOIN (
SELECT b FROM nullkey_c1_t2 ORDER BY b DESC LIMIT 1
) t2
ON t1.b < t2.b;
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO distributed_table_c1_t1 (a, b)
@ -503,7 +516,9 @@ WITH cte AS (
)
SELECT d1, COALESCE(d2, a) FROM cte WHERE d1 IS NOT NULL AND d2 IS NOT NULL;
DEBUG: CTE cte is going to be inlined via distributed planning
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO citus_local_table (a, b)
@ -523,7 +538,8 @@ LEFT JOIN (
FROM nullkey_c1_t1
) t2 ON t1.b = t2.b
WHERE t2.rn > 0;
DEBUG: Window functions without PARTITION BY on distribution column is currently unsupported
DEBUG: cannot push down this subquery
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO nullkey_c1_t1 (a, b)
@ -537,7 +553,7 @@ JOIN (
) q
) t2 ON t1.b = t2.b
WHERE t2.rn > 2;
DEBUG: Window functions without PARTITION BY on distribution column is currently unsupported
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: router planner does not support queries that reference non-colocated distributed tables
INSERT INTO distributed_table_c1_t1 (a, b)
@ -551,21 +567,23 @@ JOIN (
) q
) t2 ON t1.b = t2.b
WHERE t2.sum_val > 2;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- Temporarily reduce the verbosity to avoid noise
-- in the output of the next query.
SET client_min_messages TO DEBUG1;
-- MultiTaskRouterSelectQuerySupported() is unnecessarily restrictive
-- about pushing down queries with DISTINCT ON clause even if the table
-- doesn't have a shard key. See https://github.com/citusdata/citus/pull/6752.
INSERT INTO nullkey_c1_t1 SELECT DISTINCT ON (a) a, b FROM nullkey_c1_t2;
DEBUG: DISTINCT ON (non-partition column) clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
SET client_min_messages TO DEBUG2;
-- Similarly, we could push down the following query as well. See
-- https://github.com/citusdata/citus/pull/6831.
INSERT INTO nullkey_c1_t1 SELECT b, SUM(a) OVER (ORDER BY b) AS sum_val FROM nullkey_c1_t1;
DEBUG: Window functions without PARTITION BY on distribution column is currently unsupported
DEBUG: cannot push down this subquery
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
@ -576,7 +594,7 @@ JOIN reference_table AS t3 ON (t2.a = t3.a)
WHERE NOT EXISTS (
SELECT 1 FROM nullkey_c1_t2 AS t1 WHERE t1.b = t3.b
);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO distributed_table_c1_t1
@ -585,7 +603,7 @@ FROM nullkey_c1_t1 AS t1
WHERE t1.a NOT IN (
SELECT DISTINCT t2.a FROM distributed_table_c1_t2 AS t2
);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables
DETAIL: Router planner cannot handle multi-shard select queries
INSERT INTO distributed_table_c1_t1
@ -598,7 +616,8 @@ JOIN (
SELECT a FROM nullkey_c1_t2
) AS t2
) AS t3 ON t1.a = t3.a;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- Temporarily reduce the verbosity to avoid noise
@ -617,7 +636,7 @@ WHERE t1.a IN (
) AS t4 ON t3.a = t4.a
) AS t2
);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: correlated subqueries are not supported when the FROM clause contains a reference table
DEBUG: generating subplan XXX_1 for subquery SELECT a FROM (SELECT t3.a FROM ((SELECT distributed_table_c1_t1.a FROM insert_select_single_shard_table.distributed_table_c1_t1 WHERE (distributed_table_c1_t1.b OPERATOR(pg_catalog.>) 4)) t3 JOIN (SELECT distributed_table_c1_t2.a FROM insert_select_single_shard_table.distributed_table_c1_t2 WHERE (distributed_table_c1_t2.b OPERATOR(pg_catalog.<) 7)) t4 ON ((t3.a OPERATOR(pg_catalog.=) t4.a)))) t2
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, b FROM insert_select_single_shard_table.reference_table t1 WHERE (a OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)))
DEBUG: Collecting INSERT ... SELECT results on coordinator
@ -736,16 +755,17 @@ DEBUG: distributed statement: INSERT INTO insert_select_single_shard_table.null
SET client_min_messages TO DEBUG1;
INSERT INTO distributed_table_c1_t1 AS t1 (a, b) SELECT t3.a, t3.b FROM nullkey_c1_t2 t2 JOIN reference_table t3 ON (t2.a = t3.a) ON CONFLICT (a, b)
DO UPDATE SET b = t1.b + 10;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO nullkey_c1_t1 AS t1 (a, b) SELECT t3.a, t3.b FROM distributed_table_c1_t1 t2 JOIN reference_table t3 ON (t2.a = t3.a) ON CONFLICT (a)
DO UPDATE SET a = t1.a + 10;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- This also fails due to https://github.com/citusdata/citus/issues/6826.
INSERT INTO nullkey_c1_t1 AS t1 (a, b) SELECT t3.a, t3.b FROM distributed_table_c1_t1 t2 JOIN reference_table t3 ON (t2.a = t3.a) WHERE t2.a = 3 ON CONFLICT (a)
DO UPDATE SET a = (SELECT max(b)+1 FROM distributed_table_c1_t1 WHERE a = 3);
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Collecting INSERT ... SELECT results on coordinator
ERROR: cannot execute a distributed query from a query on a shard
DETAIL: Executing a distributed query in a function call that may be pushed to a remote node can lead to incorrect results.

View File

@ -762,7 +762,8 @@ ROLLBACK;
-- We use offset 1 to make sure the result needs to be pulled to the coordinator, offset 0 would be optimized away
INSERT INTO table_1
SELECT * FROM table_2 OFFSET 1;
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- INSERT .. SELECT via coordinator which has intermediate result,
-- and can be pruned to a single worker because the final query is on
@ -793,7 +794,7 @@ INSERT INTO table_1
SELECT * FROM cte_1
UNION
SELECT * FROM cte_2);
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 1) INTERSECT SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 2)
DEBUG: generating subplan XXX_1 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 1)
DEBUG: generating subplan XXX_2 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 2)
@ -830,7 +831,7 @@ INSERT INTO table_1
) foo
where table_2.key != 1 AND
foo.key = table_2.value::int;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 1) INTERSECT SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 2)
DEBUG: generating subplan XXX_1 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 1)
DEBUG: generating subplan XXX_2 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 2)
@ -1054,7 +1055,8 @@ inserts AS MATERIALIZED (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM intermediate_result_pruning.table_3
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO intermediate_result_pruning.table_2 (key, value) SELECT table_1.key, count(*) AS count FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.>) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1.key HAVING (count(*) OPERATOR(pg_catalog.<) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file

View File

@ -762,7 +762,8 @@ ROLLBACK;
-- We use offset 1 to make sure the result needs to be pulled to the coordinator, offset 0 would be optimized away
INSERT INTO table_1
SELECT * FROM table_2 OFFSET 1;
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- INSERT .. SELECT via coordinator which has intermediate result,
-- and can be pruned to a single worker because the final query is on
@ -793,7 +794,7 @@ INSERT INTO table_1
SELECT * FROM cte_1
UNION
SELECT * FROM cte_2);
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 1) INTERSECT SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 2)
DEBUG: generating subplan XXX_1 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 1)
DEBUG: generating subplan XXX_2 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 2)
@ -830,7 +831,7 @@ INSERT INTO table_1
) foo
where table_2.key != 1 AND
foo.key = table_2.value::int;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 1) INTERSECT SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 2)
DEBUG: generating subplan XXX_1 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 1)
DEBUG: generating subplan XXX_2 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 2)
@ -1054,7 +1055,8 @@ inserts AS MATERIALIZED (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM intermediate_result_pruning.table_3
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO intermediate_result_pruning.table_2 (key, value) SELECT key, count(*) AS count FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.>) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file

View File

@ -341,29 +341,6 @@ count
(1 row)
starting permutation: s1-initialize s1-begin s1-router-select s2-insert-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
6780300
(1 row)
step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx);
step s1-begin: BEGIN;
step s1-router-select: SELECT * FROM select_append WHERE id = 1;
id|data|int_data
---------------------------------------------------------------------
1| b | 1
(1 row)
step s2-insert-select: INSERT INTO select_append SELECT * FROM select_append;
step s1-commit: COMMIT;
step s1-select-count: SELECT COUNT(*) FROM select_append;
count
---------------------------------------------------------------------
10
(1 row)
starting permutation: s1-initialize s1-begin s1-router-select s2-update s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
@ -770,29 +747,6 @@ count
(1 row)
starting permutation: s1-initialize s1-begin s1-insert-select s2-router-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
6780300
(1 row)
step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx);
step s1-begin: BEGIN;
step s1-insert-select: INSERT INTO select_append SELECT * FROM select_append;
step s2-router-select: SELECT * FROM select_append WHERE id = 1;
id|data|int_data
---------------------------------------------------------------------
1| b | 1
(1 row)
step s1-commit: COMMIT;
step s1-select-count: SELECT COUNT(*) FROM select_append;
count
---------------------------------------------------------------------
10
(1 row)
starting permutation: s1-initialize s1-begin s1-update s2-router-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
@ -1162,33 +1116,6 @@ count
(1 row)
starting permutation: s1-initialize s1-begin s1-real-time-select s2-insert-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
6780300
(1 row)
step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx);
step s1-begin: BEGIN;
step s1-real-time-select: SELECT * FROM select_append ORDER BY 1, 2;
id|data|int_data
---------------------------------------------------------------------
0| a | 0
1| b | 1
2| c | 2
3| d | 3
4| e | 4
(5 rows)
step s2-insert-select: INSERT INTO select_append SELECT * FROM select_append;
step s1-commit: COMMIT;
step s1-select-count: SELECT COUNT(*) FROM select_append;
count
---------------------------------------------------------------------
10
(1 row)
starting permutation: s1-initialize s1-begin s1-real-time-select s2-update s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
@ -1621,33 +1548,6 @@ count
(1 row)
starting permutation: s1-initialize s1-begin s1-insert-select s2-real-time-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
6780300
(1 row)
step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx);
step s1-begin: BEGIN;
step s1-insert-select: INSERT INTO select_append SELECT * FROM select_append;
step s2-real-time-select: SELECT * FROM select_append ORDER BY 1, 2;
id|data|int_data
---------------------------------------------------------------------
0| a | 0
1| b | 1
2| c | 2
3| d | 3
4| e | 4
(5 rows)
step s1-commit: COMMIT;
step s1-select-count: SELECT COUNT(*) FROM select_append;
count
---------------------------------------------------------------------
10
(1 row)
starting permutation: s1-initialize s1-begin s1-update s2-real-time-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
@ -2034,36 +1934,6 @@ count
(1 row)
starting permutation: s1-initialize s1-begin s1-adaptive-select s2-insert-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
6780300
(1 row)
step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx);
step s1-begin: BEGIN;
step s1-adaptive-select:
SET citus.enable_repartition_joins TO ON;
SELECT * FROM select_append AS t1 JOIN select_append AS t2 ON t1.id = t2.int_data ORDER BY 1, 2, 3, 4;
id|data|int_data|id|data|int_data
---------------------------------------------------------------------
0| a | 0| 0| a | 0
1| b | 1| 1| b | 1
2| c | 2| 2| c | 2
3| d | 3| 3| d | 3
4| e | 4| 4| e | 4
(5 rows)
step s2-insert-select: INSERT INTO select_append SELECT * FROM select_append;
step s1-commit: COMMIT;
step s1-select-count: SELECT COUNT(*) FROM select_append;
count
---------------------------------------------------------------------
10
(1 row)
starting permutation: s1-initialize s1-begin s1-adaptive-select s2-update s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
@ -2538,36 +2408,6 @@ count
(1 row)
starting permutation: s1-initialize s1-begin s1-insert-select s2-adaptive-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------
6780300
(1 row)
step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx);
step s1-begin: BEGIN;
step s1-insert-select: INSERT INTO select_append SELECT * FROM select_append;
step s2-adaptive-select:
SET citus.enable_repartition_joins TO ON;
SELECT * FROM select_append AS t1 JOIN select_append AS t2 ON t1.id = t2.int_data ORDER BY 1, 2, 3, 4;
id|data|int_data|id|data|int_data
---------------------------------------------------------------------
0| a | 0| 0| a | 0
1| b | 1| 1| b | 1
2| c | 2| 2| c | 2
3| d | 3| 3| d | 3
4| e | 4| 4| e | 4
(5 rows)
step s1-commit: COMMIT;
step s1-select-count: SELECT COUNT(*) FROM select_append;
count
---------------------------------------------------------------------
10
(1 row)
starting permutation: s1-initialize s1-begin s1-update s2-adaptive-select s1-commit s1-select-count
master_create_empty_shard
---------------------------------------------------------------------

View File

@ -620,7 +620,8 @@ INSERT INTO agg_events (value_1_agg, user_id)
DISTINCT ON (value_1) value_1, user_id
FROM
raw_events_first;
DEBUG: DISTINCT ON (non-partition column) clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Distinct on columns without partition column is currently unsupported
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2;
@ -686,7 +687,7 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_a
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
ROLLBACK;
-- We don't support CTEs that are referenced in the target list
-- We do support CTEs that are referenced in the target list
INSERT INTO agg_events
WITH sub_cte AS (SELECT 1)
SELECT
@ -694,15 +695,11 @@ INSERT INTO agg_events
FROM
raw_events_first;
DEBUG: CTE sub_cte is going to be inlined via distributed planning
DEBUG: Subqueries without relations are not allowed in distributed INSERT ... SELECT queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT intermediate_result.user_id, intermediate_result.value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT intermediate_result.user_id, intermediate_result.value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT intermediate_result.user_id, intermediate_result.value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT intermediate_result.user_id, intermediate_result.value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer)
-- We support set operations via the coordinator
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
-- We support set operations
BEGIN;
INSERT INTO
raw_events_first(user_id)
@ -711,14 +708,10 @@ SELECT
FROM
((SELECT user_id FROM raw_events_first) UNION
(SELECT user_id FROM raw_events_second)) as foo;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300004_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300005_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300006_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300007_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
ROLLBACK;
-- We do support set operations through recursive planning
BEGIN;
@ -727,7 +720,7 @@ INSERT INTO
raw_events_first(user_id)
(SELECT user_id FROM raw_events_first) INTERSECT
(SELECT user_id FROM raw_events_first);
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first
@ -747,7 +740,7 @@ SELECT
FROM
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- some supported LEFT joins
@ -765,10 +758,16 @@ DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_133000
raw_events_second.user_id
FROM
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "raw_events_second" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "raw_events_second" to a subquery
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table LEFT JOIN (SELECT raw_events_second_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) raw_events_second_1) raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)))
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
@ -1126,7 +1125,8 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.value_3) AS foo;
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
@ -1281,7 +1281,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
GROUP BY raw_events_second.value_1
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
@ -1327,7 +1327,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
GROUP BY raw_events_second.value_1
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
@ -3242,5 +3242,236 @@ returning text_col_1;
string
(1 row)
CREATE TABLE dist_table_3(
dist_col bigint,
int_col integer
);
SELECT create_distributed_table('dist_table_3', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to
-- different types for distribution columns. Citus would not be able to handle this complex insert select.
INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
CREATE TABLE dist_table_4(
dist_col integer,
int_col integer
);
SELECT create_distributed_table('dist_table_4', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4
-- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 4
(4 rows)
-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query.
-- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- For INSERT SELECT, push-down is NOT possible when a limit clause exists in a non-lateral subquery in the SELECT part of INSERT SELECT.
-- It is because the subquery with limit needs to be merged at coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Limit
-> Custom Scan (Citus Adaptive)
Task Count: 4
(7 rows)
CREATE TABLE dist_table_5(id int, id2 int);
SELECT create_distributed_table('dist_table_5','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist_table_6(id int, id2 int);
SELECT create_distributed_table('dist_table_6','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- verify that insert select with sublink can be pushed down when tables are colocated.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
CREATE TABLE ref_table_1(id int);
SELECT create_reference_table('ref_table_1');
create_reference_table
---------------------------------------------------------------------
(1 row)
-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation.
INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1;
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
-- verify that insert select cannot be pushed down when we have recurring range table in from clause.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 1
(4 rows)
-- verify that insert select cannot be pushed down when a reference table is on the outer side of an outer join.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
-- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
CREATE TABLE loc_table_1(id int);
-- verify that insert select cannot be pushed down when it contains join between local and distributed tables.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Seq Scan on loc_table_1
Task Count: 4
(6 rows)
CREATE VIEW view_1 AS
SELECT id FROM dist_table_6;
CREATE MATERIALIZED VIEW view_2 AS
SELECT id FROM dist_table_6;
-- verify that insert select can be pushed down when it contains a (non-materialized) view.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- verify that insert select cannot be pushed down when it contains materialized view.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Seq Scan on view_2
(3 rows)
CREATE TABLE append_table(id integer, data text, int_data int);
SELECT create_distributed_table('append_table', 'id', 'append');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT master_create_empty_shard('append_table');
master_create_empty_shard
---------------------------------------------------------------------
13300096
(1 row)
-- verify that insert select push-down into append-distributed tables is not supported.
INSERT INTO append_table SELECT * FROM append_table;
ERROR: INSERT ... SELECT into an append-distributed table is not supported
-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner
-- and handled by pull to coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5)
INSERT INTO dist_table_5
SELECT id FROM dist_table_5 JOIN cte_1 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 1
(4 rows)
-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5
WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5)
SELECT id FROM dist_table_5 JOIN cte_1 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
(2 rows)
SET client_min_messages TO ERROR;
DROP SCHEMA multi_insert_select CASCADE;

View File

@ -620,7 +620,8 @@ INSERT INTO agg_events (value_1_agg, user_id)
DISTINCT ON (value_1) value_1, user_id
FROM
raw_events_first;
DEBUG: DISTINCT ON (non-partition column) clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Distinct on columns without partition column is currently unsupported
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2;
@ -686,7 +687,7 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_a
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
ROLLBACK;
-- We don't support CTEs that are referenced in the target list
-- We do support CTEs that are referenced in the target list
INSERT INTO agg_events
WITH sub_cte AS (SELECT 1)
SELECT
@ -694,15 +695,11 @@ INSERT INTO agg_events
FROM
raw_events_first;
DEBUG: CTE sub_cte is going to be inlined via distributed planning
DEBUG: Subqueries without relations are not allowed in distributed INSERT ... SELECT queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer)
-- We support set operations via the coordinator
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL)
-- We support set operations
BEGIN;
INSERT INTO
raw_events_first(user_id)
@ -711,14 +708,10 @@ SELECT
FROM
((SELECT user_id FROM raw_events_first) UNION
(SELECT user_id FROM raw_events_second)) as foo;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300004_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300005_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300006_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300007_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (user_id IS NOT NULL)
ROLLBACK;
-- We do support set operations through recursive planning
BEGIN;
@ -727,7 +720,7 @@ INSERT INTO
raw_events_first(user_id)
(SELECT user_id FROM raw_events_first) INTERSECT
(SELECT user_id FROM raw_events_first);
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first
@ -747,7 +740,7 @@ SELECT
FROM
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- some supported LEFT joins
@ -765,10 +758,16 @@ DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_133000
raw_events_second.user_id
FROM
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "raw_events_second" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "raw_events_second" to a subquery
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table LEFT JOIN (SELECT raw_events_second_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) raw_events_second_1) raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)))
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
@ -1126,7 +1125,8 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.value_3) AS foo;
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
@ -1281,7 +1281,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
GROUP BY raw_events_second.value_1
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
@ -1327,7 +1327,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
GROUP BY raw_events_second.value_1
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
@ -3242,5 +3242,236 @@ returning text_col_1;
string
(1 row)
CREATE TABLE dist_table_3(
dist_col bigint,
int_col integer
);
SELECT create_distributed_table('dist_table_3', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to
-- different types for distribution columns. Citus would not be able to handle this complex insert select.
INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
CREATE TABLE dist_table_4(
dist_col integer,
int_col integer
);
SELECT create_distributed_table('dist_table_4', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4
-- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 4
(4 rows)
-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query.
-- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- For INSERT SELECT, push-down is NOT possible when a limit clause exists in a non-lateral subquery in the SELECT part of INSERT SELECT.
-- It is because the subquery with limit needs to be merged at coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Limit
-> Custom Scan (Citus Adaptive)
Task Count: 4
(7 rows)
CREATE TABLE dist_table_5(id int, id2 int);
SELECT create_distributed_table('dist_table_5','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist_table_6(id int, id2 int);
SELECT create_distributed_table('dist_table_6','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- verify that insert select with sublink can be pushed down when tables are colocated.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
CREATE TABLE ref_table_1(id int);
SELECT create_reference_table('ref_table_1');
create_reference_table
---------------------------------------------------------------------
(1 row)
-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation.
INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1;
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
-- verify that insert select cannot be pushed down when we have recurring range table in from clause.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 1
(4 rows)
-- verify that insert select cannot be pushed down when a reference table is on the outer side of an outer join.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
-- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
CREATE TABLE loc_table_1(id int);
-- verify that insert select cannot be pushed down when it contains join between local and distributed tables.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Seq Scan on loc_table_1
Task Count: 4
(6 rows)
CREATE VIEW view_1 AS
SELECT id FROM dist_table_6;
CREATE MATERIALIZED VIEW view_2 AS
SELECT id FROM dist_table_6;
-- verify that insert select can be pushed down when it contains a (non-materialized) view.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- verify that insert select cannot be pushed down when it contains materialized view.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Seq Scan on view_2
(3 rows)
CREATE TABLE append_table(id integer, data text, int_data int);
SELECT create_distributed_table('append_table', 'id', 'append');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT master_create_empty_shard('append_table');
master_create_empty_shard
---------------------------------------------------------------------
13300096
(1 row)
-- verify that insert select push-down into append-distributed tables is not supported.
INSERT INTO append_table SELECT * FROM append_table;
ERROR: INSERT ... SELECT into an append-distributed table is not supported
-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner
-- and handled by pull to coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5)
INSERT INTO dist_table_5
SELECT id FROM dist_table_5 JOIN cte_1 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 1
(4 rows)
-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5
WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5)
SELECT id FROM dist_table_5 JOIN cte_1 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
(2 rows)
SET client_min_messages TO ERROR;
DROP SCHEMA multi_insert_select CASCADE;

View File

@ -106,7 +106,8 @@ FROM (
LIMIT 5
) as foo
ON CONFLICT DO NOTHING;
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2, intermediate_result.col_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer, col_3 integer)) foo
@ -127,7 +128,8 @@ WITH inserted_table AS (
ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 RETURNING *
) SELECT * FROM inserted_table ORDER BY 1;
DEBUG: generating subplan XXX_1 for CTE inserted_table: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT foo.col_1, foo.col_2 FROM (SELECT source_table_1.col_1, source_table_1.col_2, source_table_1.col_3 FROM on_conflict.source_table_1 LIMIT 5) foo ON CONFLICT(col_1) DO UPDATE SET col_2 = excluded.col_2 RETURNING target_table.col_1, target_table.col_2
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2, intermediate_result.col_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer, col_3 integer)) foo
@ -163,7 +165,8 @@ WITH inserted_table AS (
ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING *
) SELECT * FROM inserted_table ORDER BY 1;
DEBUG: generating subplan XXX_1 for CTE inserted_table: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT foo.col_1, foo.col_2 FROM ((SELECT source_table_1.col_1, source_table_1.col_2, source_table_1.col_3 FROM on_conflict.source_table_1 LIMIT 5) UNION (SELECT source_table_2.col_1, source_table_2.col_2, source_table_2.col_3 FROM on_conflict.source_table_2 LIMIT 5)) foo ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING target_table.col_1, target_table.col_2
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5
DEBUG: push down of limit count: 5
@ -498,7 +501,8 @@ FROM (
LIMIT 5)
) as foo
ON CONFLICT(col_1) DO UPDATE SET col_2 = 0;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5
DEBUG: push down of limit count: 5
@ -560,7 +564,8 @@ DEBUG: Collecting INSERT ... SELECT results on coordinator
-- intermediate result file
SET citus.max_adaptive_executor_pool_size TO 1;
INSERT INTO target_table SELECT * FROM target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1;
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 10000
DEBUG: Collecting INSERT ... SELECT results on coordinator
SELECT DISTINCT col_2 FROM target_table;
@ -572,7 +577,8 @@ SELECT DISTINCT col_2 FROM target_table;
WITH cte_1 AS (INSERT INTO target_table SELECT * FROM target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1 RETURNING *)
SELECT DISTINCT col_2 FROM cte_1;
DEBUG: generating subplan XXX_1 for CTE cte_1: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT target_table_1.col_1, target_table_1.col_2 FROM on_conflict.target_table target_table_1 LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = (excluded.col_2 OPERATOR(pg_catalog.+) 1) RETURNING target_table.col_1, target_table.col_2
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 10000
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT DISTINCT col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer)) cte_1
DEBUG: Collecting INSERT ... SELECT results on coordinator

View File

@ -106,7 +106,8 @@ FROM (
LIMIT 5
) as foo
ON CONFLICT DO NOTHING;
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2, intermediate_result.col_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer, col_3 integer)) foo
@ -127,7 +128,8 @@ WITH inserted_table AS (
ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 RETURNING *
) SELECT * FROM inserted_table ORDER BY 1;
DEBUG: generating subplan XXX_1 for CTE inserted_table: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT col_1, col_2 FROM (SELECT source_table_1.col_1, source_table_1.col_2, source_table_1.col_3 FROM on_conflict.source_table_1 LIMIT 5) foo ON CONFLICT(col_1) DO UPDATE SET col_2 = excluded.col_2 RETURNING target_table.col_1, target_table.col_2
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2, intermediate_result.col_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer, col_3 integer)) foo
@ -163,7 +165,8 @@ WITH inserted_table AS (
ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING *
) SELECT * FROM inserted_table ORDER BY 1;
DEBUG: generating subplan XXX_1 for CTE inserted_table: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT col_1, col_2 FROM ((SELECT source_table_1.col_1, source_table_1.col_2, source_table_1.col_3 FROM on_conflict.source_table_1 LIMIT 5) UNION (SELECT source_table_2.col_1, source_table_2.col_2, source_table_2.col_3 FROM on_conflict.source_table_2 LIMIT 5)) foo ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING target_table.col_1, target_table.col_2
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5
DEBUG: push down of limit count: 5
@ -498,7 +501,8 @@ FROM (
LIMIT 5)
) as foo
ON CONFLICT(col_1) DO UPDATE SET col_2 = 0;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5
DEBUG: push down of limit count: 5
@ -560,7 +564,8 @@ DEBUG: Collecting INSERT ... SELECT results on coordinator
-- intermediate result file
SET citus.max_adaptive_executor_pool_size TO 1;
INSERT INTO target_table SELECT * FROM target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1;
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 10000
DEBUG: Collecting INSERT ... SELECT results on coordinator
SELECT DISTINCT col_2 FROM target_table;
@ -572,7 +577,8 @@ SELECT DISTINCT col_2 FROM target_table;
WITH cte_1 AS (INSERT INTO target_table SELECT * FROM target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1 RETURNING *)
SELECT DISTINCT col_2 FROM cte_1;
DEBUG: generating subplan XXX_1 for CTE cte_1: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT col_1, col_2 FROM on_conflict.target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = (excluded.col_2 OPERATOR(pg_catalog.+) 1) RETURNING target_table.col_1, target_table.col_2
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 10000
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT DISTINCT col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer)) cte_1
DEBUG: Collecting INSERT ... SELECT results on coordinator

View File

@ -3,6 +3,8 @@
-- Vanilla funnel query
---------------------------------------------------------------------
---------------------------------------------------------------------
CREATE SCHEMA multi_insert_select_non_pushable_queries;
SET search_path = multi_insert_select_non_pushable_queries,public;
-- not pushable since the JOIN is not an equi join
INSERT INTO agg_results_third (user_id, value_1_agg)
SELECT user_id, array_length(events_table, 1)
@ -147,7 +149,7 @@ FROM (
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
RETURNING user_id, value_1_agg, value_2_agg;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[100, 101, 102])))
DEBUG: generating subplan XXX_2 for subquery SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[103, 104, 105])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)
@ -305,7 +307,7 @@ GROUP BY
ORDER BY
count_pay
RETURNING user_id, value_1_agg, value_2_agg;
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 10) AND (events_table.event_type OPERATOR(pg_catalog.<) 12))
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 12) AND (events_table.event_type OPERATOR(pg_catalog.<) 14))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)
@ -808,3 +810,36 @@ FROM (
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC;
CREATE TABLE dist_table_1(id int);
SELECT create_distributed_table('dist_table_1','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist_table_2(id int, id2 int);
SELECT create_distributed_table('dist_table_2','id2');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- verify that insert select with union can be pulled to coordinator. We cannot push down the query
-- since UNION clause has no FROM clause at top level query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
DROP SCHEMA multi_insert_select_non_pushable_queries CASCADE;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table dist_table_1
drop cascades to table dist_table_2

View File

@ -1876,32 +1876,38 @@ PREPARE insert_sel(int, int) AS
INSERT INTO articles_hash
SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0;
EXECUTE insert_sel(1,1);
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Deferred pruning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_sel(1,1);
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Deferred pruning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_sel(1,1);
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Deferred pruning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_sel(1,1);
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Deferred pruning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_sel(1,1);
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Deferred pruning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
EXECUTE insert_sel(1,1);
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Deferred pruning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator

View File

@ -89,7 +89,7 @@ key
FROM a JOIN table_2 USING (key)
GROUP BY key
HAVING (max(table_2.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -114,7 +114,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2 (key, value) SELECT table_1.key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1 WHERE (table_1.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file
@ -150,7 +151,7 @@ key
FROM a JOIN table_2_rep USING (key)
GROUP BY key
HAVING (max(table_2_rep.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1_rep ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2_rep USING (key)) GROUP BY a.key HAVING (max(table_2_rep.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -175,7 +176,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1_rep
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2_rep (key, value) SELECT table_1_rep.key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1_rep WHERE (table_1_rep.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1_rep.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2_rep.key, table_2_rep.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file
@ -215,7 +217,7 @@ key
FROM a JOIN table_2 USING (key)
GROUP BY key
HAVING (max(table_2.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -240,7 +242,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2 (key, value) SELECT table_1.key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1 WHERE (table_1.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file
@ -276,7 +279,7 @@ key
FROM a JOIN table_2_rep USING (key)
GROUP BY key
HAVING (max(table_2_rep.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1_rep ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2_rep USING (key)) GROUP BY a.key HAVING (max(table_2_rep.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -301,7 +304,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1_rep
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2_rep (key, value) SELECT table_1_rep.key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1_rep WHERE (table_1_rep.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1_rep.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2_rep.key, table_2_rep.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file

View File

@ -89,7 +89,7 @@ key
FROM a JOIN table_2 USING (key)
GROUP BY key
HAVING (max(table_2.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -114,7 +114,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2 (key, value) SELECT key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1 WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file
@ -150,7 +151,7 @@ key
FROM a JOIN table_2_rep USING (key)
GROUP BY key
HAVING (max(table_2_rep.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1_rep ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2_rep USING (key)) GROUP BY a.key HAVING (max(table_2_rep.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -175,7 +176,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1_rep
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2_rep (key, value) SELECT key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1_rep WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2_rep.key, table_2_rep.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file
@ -215,7 +217,7 @@ key
FROM a JOIN table_2 USING (key)
GROUP BY key
HAVING (max(table_2.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -240,7 +242,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2 (key, value) SELECT key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1 WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file
@ -276,7 +279,7 @@ key
FROM a JOIN table_2_rep USING (key)
GROUP BY key
HAVING (max(table_2_rep.value) >= (SELECT value FROM a));
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1_rep ORDER BY key, value DESC LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2_rep USING (key)) GROUP BY a.key HAVING (max(table_2_rep.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
@ -301,7 +304,8 @@ inserts AS (
) SELECT count(*) FROM inserts;
DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1_rep
DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2_rep (key, value) SELECT key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1_rep WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2_rep.key, table_2_rep.value
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts
DEBUG: Subplan XXX_1 will be written to local file

View File

@ -827,7 +827,7 @@ INSERT INTO nullkey_c1_t1 SELECT * FROM nullkey_c1_t2;
SET client_min_messages TO DEBUG2;
-- between two non-colocated single-shard tables
INSERT INTO nullkey_c1_t1 SELECT * FROM nullkey_c2_t1;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
@ -848,7 +848,7 @@ INSERT INTO nullkey_c1_t1 SELECT * FROM reference_table;
SET client_min_messages TO DEBUG2;
INSERT INTO nullkey_c1_t1 SELECT * FROM distributed_table;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO nullkey_c1_t1 SELECT * FROM citus_local_table;
@ -865,7 +865,8 @@ DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO distributed_table SELECT * FROM nullkey_c1_t1;
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
@ -1197,6 +1198,8 @@ SELECT COALESCE(raw_events_first.user_id, users_ref_table.user_id)
FROM raw_events_first
RIGHT JOIN (users_ref_table LEFT JOIN raw_events_second ON users_ref_table.user_id = raw_events_second.user_id)
ON raw_events_first.user_id = users_ref_table.user_id;
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- using a full join
INSERT INTO agg_events (user_id, value_1_agg)
SELECT t1.user_id AS col1,
@ -1227,6 +1230,8 @@ FROM users_ref_table
WHERE NOT EXISTS (SELECT 1
FROM raw_events_second
WHERE raw_events_second.user_id = users_ref_table.user_id);
DEBUG: correlated subqueries are not supported when the FROM clause contains a reference table
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- using inner join
INSERT INTO agg_events (user_id)
SELECT raw_events_first.user_id
@ -1247,13 +1252,16 @@ WHERE raw_events_first.value_1 IN (10, 11,12) OR users_ref_table.user_id IN (1,2
-- Below "limit / offset clause" test and some others are examples of this.
-- limit / offset clause
INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LIMIT 1;
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first OFFSET 1;
DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO agg_events (user_id) SELECT users_ref_table.user_id FROM users_ref_table LIMIT 1;
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- using a materialized cte
WITH cte AS MATERIALIZED
@ -1265,9 +1273,15 @@ DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO raw_events_second
WITH cte AS MATERIALIZED (SELECT * FROM raw_events_first)
SELECT user_id * 1000, time, value_1, value_2, value_3, value_4 FROM cte;
DEBUG: cannot push down this subquery
DETAIL: CTEs in subqueries are currently unsupported
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO raw_events_second (user_id)
WITH cte AS MATERIALIZED (SELECT * FROM users_ref_table)
SELECT user_id FROM cte;
DEBUG: cannot push down this subquery
DETAIL: CTEs in subqueries are currently unsupported
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- using a regular cte
WITH cte AS (SELECT * FROM raw_events_first)
INSERT INTO raw_events_second
@ -1286,8 +1300,6 @@ INSERT INTO agg_events
FROM
raw_events_first;
DEBUG: CTE sub_cte is going to be inlined via distributed planning
DEBUG: Subqueries without relations are not allowed in distributed INSERT ... SELECT queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- we still support complex joins via INSERT's cte list ..
WITH cte AS (
SELECT DISTINCT(reference_table.a) AS a, 1 AS b
@ -1311,7 +1323,7 @@ WITH cte AS (
)
SELECT (a+5)*2, b FROM cte;
DEBUG: CTE cte is going to be inlined via distributed planning
DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: recursively planning left side of the right join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "distributed_table" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "distributed_table" to a subquery
@ -1323,13 +1335,15 @@ INSERT INTO
raw_events_first(user_id)
(SELECT user_id FROM raw_events_first) INTERSECT
(SELECT user_id FROM raw_events_first);
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Intersect and Except are currently unsupported
DEBUG: Collecting INSERT ... SELECT results on coordinator
INSERT INTO
raw_events_first(user_id)
(SELECT user_id FROM users_ref_table) INTERSECT
(SELECT user_id FROM raw_events_first);
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Intersect and Except are currently unsupported
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- group by clause inside subquery
INSERT INTO agg_events

View File

@ -1969,8 +1969,7 @@ BEGIN;
FROM ref_1 t1
LEFT JOIN dist_1 t2
ON (t1.a = t2.a);
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "dist_1" "t2" to a subquery
@ -1986,9 +1985,7 @@ BEGIN;
JOIN
(ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4
ON (t1.a = t4.a);
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an operator in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "dist_1" "t3" to a subquery
@ -2005,7 +2002,7 @@ BEGIN;
JOIN
(ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4
ON (t1.a = t4.a);
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "dist_1" "t3" to a subquery

View File

@ -103,9 +103,7 @@ WITH ids_to_insert AS
INSERT INTO distributed_table
SELECT DISTINCT ids_to_insert.tenant_id FROM ids_to_insert, distributed_table
WHERE distributed_table.tenant_id < ids_to_insert.tenant_id;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: CTE ids_to_insert is going to be inlined via distributed planning
DEBUG: generating subplan XXX_1 for subquery SELECT (((tenant_id)::integer OPERATOR(pg_catalog.*) 100))::text AS tenant_id FROM with_dml.distributed_table WHERE (dept OPERATOR(pg_catalog.>) 7)
DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT ids_to_insert.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id text)) ids_to_insert, with_dml.distributed_table WHERE (distributed_table.tenant_id OPERATOR(pg_catalog.<) ids_to_insert.tenant_id)

View File

@ -956,7 +956,8 @@ WITH first_query AS (INSERT INTO modify_table (id) VALUES (10001)),
SET client_min_messages TO debug2;
-- pushed down without the insert
WITH mb AS (UPDATE modify_table SET val = 3 WHERE id = 3 RETURNING NULL) INSERT INTO modify_table WITH ma AS (SELECT * FROM modify_table LIMIT 10) SELECT count(*) FROM mb;
DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries
DEBUG: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a subquery references a column from another query
DEBUG: Creating router plan
DEBUG: query has a single distribution column value: 3
DEBUG: Collecting INSERT ... SELECT results on coordinator

View File

@ -65,7 +65,12 @@ test: multi_remove_node_reference_table
test: multi_create_table
test: multi_create_table_superuser
test: multi_master_protocol multi_load_data multi_load_data_superuser multi_behavioral_analytics_create_table
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select multi_behavioral_analytics_create_table_superuser
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_behavioral_analytics_create_table_superuser
# We don't run the following tests in parallel with the ones above because they're
# not idempotent and hence cause the flaky test detection check to fail.
test: multi_insert_select_non_pushable_queries multi_insert_select
test: multi_shard_update_delete recursive_dml_with_different_planners_executors
test: insert_select_repartition window_functions dml_recursive multi_insert_select_window
test: multi_insert_select_conflict citus_table_triggers alter_table_single_shard_table

View File

@ -12,7 +12,12 @@ test: replicated_table_disable_node
test: multi_create_table
test: multi_create_table_superuser
test: multi_create_table_constraints multi_master_protocol multi_load_data multi_load_data_superuser multi_behavioral_analytics_create_table
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select multi_behavioral_analytics_create_table_superuser
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_behavioral_analytics_create_table_superuser
# We don't run the following tests in parallel with the ones above because they're
# not idempotent and hence cause the flaky test detection check to fail.
test: multi_insert_select_non_pushable_queries multi_insert_select
test: multi_shard_update_delete recursive_dml_with_different_planners_executors
test: insert_select_repartition window_functions dml_recursive multi_insert_select_window
test: multi_insert_select_conflict citus_table_triggers

View File

@ -35,7 +35,6 @@ step "s1-adaptive-select"
SELECT * FROM select_append AS t1 JOIN select_append AS t2 ON t1.id = t2.int_data ORDER BY 1, 2, 3, 4;
}
step "s1-insert" { INSERT INTO select_append VALUES(0, 'k', 0); }
step "s1-insert-select" { INSERT INTO select_append SELECT * FROM select_append; }
step "s1-update" { UPDATE select_append SET data = 'l' WHERE id = 0; }
step "s1-delete" { DELETE FROM select_append WHERE id = 1; }
step "s1-truncate" { TRUNCATE select_append; }
@ -65,7 +64,6 @@ step "s2-adaptive-select"
SELECT * FROM select_append AS t1 JOIN select_append AS t2 ON t1.id = t2.int_data ORDER BY 1, 2, 3, 4;
}
step "s2-insert" { INSERT INTO select_append VALUES(0, 'k', 0); }
step "s2-insert-select" { INSERT INTO select_append SELECT * FROM select_append; }
step "s2-update" { UPDATE select_append SET data = 'l' WHERE id = 0; }
step "s2-delete" { DELETE FROM select_append WHERE id = 1; }
step "s2-truncate" { TRUNCATE select_append; }
@ -101,7 +99,6 @@ permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-adaptive-select"
// permutations - router SELECT first
permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-insert" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-insert-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-update" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-delete" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-truncate" "s1-commit" "s1-select-count"
@ -119,7 +116,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-router-se
// permutations - router SELECT second
permutation "s1-initialize" "s1-begin" "s1-insert" "s2-router-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-insert-select" "s2-router-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-update" "s2-router-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-delete" "s2-router-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-truncate" "s2-router-select" "s1-commit" "s1-select-count"
@ -136,7 +132,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-distribut
// permutations - real-time SELECT first
permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-insert" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-insert-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-update" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-delete" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-truncate" "s1-commit" "s1-select-count"
@ -153,7 +148,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-real-time
// permutations - real-time SELECT second
permutation "s1-initialize" "s1-begin" "s1-insert" "s2-real-time-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-insert-select" "s2-real-time-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-update" "s2-real-time-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-delete" "s2-real-time-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-truncate" "s2-real-time-select" "s1-commit" "s1-select-count"
@ -169,7 +163,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-distribut
// permutations - adaptive SELECT first
permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-insert" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-insert-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-update" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-delete" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-truncate" "s1-commit" "s1-select-count"
@ -186,7 +179,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-adaptive-
// permutations - adaptive SELECT second
permutation "s1-initialize" "s1-begin" "s1-insert" "s2-adaptive-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-insert-select" "s2-adaptive-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-update" "s2-adaptive-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-delete" "s2-adaptive-select" "s1-commit" "s1-select-count"
permutation "s1-initialize" "s1-begin" "s1-truncate" "s2-adaptive-select" "s1-commit" "s1-select-count"

View File

@ -647,6 +647,17 @@ insert into table_with_user_sequences values (1,1);
select create_distributed_table('table_with_user_sequences','x');
explain (costs off) insert into table_with_user_sequences select y, x from table_with_user_sequences;
CREATE TABLE dist_table_1(id int);
SELECT create_distributed_table('dist_table_1','id');
CREATE TABLE dist_table_2(id int);
SELECT create_distributed_table('dist_table_2','id');
-- verify that insert select with union can be repartitioned. We cannot push down the query
-- since UNION clause has no FROM clause at top level query.
SELECT public.coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2;
$$);
-- clean-up
SET client_min_messages TO WARNING;
DROP SCHEMA insert_select_repartition CASCADE;

View File

@ -325,11 +325,17 @@ JOIN (
) t2 ON t1.b = t2.b
WHERE t2.sum_val > 2;
-- Temporarily reduce the verbosity to avoid noise
-- in the output of the next query.
SET client_min_messages TO DEBUG1;
-- MultiTaskRouterSelectQuerySupported() is unnecessarily restrictive
-- about pushing down queries with DISTINCT ON clause even if the table
-- doesn't have a shard key. See https://github.com/citusdata/citus/pull/6752.
INSERT INTO nullkey_c1_t1 SELECT DISTINCT ON (a) a, b FROM nullkey_c1_t2;
SET client_min_messages TO DEBUG2;
-- Similarly, we could push down the following query as well. see
-- https://github.com/citusdata/citus/pull/6831.
INSERT INTO nullkey_c1_t1 SELECT b, SUM(a) OVER (ORDER BY b) AS sum_val FROM nullkey_c1_t1;

View File

@ -536,7 +536,7 @@ INSERT INTO agg_events
fist_table_agg;
ROLLBACK;
-- We don't support CTEs that are referenced in the target list
-- We do support CTEs that are referenced in the target list
INSERT INTO agg_events
WITH sub_cte AS (SELECT 1)
SELECT
@ -544,7 +544,7 @@ INSERT INTO agg_events
FROM
raw_events_first;
-- We support set operations via the coordinator
-- We support set operations
BEGIN;
INSERT INTO
@ -2341,5 +2341,121 @@ join dist_table_2 t2 using (dist_col)
limit 1
returning text_col_1;
CREATE TABLE dist_table_3(
dist_col bigint,
int_col integer
);
SELECT create_distributed_table('dist_table_3', 'dist_col');
-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to
-- different types for distribution columns. Citus would not be able to handle this complex insert select.
INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col);
CREATE TABLE dist_table_4(
dist_col integer,
int_col integer
);
SELECT create_distributed_table('dist_table_4', 'dist_col');
-- Even if the target table's distribution column is colocated with dist_table_2's distribution column, source tables dist_table_2 and dist_table_4
-- are non-colocated. Hence, the SELECT part of the query should be pulled to the coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col;
$$);
-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if a limit clause exists in the lateral query.
-- This is because a subquery with a limit does not need to be merged at the coordinator when it is a lateral query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col);
$$);
-- For INSERT SELECT, push-down is NOT possible when a limit clause exists in a subquery in the SELECT part of INSERT SELECT.
-- This is because the subquery with the limit needs to be merged at the coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col);
$$);
CREATE TABLE dist_table_5(id int, id2 int);
SELECT create_distributed_table('dist_table_5','id');
CREATE TABLE dist_table_6(id int, id2 int);
SELECT create_distributed_table('dist_table_6','id');
-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy;
$$);
-- verify that insert select with sublink can be pushed down when tables are colocated.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6;
$$);
CREATE TABLE ref_table_1(id int);
SELECT create_reference_table('ref_table_1');
-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation.
INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1;
-- verify that insert select cannot be pushed down when we have recurring range table in from clause.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1;
$$);
-- verify that insert select cannot be pushed down when we have a reference table on the outer side of an outer join.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true);
$$);
-- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id);
$$);
CREATE TABLE loc_table_1(id int);
-- verify that insert select cannot be pushed down when it contains join between local and distributed tables.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id);
$$);
CREATE VIEW view_1 AS
SELECT id FROM dist_table_6;
CREATE MATERIALIZED VIEW view_2 AS
SELECT id FROM dist_table_6;
-- verify that insert select cannot be pushed down when it contains view.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1;
$$);
-- verify that insert select cannot be pushed down when it contains materialized view.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2;
$$);
CREATE TABLE append_table(id integer, data text, int_data int);
SELECT create_distributed_table('append_table', 'id', 'append');
SELECT master_create_empty_shard('append_table');
-- verify that insert select push down is not supported for append tables.
INSERT INTO append_table SELECT * FROM append_table;
-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner
-- and handled by pull to coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5)
INSERT INTO dist_table_5
SELECT id FROM dist_table_5 JOIN cte_1 USING(id);
$$);
-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5
WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5)
SELECT id FROM dist_table_5 JOIN cte_1 USING(id);
$$);
SET client_min_messages TO ERROR;
DROP SCHEMA multi_insert_select CASCADE;

View File

@ -4,6 +4,9 @@
------------------------------------
------------------------------------
CREATE SCHEMA multi_insert_select_non_pushable_queries;
SET search_path = multi_insert_select_non_pushable_queries,public;
-- not pushable since the JOIN is not an equi join
INSERT INTO agg_results_third (user_id, value_1_agg)
SELECT user_id, array_length(events_table, 1)
@ -716,3 +719,16 @@ FROM (
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC;
CREATE TABLE dist_table_1(id int);
SELECT create_distributed_table('dist_table_1','id');
CREATE TABLE dist_table_2(id int, id2 int);
SELECT create_distributed_table('dist_table_2','id2');
-- verify that insert select with union can be pulled to coordinator. We cannot push down the query
-- since UNION clause has no FROM clause at top level query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2;
$$);
DROP SCHEMA multi_insert_select_non_pushable_queries CASCADE;