diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c index cae71845b..06e446783 100644 --- a/src/backend/distributed/planner/insert_select_planner.c +++ b/src/backend/distributed/planner/insert_select_planner.c @@ -73,9 +73,9 @@ static List * CreateTargetListForCombineQuery(List *targetList); static DeferredErrorMessage * DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte, RangeTblEntry *subqueryRte, - bool allReferenceTables); -static DeferredErrorMessage * MultiTaskRouterSelectQuerySupported(Query *query); -static bool HasUnsupportedDistinctOn(Query *query); + bool allReferenceTables, + PlannerRestrictionContext * + plannerRestrictionContext); static DeferredErrorMessage * InsertPartitionColumnMatchesSelect(Query *query, RangeTblEntry *insertRte, RangeTblEntry * @@ -292,7 +292,8 @@ CreateDistributedInsertSelectPlan(Query *originalQuery, distributedPlan->planningError = DistributedInsertSelectSupported(originalQuery, insertRte, subqueryRte, - allReferenceTables); + allReferenceTables, + plannerRestrictionContext); if (distributedPlan->planningError) { return distributedPlan; @@ -613,7 +614,8 @@ CreateTargetListForCombineQuery(List *targetList) */ static DeferredErrorMessage * DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte, - RangeTblEntry *subqueryRte, bool allReferenceTables) + RangeTblEntry *subqueryRte, bool allReferenceTables, + PlannerRestrictionContext *plannerRestrictionContext) { Oid selectPartitionColumnTableId = InvalidOid; Oid targetRelationId = insertRte->relid; @@ -687,8 +689,16 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte, NULL, NULL); } - /* we don't support LIMIT, OFFSET and WINDOW functions */ - DeferredErrorMessage *error = MultiTaskRouterSelectQuerySupported(subquery); + /* first apply toplevel pushdown checks to SELECT query */ + DeferredErrorMessage *error = 
DeferErrorIfUnsupportedSubqueryPushdown(subquery, + plannerRestrictionContext); + if (error) + { + return error; + } + + /* then apply subquery pushdown checks to SELECT query */ + error = DeferErrorIfCannotPushdownSubquery(subquery, false); if (error) { return error; @@ -730,27 +740,6 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte, "table", NULL, NULL); } - if (!HasDistributionKey(targetRelationId) || - subqueryRteListProperties->hasSingleShardDistTable) - { - /* - * XXX: Better to check this regardless of the fact that the target table - * has a distribution column or not. - */ - List *distributedRelationIdList = DistributedRelationIdList(subquery); - distributedRelationIdList = lappend_oid(distributedRelationIdList, - targetRelationId); - - if (!AllDistributedRelationsInListColocated(distributedRelationIdList)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "distributed INSERT ... SELECT cannot reference a " - "distributed table without a shard key together " - "with non-colocated distributed tables", - NULL, NULL); - } - } - if (HasDistributionKey(targetRelationId)) { /* ensure that INSERT's partition column comes from SELECT's partition column */ @@ -760,22 +749,22 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte, { return error; } - - /* - * We expect partition column values come from colocated tables. Note that we - * skip this check from the reference table case given that all reference tables - * are already (and by default) co-located. - */ - if (!TablesColocated(insertRte->relid, selectPartitionColumnTableId)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "INSERT target table and the source relation of the SELECT partition " - "column value must be colocated in distributed INSERT ... SELECT", - NULL, NULL); - } } } + /* All tables in source list and target table should be colocated. 
*/ + List *distributedRelationIdList = DistributedRelationIdList(subquery); + distributedRelationIdList = lappend_oid(distributedRelationIdList, + targetRelationId); + + if (!AllDistributedRelationsInListColocated(distributedRelationIdList)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "INSERT target relation and all source relations of the " + "SELECT must be colocated in distributed INSERT ... SELECT", + NULL, NULL); + } + return NULL; } @@ -1131,152 +1120,6 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte, } -/* - * MultiTaskRouterSelectQuerySupported returns NULL if the query may be used - * as the source for an INSERT ... SELECT or returns a description why not. - */ -static DeferredErrorMessage * -MultiTaskRouterSelectQuerySupported(Query *query) -{ - List *queryList = NIL; - ListCell *queryCell = NULL; - StringInfo errorDetail = NULL; - bool hasUnsupportedDistinctOn = false; - - ExtractQueryWalker((Node *) query, &queryList); - foreach(queryCell, queryList) - { - Query *subquery = (Query *) lfirst(queryCell); - - Assert(subquery->commandType == CMD_SELECT); - - /* pushing down rtes without relations yields (shardCount * expectedRows) */ - if (HasEmptyJoinTree(subquery)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "Subqueries without relations are not allowed in " - "distributed INSERT ... SELECT queries", - NULL, NULL); - } - - /* pushing down limit per shard would yield wrong results */ - if (subquery->limitCount != NULL) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "LIMIT clauses are not allowed in distributed INSERT " - "... SELECT queries", - NULL, NULL); - } - - /* pushing down limit offest per shard would yield wrong results */ - if (subquery->limitOffset != NULL) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "OFFSET clauses are not allowed in distributed " - "INSERT ... 
SELECT queries", - NULL, NULL); - } - - /* group clause list must include partition column */ - if (subquery->groupClause) - { - List *groupClauseList = subquery->groupClause; - List *targetEntryList = subquery->targetList; - List *groupTargetEntryList = GroupTargetEntryList(groupClauseList, - targetEntryList); - bool groupOnPartitionColumn = TargetListOnPartitionColumn(subquery, - groupTargetEntryList); - if (!groupOnPartitionColumn) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "Group by list without distribution column is " - "not allowed in distributed INSERT ... " - "SELECT queries", - NULL, NULL); - } - } - - /* - * We support window functions when the window function - * is partitioned on distribution column. - */ - if (subquery->windowClause && !SafeToPushdownWindowFunction(subquery, - &errorDetail)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorDetail->data, NULL, - NULL); - } - - if (subquery->setOperations != NULL) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "Set operations are not allowed in distributed " - "INSERT ... SELECT queries", - NULL, NULL); - } - - /* - * We currently do not support grouping sets since it could generate NULL - * results even after the restrictions are applied to the query. A solution - * would be to add the whole query into a subquery and add the restrictions - * on that subquery. - */ - if (subquery->groupingSets != NULL) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "grouping sets are not allowed in distributed " - "INSERT ... SELECT queries", - NULL, NULL); - } - - /* - * We don't support DISTINCT ON clauses on non-partition columns. - */ - hasUnsupportedDistinctOn = HasUnsupportedDistinctOn(subquery); - if (hasUnsupportedDistinctOn) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "DISTINCT ON (non-partition column) clauses are not " - "allowed in distributed INSERT ... 
SELECT queries", - NULL, NULL); - } - } - - return NULL; -} - - -/* - * HasUnsupportedDistinctOn returns true if the query has distinct on and - * distinct targets do not contain partition column. - */ -static bool -HasUnsupportedDistinctOn(Query *query) -{ - ListCell *distinctCell = NULL; - - if (!query->hasDistinctOn) - { - return false; - } - - foreach(distinctCell, query->distinctClause) - { - SortGroupClause *distinctClause = lfirst(distinctCell); - TargetEntry *distinctEntry = get_sortgroupclause_tle(distinctClause, - query->targetList); - - bool skipOuterVars = true; - if (IsPartitionColumn(distinctEntry->expr, query, skipOuterVars)) - { - return false; - } - } - - return true; -} - - /* * InsertPartitionColumnMatchesSelect returns NULL the partition column in the * table targeted by INSERTed matches with the any of the SELECTed table's diff --git a/src/test/regress/expected/coordinator_shouldhaveshards.out b/src/test/regress/expected/coordinator_shouldhaveshards.out index 3307e6bb6..46d5bf6a9 100644 --- a/src/test/regress/expected/coordinator_shouldhaveshards.out +++ b/src/test/regress/expected/coordinator_shouldhaveshards.out @@ -908,7 +908,7 @@ key FROM a JOIN table_2 USING (key) GROUP BY key HAVING (max(table_2.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -939,7 +939,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM coordinator_shouldhaveshards.table_1 DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO coordinator_shouldhaveshards.table_2 (key, value) SELECT table_1.key, count(*) AS count FROM coordinator_shouldhaveshards.table_1 WHERE (table_1.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file diff --git a/src/test/regress/expected/coordinator_shouldhaveshards_0.out b/src/test/regress/expected/coordinator_shouldhaveshards_0.out index 9b81a6a72..4c9dc0d18 100644 --- a/src/test/regress/expected/coordinator_shouldhaveshards_0.out +++ b/src/test/regress/expected/coordinator_shouldhaveshards_0.out @@ -908,7 +908,7 @@ key FROM a JOIN table_2 USING (key) GROUP BY key HAVING (max(table_2.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -939,7 +939,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM coordinator_shouldhaveshards.table_1 DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO coordinator_shouldhaveshards.table_2 (key, value) SELECT key, count(*) AS count FROM coordinator_shouldhaveshards.table_1 WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file diff --git a/src/test/regress/expected/insert_select_repartition.out b/src/test/regress/expected/insert_select_repartition.out index bb77e9d47..b97a82b63 100644 --- a/src/test/regress/expected/insert_select_repartition.out +++ b/src/test/regress/expected/insert_select_repartition.out @@ -549,7 +549,7 @@ SELECT create_distributed_table('target_table', 'a'); INSERT INTO source_table SELECT floor(i/4), i*i FROM generate_series(1, 20) i; SET client_min_messages TO DEBUG1; INSERT INTO target_table SELECT a, max(b) FROM source_table GROUP BY a; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT RESET client_min_messages; SELECT * FROM target_table ORDER BY a; @@ -622,40 +622,40 @@ INSERT INTO target_table WHERE a BETWEEN $1 AND $2 GROUP BY a; SET client_min_messages TO DEBUG1; EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... 
SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... 
SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... 
SELECT RESET client_min_messages; SELECT a, count(*), count(distinct b) distinct_values FROM target_table GROUP BY a ORDER BY a; @@ -680,25 +680,25 @@ INSERT INTO target_table WHERE a=$1 GROUP BY a; SET client_min_messages TO DEBUG1; EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... 
SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator RESET client_min_messages; SELECT a, count(*), count(distinct b) distinct_values FROM target_table GROUP BY a ORDER BY a; @@ -761,10 +761,10 @@ WITH r AS ( INSERT INTO target_table SELECT * FROM source_table RETURNING * ) INSERT INTO target_table SELECT source_table.a, max(source_table.b) FROM source_table NATURAL JOIN r GROUP BY source_table.a; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: only SELECT, UPDATE, or DELETE common table expressions may be router planned DEBUG: generating subplan XXX_1 for CTE r: INSERT INTO insert_select_repartition.target_table (a, b) SELECT source_table.a, source_table.b FROM insert_select_repartition.source_table RETURNING target_table.a, target_table.b -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... 
SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, max AS b FROM (SELECT source_table.a, max(source_table.b) AS max FROM (insert_select_repartition.source_table JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) r USING (a, b)) GROUP BY source_table.a) citus_insert_select_subquery DEBUG: Router planner cannot handle multi-shard select queries @@ -1015,7 +1015,7 @@ SELECT create_distributed_table('target_table', 'a'); INSERT INTO source_table SELECT i, i * i FROM generate_series(1, 10) i; SET client_min_messages TO DEBUG2; INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 AS aa FROM source_table; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'a' @@ -1049,7 +1049,7 @@ EXPLAIN (costs off) INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 SET client_min_messages TO DEBUG2; INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 AS aa FROM source_table; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... 
SELECT results on coordinator RESET client_min_messages; @@ -1104,7 +1104,7 @@ EXPLAIN (costs off) INSERT INTO test(y, x) SELECT a.x, b.y FROM test a JOIN test SET client_min_messages TO DEBUG1; INSERT INTO test(y, x) SELECT a.x, b.y FROM test a JOIN test b USING (y); -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Collecting INSERT ... SELECT results on coordinator RESET client_min_messages; SELECT count(*) FROM test; @@ -1133,7 +1133,7 @@ EXPLAIN (costs off) INSERT INTO test SELECT a.* FROM test a JOIN test b USING (y SET client_min_messages TO DEBUG1; INSERT INTO test SELECT a.* FROM test a JOIN test b USING (y); -DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Collecting INSERT ... SELECT results on coordinator RESET client_min_messages; SELECT count(*) FROM test; @@ -1219,7 +1219,7 @@ ON CONFLICT(c1, c2, c3, c4, c5, c6) DO UPDATE SET cardinality = enriched.cardinality + excluded.cardinality, sum = enriched.sum + excluded.sum; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... 
SELECT DEBUG: partitioning SELECT query by column index 0 with name 'c1' @@ -1302,6 +1302,33 @@ explain (costs off) insert into table_with_user_sequences select y, x from table -> Seq Scan on table_with_user_sequences_4213652 table_with_user_sequences (8 rows) +CREATE TABLE dist_table_1(id int); +SELECT create_distributed_table('dist_table_1','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_table_2(id int); +SELECT create_distributed_table('dist_table_2','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with union can be repartitioned. We cannot push down the query +-- since UNION clause has no FROM clause at top level query. +SELECT public.coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + -- clean-up SET client_min_messages TO WARNING; DROP SCHEMA insert_select_repartition CASCADE; diff --git a/src/test/regress/expected/insert_select_repartition_0.out b/src/test/regress/expected/insert_select_repartition_0.out index 0aec4f49a..5bcb894cc 100644 --- a/src/test/regress/expected/insert_select_repartition_0.out +++ b/src/test/regress/expected/insert_select_repartition_0.out @@ -549,7 +549,7 @@ SELECT create_distributed_table('target_table', 'a'); INSERT INTO source_table SELECT floor(i/4), i*i FROM generate_series(1, 20) i; SET client_min_messages TO DEBUG1; INSERT INTO target_table SELECT a, max(b) FROM source_table GROUP BY a; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... 
SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT RESET client_min_messages; SELECT * FROM target_table ORDER BY a; @@ -622,40 +622,40 @@ INSERT INTO target_table WHERE a BETWEEN $1 AND $2 GROUP BY a; SET client_min_messages TO DEBUG1; EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... 
SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(0, 2); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... 
SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT EXECUTE insert_plan(2, 4); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: performing repartitioned INSERT ... SELECT RESET client_min_messages; SELECT a, count(*), count(distinct b) distinct_values FROM target_table GROUP BY a ORDER BY a; @@ -680,25 +680,25 @@ INSERT INTO target_table WHERE a=$1 GROUP BY a; SET client_min_messages TO DEBUG1; EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... 
SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_plan(0); -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator RESET client_min_messages; SELECT a, count(*), count(distinct b) distinct_values FROM target_table GROUP BY a ORDER BY a; @@ -761,10 +761,10 @@ WITH r AS ( INSERT INTO target_table SELECT * FROM source_table RETURNING * ) INSERT INTO target_table SELECT source_table.a, max(source_table.b) FROM source_table NATURAL JOIN r GROUP BY source_table.a; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... 
SELECT +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: only SELECT, UPDATE, or DELETE common table expressions may be router planned DEBUG: generating subplan XXX_1 for CTE r: INSERT INTO insert_select_repartition.target_table (a, b) SELECT a, b FROM insert_select_repartition.source_table RETURNING target_table.a, target_table.b -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, max AS b FROM (SELECT source_table.a, max(source_table.b) AS max FROM (insert_select_repartition.source_table JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) r USING (a, b)) GROUP BY source_table.a) citus_insert_select_subquery DEBUG: Router planner cannot handle multi-shard select queries @@ -1015,7 +1015,7 @@ SELECT create_distributed_table('target_table', 'a'); INSERT INTO source_table SELECT i, i * i FROM generate_series(1, 10) i; SET client_min_messages TO DEBUG2; INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 AS aa FROM source_table; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... 
SELECT DEBUG: partitioning SELECT query by column index 0 with name 'a' @@ -1049,7 +1049,7 @@ EXPLAIN (costs off) INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 SET client_min_messages TO DEBUG2; INSERT INTO target_table SELECT a AS aa, b AS aa, 1 AS aa, 2 AS aa FROM source_table; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... SELECT results on coordinator RESET client_min_messages; @@ -1104,7 +1104,7 @@ EXPLAIN (costs off) INSERT INTO test(y, x) SELECT a.x, b.y FROM test a JOIN test SET client_min_messages TO DEBUG1; INSERT INTO test(y, x) SELECT a.x, b.y FROM test a JOIN test b USING (y); -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Collecting INSERT ... SELECT results on coordinator RESET client_min_messages; SELECT count(*) FROM test; @@ -1133,7 +1133,7 @@ EXPLAIN (costs off) INSERT INTO test SELECT a.* FROM test a JOIN test b USING (y SET client_min_messages TO DEBUG1; INSERT INTO test SELECT a.* FROM test a JOIN test b USING (y); -DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Collecting INSERT ... 
SELECT results on coordinator RESET client_min_messages; SELECT count(*) FROM test; @@ -1219,7 +1219,7 @@ ON CONFLICT(c1, c2, c3, c4, c5, c6) DO UPDATE SET cardinality = enriched.cardinality + excluded.cardinality, sum = enriched.sum + excluded.sum; -DEBUG: INSERT target table and the source relation of the SELECT partition column value must be colocated in distributed INSERT ... SELECT +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'c1' @@ -1302,6 +1302,33 @@ explain (costs off) insert into table_with_user_sequences select y, x from table -> Seq Scan on table_with_user_sequences_4213652 table_with_user_sequences (8 rows) +CREATE TABLE dist_table_1(id int); +SELECT create_distributed_table('dist_table_1','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_table_2(id int); +SELECT create_distributed_table('dist_table_2','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with union can be repartitioned. We cannot push down the query +-- since UNION clause has no FROM clause at top level query. +SELECT public.coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + -- clean-up SET client_min_messages TO WARNING; DROP SCHEMA insert_select_repartition CASCADE; diff --git a/src/test/regress/expected/insert_select_single_shard_table.out b/src/test/regress/expected/insert_select_single_shard_table.out index 68391abb5..219e7d5d9 100644 --- a/src/test/regress/expected/insert_select_single_shard_table.out +++ b/src/test/regress/expected/insert_select_single_shard_table.out @@ -118,73 +118,82 @@ SET client_min_messages TO DEBUG2; -- different table types together with single-shard tables. -- use a single-shard table INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator -- use a reference table INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 JOIN reference_table USING (a); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 RIGHT JOIN reference_table USING (b) WHERE reference_table.a >= 1 AND reference_table.a <= 5; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t2.a, nullkey_c1_t2.b FROM nullkey_c1_t2 LEFT JOIN reference_table USING (b); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 INTERSECT SELECT * FROM reference_table; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Intersect and Except are currently unsupported DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator -- use a colocated single-shard table INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 JOIN nullkey_c1_t2 USING (b); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... 
SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 FULL JOIN nullkey_c1_t2 USING (a); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 FULL JOIN matview USING (a); -DEBUG: distributed INSERT ... SELECT cannot select from a local relation when inserting into a distributed table +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: Local tables cannot be used in distributed queries. INSERT INTO distributed_table_c1_t1 SELECT * FROM nullkey_c1_t1 UNION SELECT * FROM nullkey_c1_t2; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator -- use a non-colocated single-shard table INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t2.a, nullkey_c1_t2.b FROM nullkey_c1_t2 LEFT JOIN nullkey_c2_t1 USING (a); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables INSERT INTO distributed_table_c1_t1 SELECT * FROM nullkey_c1_t1 UNION SELECT * FROM nullkey_c2_t1; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables -- use a distributed table that is colocated with the target table INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a); -DEBUG: distributed INSERT ... 
SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables INSERT INTO distributed_table_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables INSERT INTO distributed_table_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (b); -DEBUG: distributed INSERT ... 
SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables INSERT INTO distributed_table_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a) WHERE distributed_table_c1_t2.a = 1; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables -- use a distributed table that is not colocated with the target table INSERT INTO distributed_table_c1_t1 SELECT nullkey_c1_t2.a, nullkey_c1_t2.b FROM nullkey_c1_t2 JOIN distributed_table_c2_t1 USING (a); -DEBUG: distributed INSERT ... 
SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables -- use a citus local table @@ -199,12 +208,14 @@ ERROR: queries that reference a distributed table without a shard key can only DETAIL: Local tables cannot be used in distributed queries. -- use append / range distributed tables INSERT INTO range_table SELECT * FROM nullkey_c1_t1; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO append_table SELECT * FROM nullkey_c1_t1; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: INSERT ... 
SELECT into an append-distributed table is not supported SELECT avg(a), avg(b) FROM distributed_table_c1_t1 ORDER BY 1, 2; @@ -236,11 +247,12 @@ DEBUG: only reference tables may be queried when targeting a reference table wi DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO reference_table SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 UNION SELECT * FROM reference_table; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Reference tables are not supported with union operator DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO reference_table SELECT nullkey_c1_t2.a, nullkey_c1_t2.b FROM nullkey_c1_t2 LEFT JOIN reference_table USING (b) WHERE b IN (SELECT b FROM matview); -DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: Local tables cannot be used in distributed queries. -- use a colocated single-shard table @@ -259,19 +271,19 @@ ERROR: queries that reference a distributed table without a shard key can only DETAIL: router planner does not support queries that reference non-colocated distributed tables -- use a distributed table INSERT INTO reference_table SELECT nullkey_c1_t1.a, nullkey_c1_t1.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a); -DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... 
SELECT +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables INSERT INTO reference_table SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a); -DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables INSERT INTO reference_table SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (b); -DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables INSERT INTO reference_table SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM nullkey_c1_t1 JOIN distributed_table_c1_t2 USING (a) WHERE distributed_table_c1_t2.a = 1; -DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... 
SELECT +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables -- use a citus local table @@ -371,20 +383,20 @@ ERROR: queries that reference a distributed table without a shard key can only DETAIL: Local tables cannot be used in distributed queries. -- use a distributed table INSERT INTO nullkey_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM distributed_table_c1_t2; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO nullkey_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM distributed_table_c1_t2 JOIN reference_table USING (a); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO nullkey_c1_t1 SELECT distributed_table_c1_t2.a, distributed_table_c1_t2.b FROM distributed_table_c1_t2 JOIN nullkey_c1_t1 USING (a); -DEBUG: distributed INSERT ... 
SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables -- use a non-colocated single-shard table INSERT INTO nullkey_c2_t1 SELECT q.* FROM (SELECT reference_table.* FROM reference_table LEFT JOIN nullkey_c1_t1 USING (a)) q JOIN nullkey_c1_t2 USING (a); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator -- use a materialized view @@ -392,11 +404,11 @@ INSERT INTO nullkey_c1_t1 SELECT * FROM matview; DEBUG: distributed INSERT ... SELECT can only select from distributed tables DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO nullkey_c1_t1 SELECT reference_table.a, reference_table.b FROM reference_table JOIN matview ON (reference_table.a = matview.a); -DEBUG: distributed INSERT ... SELECT cannot select from a local relation when inserting into a distributed table +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO nullkey_c1_t1 SELECT q.* FROM (SELECT reference_table.* FROM reference_table JOIN nullkey_c1_t1 USING (a)) q JOIN matview USING (a); -DEBUG: distributed INSERT ... 
SELECT cannot select from a local relation when inserting into a distributed table +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: Local tables cannot be used in distributed queries. -- use append / range distributed tables @@ -405,7 +417,7 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO nullkey_c1_t1 SELECT * FROM append_table; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner does not support append-partitioned tables. DEBUG: Collecting INSERT ... SELECT results on coordinator SELECT avg(a), avg(b) FROM nullkey_c1_t1 ORDER BY 1, 2; @@ -462,8 +474,7 @@ cte_2 AS ( ) INSERT INTO distributed_table_c1_t1 SELECT cte_1.* FROM cte_1 JOIN cte_2 USING (a) JOIN distributed_table_c1_t2 USING (a) ORDER BY 1,2; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The target table's partition column should correspond to a partition column in the subquery. 
+DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: CTE cte_1 is going to be inlined via distributed planning DEBUG: CTE cte_2 is going to be inlined via distributed planning ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables @@ -479,7 +490,8 @@ cte_2 AS ( ) INSERT INTO distributed_table_c1_t1 SELECT * FROM cte_1 UNION SELECT * FROM cte_2 EXCEPT SELECT * FROM reference_table; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: CTEs in subqueries are currently unsupported DEBUG: CTE cte_1 is going to be inlined via distributed planning DEBUG: CTE cte_2 is going to be inlined via distributed planning DEBUG: Creating router plan @@ -491,7 +503,8 @@ JOIN ( SELECT b FROM nullkey_c1_t2 ORDER BY b DESC LIMIT 1 ) t2 ON t1.b < t2.b; -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO distributed_table_c1_t1 (a, b) @@ -503,7 +516,9 @@ WITH cte AS ( ) SELECT d1, COALESCE(d2, a) FROM cte WHERE d1 IS NOT NULL AND d2 IS NOT NULL; DEBUG: CTE cte is going to be inlined via distributed planning -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. 
+HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO citus_local_table (a, b) @@ -523,7 +538,8 @@ LEFT JOIN ( FROM nullkey_c1_t1 ) t2 ON t1.b = t2.b WHERE t2.rn > 0; -DEBUG: Window functions without PARTITION BY on distribution column is currently unsupported +DEBUG: cannot push down this subquery +DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO nullkey_c1_t1 (a, b) @@ -537,7 +553,7 @@ JOIN ( ) q ) t2 ON t1.b = t2.b WHERE t2.rn > 2; -DEBUG: Window functions without PARTITION BY on distribution column is currently unsupported +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: router planner does not support queries that reference non-colocated distributed tables INSERT INTO distributed_table_c1_t1 (a, b) @@ -551,21 +567,23 @@ JOIN ( ) q ) t2 ON t1.b = t2.b WHERE t2.sum_val > 2; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator +-- Temporaryly reduce the verbosity to avoid noise +-- in the output of the next query. 
+SET client_min_messages TO DEBUG1; -- MultiTaskRouterSelectQuerySupported() is unnecessarily restrictive -- about pushing down queries with DISTINCT ON clause even if the table -- doesn't have a shard key. See https://github.com/citusdata/citus/pull/6752. INSERT INTO nullkey_c1_t1 SELECT DISTINCT ON (a) a, b FROM nullkey_c1_t2; -DEBUG: DISTINCT ON (non-partition column) clauses are not allowed in distributed INSERT ... SELECT queries -DEBUG: Distributed planning for a fast-path router query -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... SELECT results on coordinator +SET client_min_messages TO DEBUG2; -- Similarly, we could push down the following query as well. see -- https://github.com/citusdata/citus/pull/6831. INSERT INTO nullkey_c1_t1 SELECT b, SUM(a) OVER (ORDER BY b) AS sum_val FROM nullkey_c1_t1; -DEBUG: Window functions without PARTITION BY on distribution column is currently unsupported +DEBUG: cannot push down this subquery +DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator @@ -576,7 +594,7 @@ JOIN reference_table AS t3 ON (t2.a = t3.a) WHERE NOT EXISTS ( SELECT 1 FROM nullkey_c1_t2 AS t1 WHERE t1.b = t3.b ); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO distributed_table_c1_t1 @@ -585,7 +603,7 @@ FROM nullkey_c1_t1 AS t1 WHERE t1.a NOT IN ( SELECT DISTINCT t2.a FROM distributed_table_c1_t2 AS t2 ); -DEBUG: distributed INSERT ... 
SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: queries that reference a distributed table without a shard key can only reference colocated distributed tables or reference tables DETAIL: Router planner cannot handle multi-shard select queries INSERT INTO distributed_table_c1_t1 @@ -598,7 +616,8 @@ JOIN ( SELECT a FROM nullkey_c1_t2 ) AS t2 ) AS t3 ON t1.a = t3.a; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator -- Temporaryly reduce the verbosity to avoid noise @@ -617,7 +636,7 @@ WHERE t1.a IN ( ) AS t4 ON t3.a = t4.a ) AS t2 ); -DEBUG: distributed INSERT ... 
SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: correlated subqueries are not supported when the FROM clause contains a reference table DEBUG: generating subplan XXX_1 for subquery SELECT a FROM (SELECT t3.a FROM ((SELECT distributed_table_c1_t1.a FROM insert_select_single_shard_table.distributed_table_c1_t1 WHERE (distributed_table_c1_t1.b OPERATOR(pg_catalog.>) 4)) t3 JOIN (SELECT distributed_table_c1_t2.a FROM insert_select_single_shard_table.distributed_table_c1_t2 WHERE (distributed_table_c1_t2.b OPERATOR(pg_catalog.<) 7)) t4 ON ((t3.a OPERATOR(pg_catalog.=) t4.a)))) t2 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, b FROM insert_select_single_shard_table.reference_table t1 WHERE (a OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer))) DEBUG: Collecting INSERT ... SELECT results on coordinator @@ -736,16 +755,17 @@ DEBUG: distributed statement: INSERT INTO insert_select_single_shard_table.null SET client_min_messages TO DEBUG1; INSERT INTO distributed_table_c1_t1 AS t1 (a, b) SELECT t3.a, t3.b FROM nullkey_c1_t2 t2 JOIN reference_table t3 ON (t2.a = t3.a) ON CONFLICT (a, b) DO UPDATE SET b = t1.b + 10; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO nullkey_c1_t1 AS t1 (a, b) SELECT t3.a, t3.b FROM distributed_table_c1_t1 t2 JOIN reference_table t3 ON (t2.a = t3.a) ON CONFLICT (a) DO UPDATE SET a = t1.a + 10; -DEBUG: distributed INSERT ... 
SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator -- This also fails due to https://github.com/citusdata/citus/issues/6826. INSERT INTO nullkey_c1_t1 AS t1 (a, b) SELECT t3.a, t3.b FROM distributed_table_c1_t1 t2 JOIN reference_table t3 ON (t2.a = t3.a) WHERE t2.a = 3 ON CONFLICT (a) DO UPDATE SET a = (SELECT max(b)+1 FROM distributed_table_c1_t1 WHERE a = 3); -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Collecting INSERT ... SELECT results on coordinator ERROR: cannot execute a distributed query from a query on a shard DETAIL: Executing a distributed query in a function call that may be pushed to a remote node can lead to incorrect results. diff --git a/src/test/regress/expected/intermediate_result_pruning.out b/src/test/regress/expected/intermediate_result_pruning.out index f6cf8c1e1..e178765a8 100644 --- a/src/test/regress/expected/intermediate_result_pruning.out +++ b/src/test/regress/expected/intermediate_result_pruning.out @@ -762,7 +762,8 @@ ROLLBACK; -- We use offset 1 to make sure the result needs to be pulled to the coordinator, offset 0 would be optimized away INSERT INTO table_1 SELECT * FROM table_2 OFFSET 1; -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Collecting INSERT ... SELECT results on coordinator -- INSERT .. 
SELECT via coordinator which has intermediate result, -- and can be pruned to a single worker because the final query is on @@ -793,7 +794,7 @@ INSERT INTO table_1 SELECT * FROM cte_1 UNION SELECT * FROM cte_2); -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 1) INTERSECT SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 2) DEBUG: generating subplan XXX_1 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 1) DEBUG: generating subplan XXX_2 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 2) @@ -830,7 +831,7 @@ INSERT INTO table_1 ) foo where table_2.key != 1 AND foo.key = table_2.value::int; -DEBUG: Set operations are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 1) INTERSECT SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 2) DEBUG: generating subplan XXX_1 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 1) DEBUG: generating subplan XXX_2 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 2) @@ -1054,7 +1055,8 @@ inserts AS MATERIALIZED ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM intermediate_result_pruning.table_3 DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO intermediate_result_pruning.table_2 (key, value) SELECT table_1.key, count(*) AS count FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.>) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1.key HAVING (count(*) OPERATOR(pg_catalog.<) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file diff --git a/src/test/regress/expected/intermediate_result_pruning_0.out b/src/test/regress/expected/intermediate_result_pruning_0.out index 4ae6b8e16..ec4b489d0 100644 --- a/src/test/regress/expected/intermediate_result_pruning_0.out +++ b/src/test/regress/expected/intermediate_result_pruning_0.out @@ -762,7 +762,8 @@ ROLLBACK; -- We use offset 1 to make sure the result needs to be pulled to the coordinator, offset 0 would be optimized away INSERT INTO table_1 SELECT * FROM table_2 OFFSET 1; -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Collecting INSERT ... SELECT results on coordinator -- INSERT .. SELECT via coordinator which has intermediate result, -- and can be pruned to a single worker because the final query is on @@ -793,7 +794,7 @@ INSERT INTO table_1 SELECT * FROM cte_1 UNION SELECT * FROM cte_2); -DEBUG: Set operations are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 1) INTERSECT SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 2) DEBUG: generating subplan XXX_1 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 1) DEBUG: generating subplan XXX_2 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 2) @@ -830,7 +831,7 @@ INSERT INTO table_1 ) foo where table_2.key != 1 AND foo.key = table_2.value::int; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 1) INTERSECT SELECT table_1.key FROM intermediate_result_pruning.table_1 WHERE (table_1.key OPERATOR(pg_catalog.=) 2) DEBUG: generating subplan XXX_1 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 1) DEBUG: generating subplan XXX_2 for subquery SELECT key FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.=) 2) @@ -1054,7 +1055,8 @@ inserts AS MATERIALIZED ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM intermediate_result_pruning.table_3 DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO intermediate_result_pruning.table_2 (key, value) SELECT key, count(*) AS count FROM intermediate_result_pruning.table_1 WHERE (key OPERATOR(pg_catalog.>) (SELECT stats.m FROM (SELECT intermediate_result.m FROM 
read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file diff --git a/src/test/regress/expected/isolation_select_vs_all.out b/src/test/regress/expected/isolation_select_vs_all.out index 0485dba78..21be42915 100644 --- a/src/test/regress/expected/isolation_select_vs_all.out +++ b/src/test/regress/expected/isolation_select_vs_all.out @@ -341,29 +341,6 @@ count (1 row) -starting permutation: s1-initialize s1-begin s1-router-select s2-insert-select s1-commit s1-select-count -master_create_empty_shard ---------------------------------------------------------------------- - 6780300 -(1 row) - -step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx); -step s1-begin: BEGIN; -step s1-router-select: SELECT * FROM select_append WHERE id = 1; -id|data|int_data ---------------------------------------------------------------------- - 1| b | 1 -(1 row) - -step s2-insert-select: INSERT INTO select_append SELECT * FROM select_append; -step s1-commit: COMMIT; -step s1-select-count: SELECT 
COUNT(*) FROM select_append; -count ---------------------------------------------------------------------- - 10 -(1 row) - - starting permutation: s1-initialize s1-begin s1-router-select s2-update s1-commit s1-select-count master_create_empty_shard --------------------------------------------------------------------- @@ -770,29 +747,6 @@ count (1 row) -starting permutation: s1-initialize s1-begin s1-insert-select s2-router-select s1-commit s1-select-count -master_create_empty_shard ---------------------------------------------------------------------- - 6780300 -(1 row) - -step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx); -step s1-begin: BEGIN; -step s1-insert-select: INSERT INTO select_append SELECT * FROM select_append; -step s2-router-select: SELECT * FROM select_append WHERE id = 1; -id|data|int_data ---------------------------------------------------------------------- - 1| b | 1 -(1 row) - -step s1-commit: COMMIT; -step s1-select-count: SELECT COUNT(*) FROM select_append; -count ---------------------------------------------------------------------- - 10 -(1 row) - - starting permutation: s1-initialize s1-begin s1-update s2-router-select s1-commit s1-select-count master_create_empty_shard --------------------------------------------------------------------- @@ -1162,33 +1116,6 @@ count (1 row) -starting permutation: s1-initialize s1-begin s1-real-time-select s2-insert-select s1-commit s1-select-count -master_create_empty_shard ---------------------------------------------------------------------- - 6780300 -(1 row) - -step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx); -step s1-begin: BEGIN; -step s1-real-time-select: SELECT * FROM select_append ORDER BY 1, 2; -id|data|int_data 
---------------------------------------------------------------------- - 0| a | 0 - 1| b | 1 - 2| c | 2 - 3| d | 3 - 4| e | 4 -(5 rows) - -step s2-insert-select: INSERT INTO select_append SELECT * FROM select_append; -step s1-commit: COMMIT; -step s1-select-count: SELECT COUNT(*) FROM select_append; -count ---------------------------------------------------------------------- - 10 -(1 row) - - starting permutation: s1-initialize s1-begin s1-real-time-select s2-update s1-commit s1-select-count master_create_empty_shard --------------------------------------------------------------------- @@ -1621,33 +1548,6 @@ count (1 row) -starting permutation: s1-initialize s1-begin s1-insert-select s2-real-time-select s1-commit s1-select-count -master_create_empty_shard ---------------------------------------------------------------------- - 6780300 -(1 row) - -step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx); -step s1-begin: BEGIN; -step s1-insert-select: INSERT INTO select_append SELECT * FROM select_append; -step s2-real-time-select: SELECT * FROM select_append ORDER BY 1, 2; -id|data|int_data ---------------------------------------------------------------------- - 0| a | 0 - 1| b | 1 - 2| c | 2 - 3| d | 3 - 4| e | 4 -(5 rows) - -step s1-commit: COMMIT; -step s1-select-count: SELECT COUNT(*) FROM select_append; -count ---------------------------------------------------------------------- - 10 -(1 row) - - starting permutation: s1-initialize s1-begin s1-update s2-real-time-select s1-commit s1-select-count master_create_empty_shard --------------------------------------------------------------------- @@ -2034,36 +1934,6 @@ count (1 row) -starting permutation: s1-initialize s1-begin s1-adaptive-select s2-insert-select s1-commit s1-select-count -master_create_empty_shard ---------------------------------------------------------------------- - 6780300 -(1 
row) - -step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx); -step s1-begin: BEGIN; -step s1-adaptive-select: - SET citus.enable_repartition_joins TO ON; - SELECT * FROM select_append AS t1 JOIN select_append AS t2 ON t1.id = t2.int_data ORDER BY 1, 2, 3, 4; - -id|data|int_data|id|data|int_data ---------------------------------------------------------------------- - 0| a | 0| 0| a | 0 - 1| b | 1| 1| b | 1 - 2| c | 2| 2| c | 2 - 3| d | 3| 3| d | 3 - 4| e | 4| 4| e | 4 -(5 rows) - -step s2-insert-select: INSERT INTO select_append SELECT * FROM select_append; -step s1-commit: COMMIT; -step s1-select-count: SELECT COUNT(*) FROM select_append; -count ---------------------------------------------------------------------- - 10 -(1 row) - - starting permutation: s1-initialize s1-begin s1-adaptive-select s2-update s1-commit s1-select-count master_create_empty_shard --------------------------------------------------------------------- @@ -2538,36 +2408,6 @@ count (1 row) -starting permutation: s1-initialize s1-begin s1-insert-select s2-adaptive-select s1-commit s1-select-count -master_create_empty_shard ---------------------------------------------------------------------- - 6780300 -(1 row) - -step s1-initialize: COPY select_append FROM PROGRAM 'echo 0, a, 0 && echo 1, b, 1 && echo 2, c, 2 && echo 3, d, 3 && echo 4, e, 4' WITH (format 'csv', append_to_shard xxxxx); -step s1-begin: BEGIN; -step s1-insert-select: INSERT INTO select_append SELECT * FROM select_append; -step s2-adaptive-select: - SET citus.enable_repartition_joins TO ON; - SELECT * FROM select_append AS t1 JOIN select_append AS t2 ON t1.id = t2.int_data ORDER BY 1, 2, 3, 4; - -id|data|int_data|id|data|int_data ---------------------------------------------------------------------- - 0| a | 0| 0| a | 0 - 1| b | 1| 1| b | 1 - 2| c | 2| 2| c | 2 - 3| d | 3| 3| d | 3 - 4| e | 4| 4| e | 4 -(5 
rows) - -step s1-commit: COMMIT; -step s1-select-count: SELECT COUNT(*) FROM select_append; -count ---------------------------------------------------------------------- - 10 -(1 row) - - starting permutation: s1-initialize s1-begin s1-update s2-adaptive-select s1-commit s1-select-count master_create_empty_shard --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_insert_select.out b/src/test/regress/expected/multi_insert_select.out index ed836d398..abebf314e 100644 --- a/src/test/regress/expected/multi_insert_select.out +++ b/src/test/regress/expected/multi_insert_select.out @@ -620,7 +620,8 @@ INSERT INTO agg_events (value_1_agg, user_id) DISTINCT ON (value_1) value_1, user_id FROM raw_events_first; -DEBUG: DISTINCT ON (non-partition column) clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Distinct on columns without partition column is currently unsupported DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... SELECT results on coordinator SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; @@ -686,7 +687,7 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_a DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator ROLLBACK; --- We don't support CTEs that are referenced in the target list +-- We do support CTEs that are referenced in the target list INSERT INTO agg_events WITH sub_cte AS (SELECT 1) SELECT @@ -694,15 +695,11 @@ INSERT INTO agg_events FROM raw_events_first; DEBUG: CTE sub_cte is going to be inlined via distributed planning -DEBUG: Subqueries without relations are not allowed in distributed INSERT ... SELECT queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... 
SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT intermediate_result.user_id, intermediate_result.value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT intermediate_result.user_id, intermediate_result.value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT intermediate_result.user_id, intermediate_result.value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT intermediate_result.user_id, intermediate_result.value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer) --- We support set operations via the coordinator +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" 
FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +-- We support set operations BEGIN; INSERT INTO raw_events_first(user_id) @@ -711,14 +708,10 @@ SELECT FROM ((SELECT user_id FROM raw_events_first) UNION (SELECT user_id FROM raw_events_second)) as foo; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... 
SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300004_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300005_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300006_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300007_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT 
raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) ROLLBACK; -- We do support set operations through recursive planning BEGIN; @@ -727,7 +720,7 @@ INSERT INTO raw_events_first(user_id) (SELECT user_id FROM raw_events_first) INTERSECT (SELECT user_id FROM raw_events_first); -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first @@ -747,7 +740,7 @@ SELECT FROM ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT (SELECT user_id FROM raw_events_second where user_id = 17)) as foo; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator -- some supported LEFT joins @@ -765,10 +758,16 @@ DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_133000 raw_events_second.user_id FROM reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL) +DEBUG: cannot perform a lateral outer join 
when a distributed subquery references a reference table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "raw_events_second" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "raw_events_second" to a subquery +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table LEFT JOIN (SELECT raw_events_second_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) raw_events_second_1) raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) +DEBUG: Creating router plan +DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id @@ -1126,7 +1125,8 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.value_3) AS foo; -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] @@ -1281,7 +1281,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, GROUP BY raw_events_second.value_1 HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] @@ -1327,7 +1327,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, GROUP BY raw_events_second.value_1 HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] @@ -3242,5 +3242,236 @@ returning text_col_1; string (1 row) +CREATE TABLE dist_table_3( +dist_col bigint, +int_col integer +); +SELECT create_distributed_table('dist_table_3', 'dist_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to +-- different types for distribution columns. Citus would not be able to handle this complex insert select. +INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col); +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +CREATE TABLE dist_table_4( +dist_col integer, +int_col integer +); +SELECT create_distributed_table('dist_table_4', 'dist_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4 +-- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query. +-- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- For INSERT SELECT, when push-down is NOT possible when limit clause exists in a subquery at SELECT part of INSERT SELECT. +-- It is because the subquery with limit needs to be merged at coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Limit + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +CREATE TABLE dist_table_5(id int, id2 int); +SELECT create_distributed_table('dist_table_5','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_table_6(id int, id2 int); +SELECT create_distributed_table('dist_table_6','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- verify that insert select with sublink can be pushed down when tables are colocated. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +CREATE TABLE ref_table_1(id int); +SELECT create_reference_table('ref_table_1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation. +INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1; +ERROR: correlated subqueries are not supported when the FROM clause contains a reference table +-- verify that insert select cannot be pushed down when we have recurring range table in from clause. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 1 +(4 rows) + +-- verify that insert select cannot be pushed down when we have reference table in outside of outer join. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +-- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +CREATE TABLE loc_table_1(id int); +-- verify that insert select cannot be pushed down when it contains join between local and distributed tables. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Seq Scan on loc_table_1 + Task Count: 4 +(6 rows) + +CREATE VIEW view_1 AS + SELECT id FROM dist_table_6; +CREATE MATERIALIZED VIEW view_2 AS + SELECT id FROM dist_table_6; +-- verify that insert select cannot be pushed down when it contains view. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- verify that insert select cannot be pushed down when it contains materialized view. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Seq Scan on view_2 +(3 rows) + +CREATE TABLE append_table(id integer, data text, int_data int); +SELECT create_distributed_table('append_table', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_empty_shard('append_table'); + master_create_empty_shard +--------------------------------------------------------------------- + 13300096 +(1 row) + +-- verify that insert select push down for append tables are not supported. +INSERT INTO append_table SELECT * FROM append_table; +ERROR: INSERT ... SELECT into an append-distributed table is not supported +-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner +-- and handled by pull to coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) + INSERT INTO dist_table_5 + SELECT id FROM dist_table_5 JOIN cte_1 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 1 +(4 rows) + +-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 + WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) + SELECT id FROM dist_table_5 JOIN cte_1 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 1 +(2 rows) + SET client_min_messages TO ERROR; DROP SCHEMA multi_insert_select CASCADE; diff --git a/src/test/regress/expected/multi_insert_select_0.out b/src/test/regress/expected/multi_insert_select_0.out index efc845e88..ee2341759 100644 --- a/src/test/regress/expected/multi_insert_select_0.out +++ b/src/test/regress/expected/multi_insert_select_0.out @@ -620,7 +620,8 @@ INSERT INTO agg_events (value_1_agg, user_id) DISTINCT ON (value_1) value_1, user_id FROM raw_events_first; -DEBUG: DISTINCT ON (non-partition column) clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Distinct on columns without partition column is currently unsupported DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... SELECT results on coordinator SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; @@ -686,7 +687,7 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_a DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator ROLLBACK; --- We don't support CTEs that are referenced in the target list +-- We do support CTEs that are referenced in the target list INSERT INTO agg_events WITH sub_cte AS (SELECT 1) SELECT @@ -694,15 +695,11 @@ INSERT INTO agg_events FROM raw_events_first; DEBUG: CTE sub_cte is going to be inlined via distributed planning -DEBUG: Subqueries without relations are not allowed in distributed INSERT ... SELECT queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, value_1_agg FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1_agg integer) --- We support set 
operations via the coordinator +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) +-- We support set operations BEGIN; INSERT INTO raw_events_first(user_id) @@ -711,14 +708,10 @@ SELECT FROM ((SELECT user_id FROM raw_events_first) UNION (SELECT user_id FROM raw_events_second)) as foo; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... 
SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300004_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300005_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300006_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300007_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (user_id 
IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (user_id IS NOT NULL) ROLLBACK; -- We do support set operations through recursive planning BEGIN; @@ -727,7 +720,7 @@ INSERT INTO raw_events_first(user_id) (SELECT user_id FROM raw_events_first) INTERSECT (SELECT user_id FROM raw_events_first); -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first @@ -747,7 +740,7 @@ SELECT FROM ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT (SELECT user_id FROM raw_events_second where user_id = 17)) as foo; -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator -- some supported LEFT joins @@ -765,10 +758,16 @@ DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_133000 raw_events_second.user_id FROM reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL) +DEBUG: cannot perform a lateral outer join 
when a distributed subquery references a reference table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "raw_events_second" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "raw_events_second" to a subquery +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table LEFT JOIN (SELECT raw_events_second_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) raw_events_second_1) raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) +DEBUG: Creating router plan +DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id @@ -1126,7 +1125,8 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.value_3) AS foo; -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] @@ -1281,7 +1281,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, GROUP BY raw_events_second.value_1 HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] @@ -1327,7 +1327,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, GROUP BY raw_events_second.value_1 HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] @@ -3242,5 +3242,236 @@ returning text_col_1; string (1 row) +CREATE TABLE dist_table_3( +dist_col bigint, +int_col integer +); +SELECT create_distributed_table('dist_table_3', 'dist_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to +-- different types for distribution columns. Citus would not be able to handle this complex insert select. +INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col); +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +CREATE TABLE dist_table_4( +dist_col integer, +int_col integer +); +SELECT create_distributed_table('dist_table_4', 'dist_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4 +-- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query. +-- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- For INSERT SELECT, when push-down is NOT possible when limit clause exists in a subquery at SELECT part of INSERT SELECT. +-- It is because the subquery with limit needs to be merged at coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Limit + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +CREATE TABLE dist_table_5(id int, id2 int); +SELECT create_distributed_table('dist_table_5','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_table_6(id int, id2 int); +SELECT create_distributed_table('dist_table_6','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- verify that insert select with sublink can be pushed down when tables are colocated. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +CREATE TABLE ref_table_1(id int); +SELECT create_reference_table('ref_table_1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation. +INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1; +ERROR: correlated subqueries are not supported when the FROM clause contains a reference table +-- verify that insert select cannot be pushed down when we have recurring range table in from clause. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 1 +(4 rows) + +-- verify that insert select cannot be pushed down when we have reference table in outside of outer join. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +-- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +CREATE TABLE loc_table_1(id int); +-- verify that insert select cannot be pushed down when it contains join between local and distributed tables. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Seq Scan on loc_table_1 + Task Count: 4 +(6 rows) + +CREATE VIEW view_1 AS + SELECT id FROM dist_table_6; +CREATE MATERIALIZED VIEW view_2 AS + SELECT id FROM dist_table_6; +-- verify that insert select cannot be pushed down when it contains view. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- verify that insert select cannot be pushed down when it contains materialized view. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Seq Scan on view_2 +(3 rows) + +CREATE TABLE append_table(id integer, data text, int_data int); +SELECT create_distributed_table('append_table', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_empty_shard('append_table'); + master_create_empty_shard +--------------------------------------------------------------------- + 13300096 +(1 row) + +-- verify that insert select push down for append tables are not supported. +INSERT INTO append_table SELECT * FROM append_table; +ERROR: INSERT ... SELECT into an append-distributed table is not supported +-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner +-- and handled by pull to coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) + INSERT INTO dist_table_5 + SELECT id FROM dist_table_5 JOIN cte_1 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 1 +(4 rows) + +-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 + WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) + SELECT id FROM dist_table_5 JOIN cte_1 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 1 +(2 rows) + SET client_min_messages TO ERROR; DROP SCHEMA multi_insert_select CASCADE; diff --git a/src/test/regress/expected/multi_insert_select_conflict.out b/src/test/regress/expected/multi_insert_select_conflict.out index df7bdc9b9..f344a8b79 100644 --- a/src/test/regress/expected/multi_insert_select_conflict.out +++ b/src/test/regress/expected/multi_insert_select_conflict.out @@ -106,7 +106,8 @@ FROM ( LIMIT 5 ) as foo ON CONFLICT DO NOTHING; -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2, intermediate_result.col_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer, col_3 integer)) foo @@ -127,7 +128,8 @@ WITH inserted_table AS ( ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 RETURNING * ) SELECT * FROM inserted_table ORDER BY 1; DEBUG: generating subplan XXX_1 for CTE inserted_table: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT foo.col_1, foo.col_2 FROM (SELECT source_table_1.col_1, source_table_1.col_2, source_table_1.col_3 FROM on_conflict.source_table_1 LIMIT 5) foo ON CONFLICT(col_1) DO UPDATE SET col_2 = excluded.col_2 RETURNING target_table.col_1, target_table.col_2 -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2, intermediate_result.col_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer, col_3 integer)) foo @@ -163,7 +165,8 @@ WITH inserted_table AS ( ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING * ) SELECT * FROM inserted_table ORDER BY 1; DEBUG: generating subplan XXX_1 for CTE inserted_table: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT foo.col_1, foo.col_2 FROM ((SELECT source_table_1.col_1, source_table_1.col_2, source_table_1.col_3 FROM on_conflict.source_table_1 LIMIT 5) UNION (SELECT source_table_2.col_1, source_table_2.col_2, source_table_2.col_3 FROM on_conflict.source_table_2 LIMIT 5)) foo ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING target_table.col_1, target_table.col_2 -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5 DEBUG: push down of limit count: 5 @@ -498,7 +501,8 @@ FROM ( LIMIT 5) ) as foo ON CONFLICT(col_1) DO UPDATE SET col_2 = 0; -DEBUG: Set operations are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5 DEBUG: push down of limit count: 5 @@ -560,7 +564,8 @@ DEBUG: Collecting INSERT ... SELECT results on coordinator -- intermediate result file SET citus.max_adaptive_executor_pool_size TO 1; INSERT INTO target_table SELECT * FROM target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1; -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 10000 DEBUG: Collecting INSERT ... SELECT results on coordinator SELECT DISTINCT col_2 FROM target_table; @@ -572,7 +577,8 @@ SELECT DISTINCT col_2 FROM target_table; WITH cte_1 AS (INSERT INTO target_table SELECT * FROM target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1 RETURNING *) SELECT DISTINCT col_2 FROM cte_1; DEBUG: generating subplan XXX_1 for CTE cte_1: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT target_table_1.col_1, target_table_1.col_2 FROM on_conflict.target_table target_table_1 LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = (excluded.col_2 OPERATOR(pg_catalog.+) 1) RETURNING target_table.col_1, target_table.col_2 -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 10000 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT DISTINCT col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer)) cte_1 DEBUG: Collecting INSERT ... SELECT results on coordinator diff --git a/src/test/regress/expected/multi_insert_select_conflict_0.out b/src/test/regress/expected/multi_insert_select_conflict_0.out index b8f926d30..42b5aed31 100644 --- a/src/test/regress/expected/multi_insert_select_conflict_0.out +++ b/src/test/regress/expected/multi_insert_select_conflict_0.out @@ -106,7 +106,8 @@ FROM ( LIMIT 5 ) as foo ON CONFLICT DO NOTHING; -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2, intermediate_result.col_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer, col_3 integer)) foo @@ -127,7 +128,8 @@ WITH inserted_table AS ( ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 RETURNING * ) SELECT * FROM inserted_table ORDER BY 1; DEBUG: generating subplan XXX_1 for CTE inserted_table: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT col_1, col_2 FROM (SELECT source_table_1.col_1, source_table_1.col_2, source_table_1.col_3 FROM on_conflict.source_table_1 LIMIT 5) foo ON 
CONFLICT(col_1) DO UPDATE SET col_2 = excluded.col_2 RETURNING target_table.col_1, target_table.col_2 -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2, intermediate_result.col_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer, col_3 integer)) foo @@ -163,7 +165,8 @@ WITH inserted_table AS ( ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING * ) SELECT * FROM inserted_table ORDER BY 1; DEBUG: generating subplan XXX_1 for CTE inserted_table: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT col_1, col_2 FROM ((SELECT source_table_1.col_1, source_table_1.col_2, source_table_1.col_3 FROM on_conflict.source_table_1 LIMIT 5) UNION (SELECT source_table_2.col_1, source_table_2.col_2, source_table_2.col_3 FROM on_conflict.source_table_2 LIMIT 5)) foo ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING target_table.col_1, target_table.col_2 -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5 DEBUG: push down of limit count: 5 @@ -498,7 +501,8 @@ FROM ( LIMIT 5) ) as foo ON CONFLICT(col_1) DO UPDATE SET col_2 = 0; -DEBUG: Set operations are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_1 for subquery SELECT col_1, col_2, col_3 FROM on_conflict.source_table_1 LIMIT 5 DEBUG: push down of limit count: 5 @@ -560,7 +564,8 @@ DEBUG: Collecting INSERT ... SELECT results on coordinator -- intermediate result file SET citus.max_adaptive_executor_pool_size TO 1; INSERT INTO target_table SELECT * FROM target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1; -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 10000 DEBUG: Collecting INSERT ... SELECT results on coordinator SELECT DISTINCT col_2 FROM target_table; @@ -572,7 +577,8 @@ SELECT DISTINCT col_2 FROM target_table; WITH cte_1 AS (INSERT INTO target_table SELECT * FROM target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1 RETURNING *) SELECT DISTINCT col_2 FROM cte_1; DEBUG: generating subplan XXX_1 for CTE cte_1: INSERT INTO on_conflict.target_table (col_1, col_2) SELECT col_1, col_2 FROM on_conflict.target_table LIMIT 10000 ON CONFLICT(col_1) DO UPDATE SET col_2 = (excluded.col_2 OPERATOR(pg_catalog.+) 1) RETURNING target_table.col_1, target_table.col_2 -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 10000 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT DISTINCT col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer)) cte_1 DEBUG: Collecting INSERT ... SELECT results on coordinator diff --git a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out index fc3f62385..c4a537277 100644 --- a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out +++ b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out @@ -3,6 +3,8 @@ -- Vanilla funnel query --------------------------------------------------------------------- --------------------------------------------------------------------- +CREATE SCHEMA multi_insert_select_non_pushable_queries; +SET search_path = multi_insert_select_non_pushable_queries,public; -- not pushable since the JOIN is not an equi join INSERT INTO agg_results_third (user_id, value_1_agg) SELECT user_id, array_length(events_table, 1) @@ -147,7 +149,7 @@ FROM ( GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event RETURNING user_id, value_1_agg, value_2_agg; -DEBUG: Set operations are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[100, 101, 102]))) DEBUG: generating subplan XXX_2 for subquery SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[103, 104, 105]))) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) @@ -305,7 +307,7 @@ GROUP BY ORDER BY count_pay RETURNING user_id, value_1_agg, value_2_agg; -DEBUG: Set operations are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 10) AND (events_table.event_type OPERATOR(pg_catalog.<) 12)) DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 12) AND (events_table.event_type OPERATOR(pg_catalog.<) 14)) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) @@ -808,3 +810,36 @@ FROM ( GROUP BY user_id ) AS shard_union ORDER BY user_lastseen DESC; +CREATE TABLE dist_table_1(id int); +SELECT create_distributed_table('dist_table_1','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_table_2(id int, id2 int); +SELECT 
create_distributed_table('dist_table_2','id2'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with union can be pulled to coordinator. We cannot push down the query +-- since UNION clause has no FROM clause at top level query. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +DROP SCHEMA multi_insert_select_non_pushable_queries CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table dist_table_1 +drop cascades to table dist_table_2 diff --git a/src/test/regress/expected/multi_router_planner_fast_path.out b/src/test/regress/expected/multi_router_planner_fast_path.out index f2f18266b..474d4a107 100644 --- a/src/test/regress/expected/multi_router_planner_fast_path.out +++ b/src/test/regress/expected/multi_router_planner_fast_path.out @@ -1876,32 +1876,38 @@ PREPARE insert_sel(int, int) AS INSERT INTO articles_hash SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0; EXECUTE insert_sel(1,1); -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Deferred pruning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_sel(1,1); -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Deferred pruning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_sel(1,1); -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Deferred pruning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_sel(1,1); -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Deferred pruning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_sel(1,1); -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Deferred pruning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator EXECUTE insert_sel(1,1); -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Deferred pruning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator diff --git a/src/test/regress/expected/mx_coordinator_shouldhaveshards.out b/src/test/regress/expected/mx_coordinator_shouldhaveshards.out index 438e1dcdd..547300460 100644 --- a/src/test/regress/expected/mx_coordinator_shouldhaveshards.out +++ b/src/test/regress/expected/mx_coordinator_shouldhaveshards.out @@ -89,7 +89,7 @@ key FROM a JOIN table_2 USING (key) GROUP BY key HAVING (max(table_2.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -114,7 +114,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1 DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2 (key, value) SELECT table_1.key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1 WHERE (table_1.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM 
read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file @@ -150,7 +151,7 @@ key FROM a JOIN table_2_rep USING (key) GROUP BY key HAVING (max(table_2_rep.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1_rep ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2_rep USING (key)) GROUP BY a.key HAVING (max(table_2_rep.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -175,7 +176,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1_rep DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2_rep (key, value) SELECT table_1_rep.key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1_rep WHERE (table_1_rep.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1_rep.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2_rep.key, table_2_rep.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file @@ -215,7 +217,7 @@ key FROM a JOIN table_2 USING (key) GROUP BY key HAVING (max(table_2.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -240,7 +242,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1 DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2 (key, value) 
SELECT table_1.key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1 WHERE (table_1.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file @@ -276,7 +279,7 @@ key FROM a JOIN table_2_rep USING (key) GROUP BY key HAVING (max(table_2_rep.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1_rep ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2_rep USING (key)) GROUP BY a.key HAVING (max(table_2_rep.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -301,7 +304,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1_rep DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2_rep (key, value) SELECT table_1_rep.key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1_rep WHERE (table_1_rep.key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY table_1_rep.key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2_rep.key, table_2_rep.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file diff --git a/src/test/regress/expected/mx_coordinator_shouldhaveshards_0.out b/src/test/regress/expected/mx_coordinator_shouldhaveshards_0.out index 398229fbb..15cd69068 100644 --- a/src/test/regress/expected/mx_coordinator_shouldhaveshards_0.out +++ b/src/test/regress/expected/mx_coordinator_shouldhaveshards_0.out @@ -89,7 +89,7 @@ key FROM a JOIN table_2 USING (key) GROUP BY key HAVING (max(table_2.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -114,7 +114,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1 DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2 (key, value) SELECT key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1 WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file @@ -150,7 +151,7 @@ key FROM a JOIN table_2_rep USING (key) GROUP BY key HAVING (max(table_2_rep.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1_rep ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2_rep USING (key)) GROUP BY a.key HAVING (max(table_2_rep.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -175,7 +176,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1_rep DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO 
mx_coordinator_shouldhaveshards.table_2_rep (key, value) SELECT key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1_rep WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2_rep.key, table_2_rep.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file @@ -215,7 +217,7 @@ key FROM a JOIN table_2 USING (key) GROUP BY key HAVING (max(table_2.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... 
SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -240,7 +242,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1 DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO mx_coordinator_shouldhaveshards.table_2 (key, value) SELECT key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1 WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2.key, table_2.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... 
SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file @@ -276,7 +279,7 @@ key FROM a JOIN table_2_rep USING (key) GROUP BY key HAVING (max(table_2_rep.value) >= (SELECT value FROM a)); -DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value FROM mx_coordinator_shouldhaveshards.table_1_rep ORDER BY key, value DESC LIMIT 1 DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN mx_coordinator_shouldhaveshards.table_2_rep USING (key)) GROUP BY a.key HAVING (max(table_2_rep.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1)) @@ -301,7 +304,8 @@ inserts AS ( ) SELECT count(*) FROM inserts; DEBUG: generating subplan XXX_1 for CTE stats: SELECT count(key) AS m FROM mx_coordinator_shouldhaveshards.table_1_rep DEBUG: generating subplan XXX_2 for CTE inserts: INSERT INTO 
mx_coordinator_shouldhaveshards.table_2_rep (key, value) SELECT key, count(*) AS count FROM mx_coordinator_shouldhaveshards.table_1_rep WHERE (key OPERATOR(pg_catalog.>=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.<=) (SELECT stats.m FROM (SELECT intermediate_result.m FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(m bigint)) stats)) LIMIT 1 RETURNING table_2_rep.key, table_2_rep.value -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: push down of limit count: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) inserts DEBUG: Subplan XXX_1 will be written to local file diff --git a/src/test/regress/expected/query_single_shard_table.out b/src/test/regress/expected/query_single_shard_table.out index 6e1fe1529..ff04ad50e 100644 --- a/src/test/regress/expected/query_single_shard_table.out +++ b/src/test/regress/expected/query_single_shard_table.out @@ -827,7 +827,7 @@ INSERT INTO nullkey_c1_t1 SELECT * FROM nullkey_c1_t2; SET client_min_messages TO DEBUG2; -- between two non-colocated single-shard tables INSERT INTO nullkey_c1_t1 SELECT * FROM nullkey_c2_t1; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... 
SELECT DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator @@ -848,7 +848,7 @@ INSERT INTO nullkey_c1_t1 SELECT * FROM reference_table; SET client_min_messages TO DEBUG2; INSERT INTO nullkey_c1_t1 SELECT * FROM distributed_table; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: INSERT target relation and all source relations of the SELECT must be colocated in distributed INSERT ... SELECT DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO nullkey_c1_t1 SELECT * FROM citus_local_table; @@ -865,7 +865,8 @@ DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO distributed_table SELECT * FROM nullkey_c1_t1; -DEBUG: distributed INSERT ... SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator @@ -1197,6 +1198,8 @@ SELECT COALESCE(raw_events_first.user_id, users_ref_table.user_id) FROM raw_events_first RIGHT JOIN (users_ref_table LEFT JOIN raw_events_second ON users_ref_table.user_id = raw_events_second.user_id) ON raw_events_first.user_id = users_ref_table.user_id; +DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table +DEBUG: Collecting INSERT ... 
SELECT results on coordinator -- using a full join INSERT INTO agg_events (user_id, value_1_agg) SELECT t1.user_id AS col1, @@ -1227,6 +1230,8 @@ FROM users_ref_table WHERE NOT EXISTS (SELECT 1 FROM raw_events_second WHERE raw_events_second.user_id = users_ref_table.user_id); +DEBUG: correlated subqueries are not supported when the FROM clause contains a reference table +DEBUG: Collecting INSERT ... SELECT results on coordinator -- using inner join INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id @@ -1247,13 +1252,16 @@ WHERE raw_events_first.value_1 IN (10, 11,12) OR users_ref_table.user_id IN (1,2 -- Below "limit / offset clause" test and some others are examples of this. -- limit / offset clause INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LIMIT 1; -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first OFFSET 1; -DEBUG: OFFSET clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Offset clause is currently unsupported when a subquery references a column from another query DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO agg_events (user_id) SELECT users_ref_table.user_id FROM users_ref_table LIMIT 1; -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: Collecting INSERT ... SELECT results on coordinator -- using a materialized cte WITH cte AS MATERIALIZED @@ -1265,9 +1273,15 @@ DEBUG: Collecting INSERT ... 
SELECT results on coordinator INSERT INTO raw_events_second WITH cte AS MATERIALIZED (SELECT * FROM raw_events_first) SELECT user_id * 1000, time, value_1, value_2, value_3, value_4 FROM cte; +DEBUG: cannot push down this subquery +DETAIL: CTEs in subqueries are currently unsupported +DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO raw_events_second (user_id) WITH cte AS MATERIALIZED (SELECT * FROM users_ref_table) SELECT user_id FROM cte; +DEBUG: cannot push down this subquery +DETAIL: CTEs in subqueries are currently unsupported +DEBUG: Collecting INSERT ... SELECT results on coordinator -- using a regular cte WITH cte AS (SELECT * FROM raw_events_first) INSERT INTO raw_events_second @@ -1286,8 +1300,6 @@ INSERT INTO agg_events FROM raw_events_first; DEBUG: CTE sub_cte is going to be inlined via distributed planning -DEBUG: Subqueries without relations are not allowed in distributed INSERT ... SELECT queries -DEBUG: Collecting INSERT ... SELECT results on coordinator -- we still support complex joins via INSERT's cte list .. WITH cte AS ( SELECT DISTINCT(reference_table.a) AS a, 1 AS b @@ -1311,7 +1323,7 @@ WITH cte AS ( ) SELECT (a+5)*2, b FROM cte; DEBUG: CTE cte is going to be inlined via distributed planning -DEBUG: distributed INSERT ... 
SELECT cannot reference a distributed table without a shard key together with non-colocated distributed tables +DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table DEBUG: recursively planning left side of the right join since the outer side is a recurring rel DEBUG: recursively planning distributed relation "distributed_table" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "distributed_table" to a subquery @@ -1323,13 +1335,15 @@ INSERT INTO raw_events_first(user_id) (SELECT user_id FROM raw_events_first) INTERSECT (SELECT user_id FROM raw_events_first); -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Intersect and Except are currently unsupported DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO raw_events_first(user_id) (SELECT user_id FROM users_ref_table) INTERSECT (SELECT user_id FROM raw_events_first); -DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Intersect and Except are currently unsupported DEBUG: Collecting INSERT ... SELECT results on coordinator -- group by clause inside subquery INSERT INTO agg_events diff --git a/src/test/regress/expected/recurring_outer_join.out b/src/test/regress/expected/recurring_outer_join.out index 3cd7cc6dc..aa8cb906d 100644 --- a/src/test/regress/expected/recurring_outer_join.out +++ b/src/test/regress/expected/recurring_outer_join.out @@ -1969,8 +1969,7 @@ BEGIN; FROM ref_1 t1 LEFT JOIN dist_1 t2 ON (t1.a = t2.a); -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The target table's partition column should correspond to a partition column in the subquery. 
+DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table DEBUG: recursively planning right side of the left join since the outer side is a recurring rel DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "dist_1" "t2" to a subquery @@ -1986,9 +1985,7 @@ BEGIN; JOIN (ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4 ON (t1.a = t4.a); -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an operator in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: recursively planning right side of the left join since the outer side is a recurring rel DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "dist_1" "t3" to a subquery @@ -2005,7 +2002,7 @@ BEGIN; JOIN (ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4 ON (t1.a = t4.a); -DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: recursively planning right side of the left join since the outer side is a recurring rel DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "dist_1" "t3" to a subquery diff --git a/src/test/regress/expected/with_dml.out b/src/test/regress/expected/with_dml.out index 
b5141db33..f2743a8d9 100644 --- a/src/test/regress/expected/with_dml.out +++ b/src/test/regress/expected/with_dml.out @@ -103,9 +103,7 @@ WITH ids_to_insert AS INSERT INTO distributed_table SELECT DISTINCT ids_to_insert.tenant_id FROM ids_to_insert, distributed_table WHERE distributed_table.tenant_id < ids_to_insert.tenant_id; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: CTE ids_to_insert is going to be inlined via distributed planning DEBUG: generating subplan XXX_1 for subquery SELECT (((tenant_id)::integer OPERATOR(pg_catalog.*) 100))::text AS tenant_id FROM with_dml.distributed_table WHERE (dept OPERATOR(pg_catalog.>) 7) DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT ids_to_insert.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id text)) ids_to_insert, with_dml.distributed_table WHERE (distributed_table.tenant_id OPERATOR(pg_catalog.<) ids_to_insert.tenant_id) diff --git a/src/test/regress/expected/with_modifying.out b/src/test/regress/expected/with_modifying.out index 9f62271d5..997c62f93 100644 --- a/src/test/regress/expected/with_modifying.out +++ b/src/test/regress/expected/with_modifying.out @@ -956,7 +956,8 @@ WITH first_query AS (INSERT INTO modify_table (id) VALUES (10001)), SET client_min_messages TO debug2; -- pushed down without the insert WITH mb AS (UPDATE modify_table SET val = 3 WHERE id = 3 RETURNING NULL) INSERT INTO 
modify_table WITH ma AS (SELECT * FROM modify_table LIMIT 10) SELECT count(*) FROM mb; -DEBUG: LIMIT clauses are not allowed in distributed INSERT ... SELECT queries +DEBUG: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 3 DEBUG: Collecting INSERT ... SELECT results on coordinator diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index b5943f899..0936a9625 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -65,7 +65,12 @@ test: multi_remove_node_reference_table test: multi_create_table test: multi_create_table_superuser test: multi_master_protocol multi_load_data multi_load_data_superuser multi_behavioral_analytics_create_table -test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select multi_behavioral_analytics_create_table_superuser +test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_behavioral_analytics_create_table_superuser + +# We don't parallelize the following tests with the ones above because they're +# not idempotent and hence cause the flaky test detection check to fail. 
+test: multi_insert_select_non_pushable_queries multi_insert_select + test: multi_shard_update_delete recursive_dml_with_different_planners_executors test: insert_select_repartition window_functions dml_recursive multi_insert_select_window test: multi_insert_select_conflict citus_table_triggers alter_table_single_shard_table diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index a78ee6088..6dcf41266 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -12,7 +12,12 @@ test: replicated_table_disable_node test: multi_create_table test: multi_create_table_superuser test: multi_create_table_constraints multi_master_protocol multi_load_data multi_load_data_superuser multi_behavioral_analytics_create_table -test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select multi_behavioral_analytics_create_table_superuser +test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_behavioral_analytics_create_table_superuser + +# We don't parallelize the following tests with the ones above because they're +# not idempotent and hence cause the flaky test detection check to fail. 
+test: multi_insert_select_non_pushable_queries multi_insert_select + test: multi_shard_update_delete recursive_dml_with_different_planners_executors test: insert_select_repartition window_functions dml_recursive multi_insert_select_window test: multi_insert_select_conflict citus_table_triggers diff --git a/src/test/regress/spec/isolation_select_vs_all.spec b/src/test/regress/spec/isolation_select_vs_all.spec index a3b65ca77..3611e627e 100644 --- a/src/test/regress/spec/isolation_select_vs_all.spec +++ b/src/test/regress/spec/isolation_select_vs_all.spec @@ -35,7 +35,6 @@ step "s1-adaptive-select" SELECT * FROM select_append AS t1 JOIN select_append AS t2 ON t1.id = t2.int_data ORDER BY 1, 2, 3, 4; } step "s1-insert" { INSERT INTO select_append VALUES(0, 'k', 0); } -step "s1-insert-select" { INSERT INTO select_append SELECT * FROM select_append; } step "s1-update" { UPDATE select_append SET data = 'l' WHERE id = 0; } step "s1-delete" { DELETE FROM select_append WHERE id = 1; } step "s1-truncate" { TRUNCATE select_append; } @@ -65,7 +64,6 @@ step "s2-adaptive-select" SELECT * FROM select_append AS t1 JOIN select_append AS t2 ON t1.id = t2.int_data ORDER BY 1, 2, 3, 4; } step "s2-insert" { INSERT INTO select_append VALUES(0, 'k', 0); } -step "s2-insert-select" { INSERT INTO select_append SELECT * FROM select_append; } step "s2-update" { UPDATE select_append SET data = 'l' WHERE id = 0; } step "s2-delete" { DELETE FROM select_append WHERE id = 1; } step "s2-truncate" { TRUNCATE select_append; } @@ -101,7 +99,6 @@ permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-adaptive-select" // permutations - router SELECT first permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-insert" "s1-commit" "s1-select-count" -permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-insert-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-update" "s1-commit" "s1-select-count" permutation "s1-initialize" 
"s1-begin" "s1-router-select" "s2-delete" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-router-select" "s2-truncate" "s1-commit" "s1-select-count" @@ -119,7 +116,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-router-se // permutations - router SELECT second permutation "s1-initialize" "s1-begin" "s1-insert" "s2-router-select" "s1-commit" "s1-select-count" -permutation "s1-initialize" "s1-begin" "s1-insert-select" "s2-router-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-update" "s2-router-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-delete" "s2-router-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-truncate" "s2-router-select" "s1-commit" "s1-select-count" @@ -136,7 +132,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-distribut // permutations - real-time SELECT first permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-insert" "s1-commit" "s1-select-count" -permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-insert-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-update" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-delete" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-real-time-select" "s2-truncate" "s1-commit" "s1-select-count" @@ -153,7 +148,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-real-time // permutations - real-time SELECT second permutation "s1-initialize" "s1-begin" "s1-insert" "s2-real-time-select" "s1-commit" "s1-select-count" -permutation "s1-initialize" "s1-begin" "s1-insert-select" "s2-real-time-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-update" "s2-real-time-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" 
"s1-delete" "s2-real-time-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-truncate" "s2-real-time-select" "s1-commit" "s1-select-count" @@ -169,7 +163,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-distribut // permutations - adaptive SELECT first permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-insert" "s1-commit" "s1-select-count" -permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-insert-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-update" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-delete" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-adaptive-select" "s2-truncate" "s1-commit" "s1-select-count" @@ -186,7 +179,6 @@ permutation "s1-drop" "s1-create-non-distributed-table" "s1-begin" "s1-adaptive- // permutations - adaptive SELECT second permutation "s1-initialize" "s1-begin" "s1-insert" "s2-adaptive-select" "s1-commit" "s1-select-count" -permutation "s1-initialize" "s1-begin" "s1-insert-select" "s2-adaptive-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-update" "s2-adaptive-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-delete" "s2-adaptive-select" "s1-commit" "s1-select-count" permutation "s1-initialize" "s1-begin" "s1-truncate" "s2-adaptive-select" "s1-commit" "s1-select-count" diff --git a/src/test/regress/sql/insert_select_repartition.sql b/src/test/regress/sql/insert_select_repartition.sql index 526b6eff5..4d13a83f4 100644 --- a/src/test/regress/sql/insert_select_repartition.sql +++ b/src/test/regress/sql/insert_select_repartition.sql @@ -647,6 +647,17 @@ insert into table_with_user_sequences values (1,1); select create_distributed_table('table_with_user_sequences','x'); explain (costs off) insert into table_with_user_sequences select y, x from table_with_user_sequences; 
+CREATE TABLE dist_table_1(id int); +SELECT create_distributed_table('dist_table_1','id'); +CREATE TABLE dist_table_2(id int); +SELECT create_distributed_table('dist_table_2','id'); + +-- verify that insert select with union can be repartitioned. We cannot push down the query +-- since UNION clause has no FROM clause at top level query. +SELECT public.coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2; +$$); + -- clean-up SET client_min_messages TO WARNING; DROP SCHEMA insert_select_repartition CASCADE; diff --git a/src/test/regress/sql/insert_select_single_shard_table.sql b/src/test/regress/sql/insert_select_single_shard_table.sql index f428752ec..6593ab90b 100644 --- a/src/test/regress/sql/insert_select_single_shard_table.sql +++ b/src/test/regress/sql/insert_select_single_shard_table.sql @@ -325,11 +325,17 @@ JOIN ( ) t2 ON t1.b = t2.b WHERE t2.sum_val > 2; +-- Temporaryly reduce the verbosity to avoid noise +-- in the output of the next query. +SET client_min_messages TO DEBUG1; + -- MultiTaskRouterSelectQuerySupported() is unnecessarily restrictive -- about pushing down queries with DISTINCT ON clause even if the table -- doesn't have a shard key. See https://github.com/citusdata/citus/pull/6752. INSERT INTO nullkey_c1_t1 SELECT DISTINCT ON (a) a, b FROM nullkey_c1_t2; +SET client_min_messages TO DEBUG2; + -- Similarly, we could push down the following query as well. see -- https://github.com/citusdata/citus/pull/6831. 
INSERT INTO nullkey_c1_t1 SELECT b, SUM(a) OVER (ORDER BY b) AS sum_val FROM nullkey_c1_t1; diff --git a/src/test/regress/sql/multi_insert_select.sql b/src/test/regress/sql/multi_insert_select.sql index b14affddf..4d202041f 100644 --- a/src/test/regress/sql/multi_insert_select.sql +++ b/src/test/regress/sql/multi_insert_select.sql @@ -536,7 +536,7 @@ INSERT INTO agg_events fist_table_agg; ROLLBACK; --- We don't support CTEs that are referenced in the target list +-- We do support CTEs that are referenced in the target list INSERT INTO agg_events WITH sub_cte AS (SELECT 1) SELECT @@ -544,7 +544,7 @@ INSERT INTO agg_events FROM raw_events_first; --- We support set operations via the coordinator +-- We support set operations BEGIN; INSERT INTO @@ -2341,5 +2341,121 @@ join dist_table_2 t2 using (dist_col) limit 1 returning text_col_1; +CREATE TABLE dist_table_3( +dist_col bigint, +int_col integer +); + +SELECT create_distributed_table('dist_table_3', 'dist_col'); + +-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to +-- different types for distribution columns. Citus would not be able to handle this complex insert select. +INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col); + +CREATE TABLE dist_table_4( +dist_col integer, +int_col integer +); +SELECT create_distributed_table('dist_table_4', 'dist_col'); + +-- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4 +-- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col; +$$); + +-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query. +-- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col); +$$); + +-- For INSERT SELECT, when push-down is NOT possible when limit clause exists in a subquery at SELECT part of INSERT SELECT. +-- It is because the subquery with limit needs to be merged at coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col); +$$); + +CREATE TABLE dist_table_5(id int, id2 int); +SELECT create_distributed_table('dist_table_5','id'); +CREATE TABLE dist_table_6(id int, id2 int); +SELECT create_distributed_table('dist_table_6','id'); + +-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy; +$$); + +-- verify that insert select with sublink can be pushed down when tables are colocated. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6; +$$); + +CREATE TABLE ref_table_1(id int); +SELECT create_reference_table('ref_table_1'); + +-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation. +INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1; + +-- verify that insert select cannot be pushed down when we have recurring range table in from clause. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1; +$$); + +-- verify that insert select cannot be pushed down when we have reference table in outside of outer join. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true); +$$); + +-- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id); +$$); + +CREATE TABLE loc_table_1(id int); + +-- verify that insert select cannot be pushed down when it contains join between local and distributed tables. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id); +$$); + +CREATE VIEW view_1 AS + SELECT id FROM dist_table_6; + +CREATE MATERIALIZED VIEW view_2 AS + SELECT id FROM dist_table_6; + +-- verify that insert select cannot be pushed down when it contains view. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1; +$$); + +-- verify that insert select cannot be pushed down when it contains materialized view. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2; +$$); + +CREATE TABLE append_table(id integer, data text, int_data int); +SELECT create_distributed_table('append_table', 'id', 'append'); +SELECT master_create_empty_shard('append_table'); + +-- verify that insert select push down for append tables are not supported. +INSERT INTO append_table SELECT * FROM append_table; + +-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner +-- and handled by pull to coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) + INSERT INTO dist_table_5 + SELECT id FROM dist_table_5 JOIN cte_1 USING(id); +$$); + +-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 + WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) + SELECT id FROM dist_table_5 JOIN cte_1 USING(id); +$$); + SET client_min_messages TO ERROR; DROP SCHEMA multi_insert_select CASCADE; diff --git a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql index b4654144b..539024141 100644 --- a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql +++ b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql @@ -4,6 +4,9 @@ ------------------------------------ ------------------------------------ +CREATE SCHEMA multi_insert_select_non_pushable_queries; +SET search_path = multi_insert_select_non_pushable_queries,public; + -- not pushable since the JOIN is not an equi join INSERT INTO agg_results_third (user_id, value_1_agg) SELECT user_id, array_length(events_table, 1) @@ -716,3 +719,16 @@ FROM ( GROUP BY user_id ) AS shard_union ORDER BY user_lastseen DESC; + +CREATE TABLE dist_table_1(id int); +SELECT create_distributed_table('dist_table_1','id'); +CREATE TABLE dist_table_2(id int, id2 int); +SELECT create_distributed_table('dist_table_2','id2'); + +-- verify that insert select with union can be pulled to coordinator. We cannot push down the query +-- since UNION clause has no FROM clause at top level query. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1(id) SELECT id FROM dist_table_1 UNION SELECT id FROM dist_table_2; +$$); + +DROP SCHEMA multi_insert_select_non_pushable_queries CASCADE;