diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index dcfd3fe89..ee32c50c0 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -52,6 +52,7 @@ #include "parser/parse_coerce.h" #include "parser/parse_oper.h" #include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -1434,7 +1435,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode, Expr *originalExpression = originalTargetEntry->expr; Expr *newExpression = NULL; - bool hasAggregates = contain_agg_clause((Node *) originalExpression); + bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0); bool hasWindowFunction = contain_window_function((Node *) originalExpression); /* @@ -2340,7 +2341,7 @@ ProcessTargetListForWorkerQuery(List *targetEntryList, TargetEntry *originalTargetEntry = (TargetEntry *) lfirst(targetEntryCell); Expr *originalExpression = originalTargetEntry->expr; List *newExpressionList = NIL; - bool hasAggregates = contain_agg_clause((Node *) originalExpression); + bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0); bool hasWindowFunction = contain_window_function((Node *) originalExpression); /* reset walker context */ @@ -2682,7 +2683,7 @@ TargetListHasAggregates(List *targetEntryList) { TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); Expr *targetExpr = targetEntry->expr; - bool hasAggregates = contain_agg_clause((Node *) targetExpr); + bool hasAggregates = contain_aggs_of_level((Node *) targetExpr, 0); bool hasWindowFunction = contain_window_function((Node *) targetExpr); /* @@ -4398,7 +4399,7 @@ GenerateNewTargetEntriesForSortClauses(List *originalTargetList, SortGroupClause *sgClause = (SortGroupClause *) lfirst(sortClauseCell); TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList); Expr *targetExpr = targetEntry->expr; - bool containsAggregate = contain_agg_clause((Node *) targetExpr); + bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0); bool createNewTargetEntry = false; /* we are only interested in target entries containing aggregates */ @@ -4500,7 +4501,7 @@ HasOrderByAggregate(List *sortClauseList, List *targetList) SortGroupClause *sortClause = (SortGroupClause *) lfirst(sortClauseCell); Node *sortExpression = get_sortgroupclause_expr(sortClause, targetList); - bool containsAggregate = contain_agg_clause(sortExpression); + bool containsAggregate = contain_aggs_of_level(sortExpression, 0); if (containsAggregate) { hasOrderByAggregate = true; @@ -4574,7 +4575,7 @@ HasOrderByComplexExpression(List *sortClauseList, List *targetList) continue; } - bool nestedAggregate = contain_agg_clause(sortExpression); + bool nestedAggregate = contain_aggs_of_level(sortExpression, 0); if (nestedAggregate) { hasOrderByComplexExpression = true; diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index afb09e4d1..b4bd5f2de 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -72,6 +72,7 @@ static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join /* Local functions forward declarations */ static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList); static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query); +static Oid NodeTryGetRteRelid(Node *node); static bool FullCompositeFieldList(List *compositeFieldList); static bool HasUnsupportedJoinWalker(Node *node, void *context); static bool ErrorHintRequired(const char *errorHint, Query *queryTree); @@ -412,37 +413,55 @@ QueryContainsDistributedTableRTE(Query *query) /* - * IsDistributedTableRTE gets a node and returns true if the node - * is a range table relation entry that points to a distributed - * relation (i.e., excluding reference tables). + * NodeTryGetRteRelid returns the relid of the given RTE_RELATION RangeTableEntry. + * Returns InvalidOid if any of these assumptions fail for given node. */ -bool -IsDistributedTableRTE(Node *node) +static Oid +NodeTryGetRteRelid(Node *node) { if (node == NULL) { - return false; + return InvalidOid; } if (!IsA(node, RangeTblEntry)) { - return false; + return InvalidOid; } RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node; if (rangeTableEntry->rtekind != RTE_RELATION) { - return false; + return InvalidOid; } - Oid relationId = rangeTableEntry->relid; - if (!IsCitusTable(relationId) || - PartitionMethod(relationId) == DISTRIBUTE_BY_NONE) - { - return false; - } + return rangeTableEntry->relid; +} - return true; + +/* + * IsCitusTableRTE gets a node and returns true if the node is a + * range table relation entry that points to a distributed relation. + */ +bool +IsCitusTableRTE(Node *node) +{ + Oid relationId = NodeTryGetRteRelid(node); + return relationId != InvalidOid && IsCitusTable(relationId); +} + + +/* + * IsDistributedTableRTE gets a node and returns true if the node + * is a range table relation entry that points to a distributed relation, + * returning false still if the relation is a reference table. + */ +bool +IsDistributedTableRTE(Node *node) +{ + Oid relationId = NodeTryGetRteRelid(node); + return relationId != InvalidOid && IsCitusTable(relationId) && + PartitionMethod(relationId) != DISTRIBUTE_BY_NONE; } diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index de3ab87ad..93ad31ac5 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -68,6 +68,7 @@ #include "optimizer/tlist.h" #include "parser/parse_relation.h" #include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" @@ -719,8 +720,8 @@ BuildJobQuery(MultiNode *multiNode, List *dependentJobList) jobQuery->limitOffset = limitOffset; jobQuery->limitCount = limitCount; jobQuery->havingQual = havingQual; - jobQuery->hasAggs = contain_agg_clause((Node *) targetList) || - contain_agg_clause((Node *) havingQual); + jobQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0) || + contain_aggs_of_level((Node *) havingQual, 0); jobQuery->distinctClause = distinctClause; jobQuery->hasDistinctOn = hasDistinctOn; @@ -805,7 +806,7 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependentJobList) reduceQuery->limitOffset = extendedOpNode->limitOffset; reduceQuery->limitCount = extendedOpNode->limitCount; reduceQuery->havingQual = extendedOpNode->havingQual; - reduceQuery->hasAggs = contain_agg_clause((Node *) targetList); + reduceQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0); return reduceQuery; } @@ -1553,8 +1554,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode) /* build the where clause list using select predicates */ List *whereClauseList = QuerySelectClauseList(multiNode); - if (contain_agg_clause((Node *) targetList) || - contain_agg_clause((Node *) havingQual)) + if (contain_aggs_of_level((Node *) targetList, 0) || + contain_aggs_of_level((Node *) havingQual, 0)) { hasAggregates = true; } diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index dffa4357e..540177b13 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -59,6 +59,7 @@ #include "distributed/commands/multi_copy.h" #include "distributed/distributed_planner.h" #include "distributed/errormessage.h" +#include "distributed/listutils.h" #include "distributed/log_utils.h" #include "distributed/metadata_cache.h" #include "distributed/multi_logical_planner.h" @@ -147,7 +148,7 @@ static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query, colocatedJoinChecker, RecursivePlanningContext * recursivePlanningContext); -static List * SublinkList(Query *originalQuery); +static List * SublinkListFromWhere(Query *originalQuery); static bool ExtractSublinkWalker(Node *node, List **sublinkList); static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query); static bool RecursivelyPlanAllSubqueries(Node *node, @@ -173,6 +174,7 @@ static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *contex static bool ContainsReferencesToOuterQuery(Query *query); static bool ContainsReferencesToOuterQueryWalker(Node *node, VarLevelsUpWalkerContext *context); +static bool NodeContainsSubqueryReferencingOuterQuery(Node *node); static void WrapFunctionsInSubqueries(Query *query); static void TransformFunctionRTE(RangeTblEntry *rangeTblEntry); static bool ShouldTransformRTE(RangeTblEntry *rangeTableEntry); @@ -314,6 +316,18 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context); } + if (query->havingQual != NULL) + { + if (NodeContainsSubqueryReferencingOuterQuery(query->havingQual)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "Subqueries in HAVING cannot refer to outer query", + NULL, NULL); + } + + RecursivelyPlanAllSubqueries(query->havingQual, context); + } + /* * If the query doesn't have distribution key equality, * recursively plan some of its subqueries. @@ -528,7 +542,7 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query, RecursivePlanningContext * recursivePlanningContext) { - List *sublinkList = SublinkList(query); + List *sublinkList = SublinkListFromWhere(query); ListCell *sublinkCell = NULL; foreach(sublinkCell, sublinkList) @@ -551,12 +565,12 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query, /* - * SublinkList finds the subquery nodes in the where clause of the given query. Note + * SublinkListFromWhere finds the subquery nodes in the where clause of the given query. Note * that the function should be called on the original query given that postgres * standard_planner() may convert the subqueries in WHERE clause to joins. */ static List * -SublinkList(Query *originalQuery) +SublinkListFromWhere(Query *originalQuery) { FromExpr *joinTree = originalQuery->jointree; List *sublinkList = NIL; @@ -648,8 +662,7 @@ RecursivelyPlanAllSubqueries(Node *node, RecursivePlanningContext *planningConte if (IsA(node, Query)) { Query *query = (Query *) node; - - if (FindNodeCheckInRangeTableList(query->rtable, IsDistributedTableRTE)) + if (FindNodeCheckInRangeTableList(query->rtable, IsCitusTableRTE)) { RecursivelyPlanSubquery(query, planningContext); } @@ -1025,7 +1038,7 @@ RecursivelyPlanSetOperations(Query *query, Node *node, Query *subquery = rangeTableEntry->subquery; if (rangeTableEntry->rtekind == RTE_SUBQUERY && - QueryContainsDistributedTableRTE(subquery)) + FindNodeCheck((Node *) subquery, IsDistributedTableRTE)) { RecursivelyPlanSubquery(subquery, context); } @@ -1223,7 +1236,7 @@ CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context) /* * ContainsReferencesToOuterQuery determines whether the given query contains - * any Vars that point outside of the query itself. Such queries cannot be + * anything that points outside of the query itself. Such queries cannot be * planned recursively. */ static bool @@ -1302,6 +1315,29 @@ ContainsReferencesToOuterQueryWalker(Node *node, VarLevelsUpWalkerContext *conte } +/* + * NodeContainsSubqueryReferencingOuterQuery determines whether the given node + * contains anything that points outside of the query itself. + */ +static bool +NodeContainsSubqueryReferencingOuterQuery(Node *node) +{ + List *sublinks = NIL; + ExtractSublinkWalker(node, &sublinks); + + SubLink *sublink; + foreach_ptr(sublink, sublinks) + { + if (ContainsReferencesToOuterQuery(castNode(Query, sublink->subselect))) + { + return true; + } + } + + return false; +} + + /* * WrapFunctionsInSubqueries iterates over all the immediate Range Table Entries * of a query and wraps the functions inside (SELECT * FROM fnc() f) diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index 544023aa4..8a2fa1a0e 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -190,6 +190,7 @@ extern bool FindNodeCheck(Node *node, bool (*check)(Node *)); extern bool SingleRelationRepartitionSubquery(Query *queryTree); extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList); extern bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *)); +extern bool IsCitusTableRTE(Node *node); extern bool IsDistributedTableRTE(Node *node); extern bool QueryContainsDistributedTableRTE(Query *query); extern bool IsCitusExtraDataContainerRelation(RangeTblEntry *rte); diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 8d792f689..64d398028 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -28,6 +28,9 @@ s/\(ref_id\)=\([0-9]+\)/(ref_id)=(X)/g # shard table names for multi_subtransactions s/"t2_[0-9]+"/"t2_xxxxxxx"/g +# shard table names for multi_subquery +s/ keyval(1|2|ref)_[0-9]+ / keyval\1_xxxxxxx /g + # shard table names for custom_aggregate_support s/ daily_uniques_[0-9]+ / daily_uniques_xxxxxxx /g diff --git a/src/test/regress/expected/ch_bench_having.out b/src/test/regress/expected/ch_bench_having.out index 3ad09bd9e..51c6e85f5 100644 --- a/src/test/regress/expected/ch_bench_having.out +++ b/src/test/regress/expected/ch_bench_having.out @@ -280,14 +280,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock) group by s_i_id having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id) order by s_i_id; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: Subqueries in HAVING cannot refer to outer query -- We don't support correlated subqueries in having select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id) order by s_i_id; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: Subqueries in HAVING cannot refer to outer query DROP TABLE stock; CREATE TABLE stock ( s_w_id int NOT NULL, diff --git a/src/test/regress/expected/ch_bench_having_mx.out b/src/test/regress/expected/ch_bench_having_mx.out index 71478ae6f..372155303 100644 --- a/src/test/regress/expected/ch_bench_having_mx.out +++ b/src/test/regress/expected/ch_bench_having_mx.out @@ -283,14 +283,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock) group by s_i_id having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id) order by s_i_id; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: Subqueries in HAVING cannot refer to outer query -- We don't support correlated subqueries in having select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id) order by s_i_id; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: Subqueries in HAVING cannot refer to outer query \c - - - :master_port SET citus.replication_model TO streaming; SET citus.shard_replication_factor to 1; diff --git a/src/test/regress/expected/multi_reference_table.out b/src/test/regress/expected/multi_reference_table.out index 2012f8ee5..bb8a04bd8 100644 --- a/src/test/regress/expected/multi_reference_table.out +++ b/src/test/regress/expected/multi_reference_table.out @@ -990,7 +990,7 @@ INSERT INTO SET citus.shard_count TO 6; SET citus.shard_replication_factor TO 2; CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); -SELECT create_distributed_table('colocated_table_test', 'value_1'); + SELECT create_distributed_table('colocated_table_test', 'value_1'); create_distributed_table --------------------------------------------------------------------- @@ -1533,8 +1533,7 @@ SELECT select_count_all(); TRUNCATE reference_table_test; -- reference tables work with composite key --- and we even do not need to create hash --- function etc. +-- and we even do not need to create hash function etc. -- first create the type on all nodes CREATE TYPE reference_comp_key as (key text, value text); CREATE TABLE reference_table_composite (id int PRIMARY KEY, data reference_comp_key); @@ -1602,6 +1601,39 @@ BEGIN; ALTER TABLE reference_table_test ADD COLUMN value_dummy INT; INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); ROLLBACK; +-- Previous issue failed to rename reference tables in subqueries +EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1 +HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0 +ORDER BY 1; + QUERY PLAN +--------------------------------------------------------------------- + Sort + Sort Key: remote_scan.value_1 + InitPlan 1 (returns $0) + -> Function Scan on read_intermediate_result intermediate_result + -> HashAggregate + Group Key: remote_scan.value_1 + -> Result + One-Time Filter: ($0 > '0'::double precision) + -> Custom Scan (Citus Adaptive) + Filter: ($0 > '0'::double precision) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on reference_table_test_1250000 rt + Filter: (value_2 = '2'::double precision) + Task Count: 6 + Tasks Shown: One of 6 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: colocated_table_test.value_1 + -> Seq Scan on colocated_table_test_1250005 colocated_table_test +(25 rows) + -- clean up tables, ... SET client_min_messages TO ERROR; DROP SEQUENCE example_ref_value_seq; diff --git a/src/test/regress/expected/multi_select_distinct.out b/src/test/regress/expected/multi_select_distinct.out index 8bb960161..c16014c48 100644 --- a/src/test/regress/expected/multi_select_distinct.out +++ b/src/test/regress/expected/multi_select_distinct.out @@ -10,6 +10,18 @@ SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0; --------------------------------------------------------------------- (0 rows) +SELECT DISTINCT l_orderkey, avg(l_linenumber) +FROM lineitem_hash_part +GROUP BY l_orderkey +HAVING avg(l_linenumber) = (select avg(distinct l_linenumber)) +LIMIT 10; +ERROR: Subqueries in HAVING cannot refer to outer query +SELECT DISTINCT l_orderkey +FROM lineitem_hash_part +GROUP BY l_orderkey +HAVING (select avg(distinct l_linenumber) = l_orderkey) +LIMIT 10; +ERROR: Subqueries in HAVING cannot refer to outer query SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3; l_partkey | ?column? --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index 76f336320..d387ded3e 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -884,8 +884,165 @@ AS foo; --------------------------------------------------------------------- (0 rows) -DROP TABLE subquery_pruning_varchar_test_table; RESET citus.enable_router_execution; +-- Test https://github.com/citusdata/citus/issues/3424 +insert into subquery_pruning_varchar_test_table values ('1', '1'), (2, '1'), (3, '2'), (3, '1'), (4, '4'), (5, '6'); +WITH cte_1 AS (SELECT b max FROM subquery_pruning_varchar_test_table) +SELECT a +FROM subquery_pruning_varchar_test_table +JOIN cte_1 ON a = max::text +GROUP BY a HAVING a = (SELECT a) +ORDER BY 1; +ERROR: Subqueries in HAVING cannot refer to outer query +-- Test https://github.com/citusdata/citus/issues/3432 +SELECT t1.event_type FROM events_table t1 +GROUP BY t1.event_type HAVING t1.event_type > avg((SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1)) +ORDER BY 1; +ERROR: cannot handle unplanned sub-select +SELECT t1.event_type FROM events_table t1 +GROUP BY t1.event_type HAVING t1.event_type > avg(2 + (SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1)) +ORDER BY 1; +ERROR: cannot handle unplanned sub-select +-- Test https://github.com/citusdata/citus/issues/3433 +CREATE TABLE keyval1 (key int, value int); +SELECT create_distributed_table('keyval1', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE keyval2 (key int, value int); +SELECT create_distributed_table('keyval2', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE keyvalref (key int, value int); +SELECT create_reference_table('keyvalref'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key); + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate + Group Key: remote_scan.worker_column_2 + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) + InitPlan 1 (returns $0) + -> Function Scan on read_intermediate_result intermediate_result + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: key + -> Seq Scan on keyvalref_xxxxxxx keyvalref + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: keyval1.key + -> Seq Scan on keyval1_xxxxxxx keyval1 +(22 rows) + +-- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning +EXPLAIN (COSTS OFF) +SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1); + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate + Group Key: remote_scan.worker_column_2 + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) + InitPlan 1 (returns $0) + -> Function Scan on read_intermediate_result intermediate_result + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Sort + Sort Key: (sum(value)) DESC + -> HashAggregate + Group Key: key + -> Seq Scan on keyvalref_xxxxxxx keyvalref + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: keyval1.key + -> Seq Scan on keyval1_xxxxxxx keyval1 +(25 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1); + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate + Group Key: remote_scan.worker_column_2 + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) + InitPlan 1 (returns $0) + -> Function Scan on read_intermediate_result intermediate_result + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Limit + -> Sort + Sort Key: remote_scan.sum DESC + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Sort + Sort Key: (sum(value)) DESC + -> HashAggregate + Group Key: key + -> Seq Scan on keyval2_xxxxxxx keyval2 + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: keyval1.key + -> Seq Scan on keyval1_xxxxxxx keyval1 +(28 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 GROUP BY key ORDER BY 1 DESC LIMIT 1); + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> GroupAggregate + Group Key: k1.key + Filter: (sum(k1.value) > $0) + InitPlan 1 (returns $0) + -> Limit + -> Sort + Sort Key: (sum(k2.value)) DESC + -> GroupAggregate + Group Key: k2.key + -> Seq Scan on keyval2_xxxxxxx k2 + Filter: (key = 2) + -> Seq Scan on keyval1_xxxxxxx k1 + Filter: (key = 2) +(18 rows) + -- Simple join subquery pushdown SELECT avg(array_length(events, 1)) AS event_average @@ -1124,7 +1281,7 @@ LIMIT -- also set the min messages to WARNING to skip -- CASCADE NOTICE messagez SET client_min_messages TO WARNING; -DROP TABLE users, events; +DROP TABLE users, events, subquery_pruning_varchar_test_table, keyval1, keyval2, keyvalref; DROP TYPE user_composite_type CASCADE; SET client_min_messages TO DEFAULT; SET citus.subquery_pushdown to OFF; diff --git a/src/test/regress/sql/multi_reference_table.sql b/src/test/regress/sql/multi_reference_table.sql index f0e1fac1a..b078e487d 100644 --- a/src/test/regress/sql/multi_reference_table.sql +++ b/src/test/regress/sql/multi_reference_table.sql @@ -281,7 +281,6 @@ GROUP BY GROUPING sets ((value_4), (value_3)) ORDER BY 1, 2, 3; - -- distinct clauses also work fine SELECT DISTINCT value_4 @@ -620,7 +619,7 @@ SET citus.shard_count TO 6; SET citus.shard_replication_factor TO 2; CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); -SELECT create_distributed_table('colocated_table_test', 'value_1'); + SELECT create_distributed_table('colocated_table_test', 'value_1'); CREATE TABLE colocated_table_test_2 (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test_2', 'value_1'); @@ -964,8 +963,7 @@ SELECT select_count_all(); TRUNCATE reference_table_test; -- reference tables work with composite key --- and we even do not need to create hash --- function etc. +-- and we even do not need to create hash function etc. -- first create the type on all nodes CREATE TYPE reference_comp_key as (key text, value text); @@ -1007,6 +1005,11 @@ ALTER TABLE reference_table_test ADD COLUMN value_dummy INT; INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); ROLLBACK; +-- Previous issue failed to rename reference tables in subqueries +EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1 +HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0 +ORDER BY 1; + -- clean up tables, ... SET client_min_messages TO ERROR; DROP SEQUENCE example_ref_value_seq; diff --git a/src/test/regress/sql/multi_select_distinct.sql b/src/test/regress/sql/multi_select_distinct.sql index 80a9fad5c..fdfeec76c 100644 --- a/src/test/regress/sql/multi_select_distinct.sql +++ b/src/test/regress/sql/multi_select_distinct.sql @@ -9,6 +9,18 @@ ANALYZE lineitem_hash_part; -- function calls are supported SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0; +SELECT DISTINCT l_orderkey, avg(l_linenumber) +FROM lineitem_hash_part +GROUP BY l_orderkey +HAVING avg(l_linenumber) = (select avg(distinct l_linenumber)) +LIMIT 10; + +SELECT DISTINCT l_orderkey +FROM lineitem_hash_part +GROUP BY l_orderkey +HAVING (select avg(distinct l_linenumber) = l_orderkey) +LIMIT 10; + SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3; -- const expressions are supported diff --git a/src/test/regress/sql/multi_subquery.sql b/src/test/regress/sql/multi_subquery.sql index 3301c35eb..dae0b6aee 100644 --- a/src/test/regress/sql/multi_subquery.sql +++ b/src/test/regress/sql/multi_subquery.sql @@ -620,10 +620,50 @@ SELECT * FROM a_inner) AS foo; -DROP TABLE subquery_pruning_varchar_test_table; - RESET citus.enable_router_execution; +-- Test https://github.com/citusdata/citus/issues/3424 +insert into subquery_pruning_varchar_test_table values ('1', '1'), (2, '1'), (3, '2'), (3, '1'), (4, '4'), (5, '6'); + +WITH cte_1 AS (SELECT b max FROM subquery_pruning_varchar_test_table) +SELECT a +FROM subquery_pruning_varchar_test_table +JOIN cte_1 ON a = max::text +GROUP BY a HAVING a = (SELECT a) +ORDER BY 1; + +-- Test https://github.com/citusdata/citus/issues/3432 +SELECT t1.event_type FROM events_table t1 +GROUP BY t1.event_type HAVING t1.event_type > avg((SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1)) +ORDER BY 1; + +SELECT t1.event_type FROM events_table t1 +GROUP BY t1.event_type HAVING t1.event_type > avg(2 + (SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1)) +ORDER BY 1; + +-- Test https://github.com/citusdata/citus/issues/3433 +CREATE TABLE keyval1 (key int, value int); +SELECT create_distributed_table('keyval1', 'key'); + +CREATE TABLE keyval2 (key int, value int); +SELECT create_distributed_table('keyval2', 'key'); + +CREATE TABLE keyvalref (key int, value int); +SELECT create_reference_table('keyvalref'); + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key); + +-- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning +EXPLAIN (COSTS OFF) +SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1); + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1); + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 GROUP BY key ORDER BY 1 DESC LIMIT 1); + -- Simple join subquery pushdown SELECT avg(array_length(events, 1)) AS event_average @@ -843,7 +883,7 @@ LIMIT -- also set the min messages to WARNING to skip -- CASCADE NOTICE messagez SET client_min_messages TO WARNING; -DROP TABLE users, events; +DROP TABLE users, events, subquery_pruning_varchar_test_table, keyval1, keyval2, keyvalref; DROP TYPE user_composite_type CASCADE;