First phase of addressing HAVING subquery issues

Add failing tests, make changes to avoid crashes at least Fix HAVING subquery pushdown ignoring reference table only subqueries, also include HAVING in recursive planning Given that we have a function IsDistributedTable which includes reference tables, it seems best to have IsDistributedTableRTE & QueryContainsDistributedTableRTE reflect that they do not include reference tables in their check Similarly SublinkList's name should reflect that it only scans WHERE contain_agg_clause asserts that we don't have SubLinks, use contain_aggs_of_level as suggested by pg sourcecode
2020-03-04 03:18:15 +00:00 · 2020-03-04 03:18:15 +00:00 · 81cfa05d3d
parent 7793d19b71
commit 81cfa05d3d
14 changed files with 367 additions and 50 deletions
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@ -52,6 +52,7 @@
 #include "parser/parse_coerce.h"
 #include "parser/parse_oper.h"
 #include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
@ -1434,7 +1435,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
 		Expr *originalExpression = originalTargetEntry->expr;
 		Expr *newExpression = NULL;

-		bool hasAggregates = contain_agg_clause((Node *) originalExpression);
+		bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0);
 		bool hasWindowFunction = contain_window_function((Node *) originalExpression);

 		/*
@ -2340,7 +2341,7 @@ ProcessTargetListForWorkerQuery(List *targetEntryList,
 		TargetEntry *originalTargetEntry = (TargetEntry *) lfirst(targetEntryCell);
 		Expr *originalExpression = originalTargetEntry->expr;
 		List *newExpressionList = NIL;
-		bool hasAggregates = contain_agg_clause((Node *) originalExpression);
+		bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0);
 		bool hasWindowFunction = contain_window_function((Node *) originalExpression);

 		/* reset walker context */
@ -2682,7 +2683,7 @@ TargetListHasAggregates(List *targetEntryList)
 	{
 		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
 		Expr *targetExpr = targetEntry->expr;
-		bool hasAggregates = contain_agg_clause((Node *) targetExpr);
+		bool hasAggregates = contain_aggs_of_level((Node *) targetExpr, 0);
 		bool hasWindowFunction = contain_window_function((Node *) targetExpr);

 		/*
@ -4398,7 +4399,7 @@ GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
 		SortGroupClause *sgClause = (SortGroupClause *) lfirst(sortClauseCell);
 		TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList);
 		Expr *targetExpr = targetEntry->expr;
-		bool containsAggregate = contain_agg_clause((Node *) targetExpr);
+		bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0);
 		bool createNewTargetEntry = false;

 		/* we are only interested in target entries containing aggregates */
@ -4500,7 +4501,7 @@ HasOrderByAggregate(List *sortClauseList, List *targetList)
 		SortGroupClause *sortClause = (SortGroupClause *) lfirst(sortClauseCell);
 		Node *sortExpression = get_sortgroupclause_expr(sortClause, targetList);

-		bool containsAggregate = contain_agg_clause(sortExpression);
+		bool containsAggregate = contain_aggs_of_level(sortExpression, 0);
 		if (containsAggregate)
 		{
 			hasOrderByAggregate = true;
@ -4574,7 +4575,7 @@ HasOrderByComplexExpression(List *sortClauseList, List *targetList)
 			continue;
 		}

-		bool nestedAggregate = contain_agg_clause(sortExpression);
+		bool nestedAggregate = contain_aggs_of_level(sortExpression, 0);
 		if (nestedAggregate)
 		{
 			hasOrderByComplexExpression = true;
--- a/src/backend/distributed/planner/multi_logical_planner.c
+++ b/src/backend/distributed/planner/multi_logical_planner.c
@ -72,6 +72,7 @@ static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join
 /* Local functions forward declarations */
 static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
 static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
+static Oid NodeTryGetRteRelid(Node *node);
 static bool FullCompositeFieldList(List *compositeFieldList);
 static bool HasUnsupportedJoinWalker(Node *node, void *context);
 static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
@ -412,37 +413,55 @@ QueryContainsDistributedTableRTE(Query *query)


 /*
- * IsDistributedTableRTE gets a node and returns true if the node
- * is a range table relation entry that points to a distributed
- * relation (i.e., excluding reference tables).
+ * NodeTryGetRteRelid returns the relid of the given RTE_RELATION RangeTableEntry.
+ * Returns InvalidOid if any of these assumptions fail for given node.
 */
-bool
-IsDistributedTableRTE(Node *node)
+static Oid
+NodeTryGetRteRelid(Node *node)
 {
 	if (node == NULL)
 	{
-		return false;
+		return InvalidOid;
 	}

 	if (!IsA(node, RangeTblEntry))
 	{
-		return false;
+		return InvalidOid;
 	}

 	RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node;
 	if (rangeTableEntry->rtekind != RTE_RELATION)
 	{
-		return false;
+		return InvalidOid;
 	}

-	Oid relationId = rangeTableEntry->relid;
-	if (!IsCitusTable(relationId) ||
-		PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
-	{
-		return false;
-	}
+	return rangeTableEntry->relid;
+}

-	return true;
+
+/*
+ * IsCitusTableRTE gets a node and returns true if the node is a
+ * range table relation entry that points to a distributed relation.
+ */
+bool
+IsCitusTableRTE(Node *node)
+{
+	Oid relationId = NodeTryGetRteRelid(node);
+	return relationId != InvalidOid && IsCitusTable(relationId);
+}
+
+
+/*
+ * IsDistributedTableRTE gets a node and returns true if the node
+ * is a range table relation entry that points to a distributed relation,
+ * returning false still if the relation is a reference table.
+ */
+bool
+IsDistributedTableRTE(Node *node)
+{
+	Oid relationId = NodeTryGetRteRelid(node);
+	return relationId != InvalidOid && IsCitusTable(relationId) &&
+		   PartitionMethod(relationId) != DISTRIBUTE_BY_NONE;
 }


--- a/src/backend/distributed/planner/multi_physical_planner.c
+++ b/src/backend/distributed/planner/multi_physical_planner.c
@ -68,6 +68,7 @@
 #include "optimizer/tlist.h"
 #include "parser/parse_relation.h"
 #include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
 #include "utils/builtins.h"
 #include "utils/catcache.h"
 #include "utils/fmgroids.h"
@ -719,8 +720,8 @@ BuildJobQuery(MultiNode *multiNode, List *dependentJobList)
 	jobQuery->limitOffset = limitOffset;
 	jobQuery->limitCount = limitCount;
 	jobQuery->havingQual = havingQual;
-	jobQuery->hasAggs = contain_agg_clause((Node *) targetList) ||
-						contain_agg_clause((Node *) havingQual);
+	jobQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0) ||
+						contain_aggs_of_level((Node *) havingQual, 0);
 	jobQuery->distinctClause = distinctClause;
 	jobQuery->hasDistinctOn = hasDistinctOn;

@ -805,7 +806,7 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependentJobList)
 	reduceQuery->limitOffset = extendedOpNode->limitOffset;
 	reduceQuery->limitCount = extendedOpNode->limitCount;
 	reduceQuery->havingQual = extendedOpNode->havingQual;
-	reduceQuery->hasAggs = contain_agg_clause((Node *) targetList);
+	reduceQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0);

 	return reduceQuery;
 }
@ -1553,8 +1554,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
 	/* build the where clause list using select predicates */
 	List *whereClauseList = QuerySelectClauseList(multiNode);

-	if (contain_agg_clause((Node *) targetList) ||
-		contain_agg_clause((Node *) havingQual))
+	if (contain_aggs_of_level((Node *) targetList, 0) ||
+		contain_aggs_of_level((Node *) havingQual, 0))
 	{
 		hasAggregates = true;
 	}
--- a/src/backend/distributed/planner/recursive_planning.c
+++ b/src/backend/distributed/planner/recursive_planning.c
@ -59,6 +59,7 @@
 #include "distributed/commands/multi_copy.h"
 #include "distributed/distributed_planner.h"
 #include "distributed/errormessage.h"
+#include "distributed/listutils.h"
 #include "distributed/log_utils.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/multi_logical_planner.h"
@ -147,7 +148,7 @@ static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
 														 colocatedJoinChecker,
 														 RecursivePlanningContext *
 														 recursivePlanningContext);
-static List * SublinkList(Query *originalQuery);
+static List * SublinkListFromWhere(Query *originalQuery);
 static bool ExtractSublinkWalker(Node *node, List **sublinkList);
 static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query);
 static bool RecursivelyPlanAllSubqueries(Node *node,
@ -173,6 +174,7 @@ static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *contex
 static bool ContainsReferencesToOuterQuery(Query *query);
 static bool ContainsReferencesToOuterQueryWalker(Node *node,
 												 VarLevelsUpWalkerContext *context);
+static bool NodeContainsSubqueryReferencingOuterQuery(Node *node);
 static void WrapFunctionsInSubqueries(Query *query);
 static void TransformFunctionRTE(RangeTblEntry *rangeTblEntry);
 static bool ShouldTransformRTE(RangeTblEntry *rangeTableEntry);
@ -314,6 +316,18 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
 		RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context);
 	}

+	if (query->havingQual != NULL)
+	{
+		if (NodeContainsSubqueryReferencingOuterQuery(query->havingQual))
+		{
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+								 "Subqueries in HAVING cannot refer to outer query",
+								 NULL, NULL);
+		}
+
+		RecursivelyPlanAllSubqueries(query->havingQual, context);
+	}
+
 	/*
 	 * If the query doesn't have distribution key equality,
 	 * recursively plan some of its subqueries.
@ -528,7 +542,7 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
 											 RecursivePlanningContext *
 											 recursivePlanningContext)
 {
-	List *sublinkList = SublinkList(query);
+	List *sublinkList = SublinkListFromWhere(query);
 	ListCell *sublinkCell = NULL;

 	foreach(sublinkCell, sublinkList)
@ -551,12 +565,12 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,


 /*
- * SublinkList finds the subquery nodes in the where clause of the given query. Note
+ * SublinkListFromWhere finds the subquery nodes in the where clause of the given query. Note
 * that the function should be called on the original query given that postgres
 * standard_planner() may convert the subqueries in WHERE clause to joins.
 */
 static List *
-SublinkList(Query *originalQuery)
+SublinkListFromWhere(Query *originalQuery)
 {
 	FromExpr *joinTree = originalQuery->jointree;
 	List *sublinkList = NIL;
@ -648,8 +662,7 @@ RecursivelyPlanAllSubqueries(Node *node, RecursivePlanningContext *planningConte
 	if (IsA(node, Query))
 	{
 		Query *query = (Query *) node;
-
-		if (FindNodeCheckInRangeTableList(query->rtable, IsDistributedTableRTE))
+		if (FindNodeCheckInRangeTableList(query->rtable, IsCitusTableRTE))
 		{
 			RecursivelyPlanSubquery(query, planningContext);
 		}
@ -1025,7 +1038,7 @@ RecursivelyPlanSetOperations(Query *query, Node *node,
 		Query *subquery = rangeTableEntry->subquery;

 		if (rangeTableEntry->rtekind == RTE_SUBQUERY &&
-			QueryContainsDistributedTableRTE(subquery))
+			FindNodeCheck((Node *) subquery, IsDistributedTableRTE))
 		{
 			RecursivelyPlanSubquery(subquery, context);
 		}
@ -1223,7 +1236,7 @@ CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context)

 /*
 * ContainsReferencesToOuterQuery determines whether the given query contains
- * any Vars that point outside of the query itself. Such queries cannot be
+ * anything that points outside of the query itself. Such queries cannot be
 * planned recursively.
 */
 static bool
@ -1302,6 +1315,29 @@ ContainsReferencesToOuterQueryWalker(Node *node, VarLevelsUpWalkerContext *conte
 }


+/*
+ * NodeContainsSubqueryReferencingOuterQuery determines whether the given node
+ * contains anything that points outside of the query itself.
+ */
+static bool
+NodeContainsSubqueryReferencingOuterQuery(Node *node)
+{
+	List *sublinks = NIL;
+	ExtractSublinkWalker(node, &sublinks);
+
+	SubLink *sublink;
+	foreach_ptr(sublink, sublinks)
+	{
+		if (ContainsReferencesToOuterQuery(castNode(Query, sublink->subselect)))
+		{
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
 /*
 * WrapFunctionsInSubqueries iterates over all the immediate Range Table Entries
 * of a query and wraps the functions inside (SELECT * FROM fnc() f)
--- a/src/include/distributed/multi_logical_planner.h
+++ b/src/include/distributed/multi_logical_planner.h
@ -190,6 +190,7 @@ extern bool FindNodeCheck(Node *node, bool (*check)(Node *));
 extern bool SingleRelationRepartitionSubquery(Query *queryTree);
 extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
 extern bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *));
+extern bool IsCitusTableRTE(Node *node);
 extern bool IsDistributedTableRTE(Node *node);
 extern bool QueryContainsDistributedTableRTE(Query *query);
 extern bool IsCitusExtraDataContainerRelation(RangeTblEntry *rte);
--- a/src/test/regress/bin/normalize.sed
+++ b/src/test/regress/bin/normalize.sed
@ -28,6 +28,9 @@ s/\(ref_id\)=\([0-9]+\)/(ref_id)=(X)/g
 # shard table names for multi_subtransactions
 s/"t2_[0-9]+"/"t2_xxxxxxx"/g

+# shard table names for multi_subquery
+s/ keyval(1|2|ref)_[0-9]+ / keyval\1_xxxxxxx /g
+
 # shard table names for custom_aggregate_support
 s/ daily_uniques_[0-9]+ / daily_uniques_xxxxxxx /g

--- a/src/test/regress/expected/ch_bench_having.out
+++ b/src/test/regress/expected/ch_bench_having.out
@ -280,14 +280,14 @@ where   s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
 group by s_i_id
 having   (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
 order by s_i_id;
-ERROR:  complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
+ERROR:  Subqueries in HAVING cannot refer to outer query
 -- We don't support correlated subqueries in having
 select     s_i_id, sum(s_order_cnt) as ordercount
 from     stock s
 group by s_i_id
 having   (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
 order by s_i_id;
-ERROR:  complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
+ERROR:  Subqueries in HAVING cannot refer to outer query
 DROP TABLE stock;
 CREATE TABLE stock (
  s_w_id int NOT NULL,
--- a/src/test/regress/expected/ch_bench_having_mx.out
+++ b/src/test/regress/expected/ch_bench_having_mx.out
@ -283,14 +283,14 @@ where   s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
 group by s_i_id
 having   (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
 order by s_i_id;
-ERROR:  complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
+ERROR:  Subqueries in HAVING cannot refer to outer query
 -- We don't support correlated subqueries in having
 select     s_i_id, sum(s_order_cnt) as ordercount
 from     stock s
 group by s_i_id
 having   (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
 order by s_i_id;
-ERROR:  complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
+ERROR:  Subqueries in HAVING cannot refer to outer query
 \c - - - :master_port
 SET citus.replication_model TO streaming;
 SET citus.shard_replication_factor to 1;
--- a/src/test/regress/expected/multi_reference_table.out
+++ b/src/test/regress/expected/multi_reference_table.out
@ -990,7 +990,7 @@ INSERT INTO
 SET citus.shard_count TO 6;
 SET citus.shard_replication_factor TO 2;
 CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp);
-SELECT create_distributed_table('colocated_table_test', 'value_1');
+    SELECT create_distributed_table('colocated_table_test', 'value_1');
 create_distributed_table
 ---------------------------------------------------------------------

@ -1533,8 +1533,7 @@ SELECT select_count_all();

 TRUNCATE reference_table_test;
 -- reference tables work with composite key
-- and we even do not need to create hash
-- function etc.
+-- and we even do not need to create hash function etc.
 -- first create the type on all nodes
 CREATE TYPE reference_comp_key as (key text, value text);
 CREATE TABLE reference_table_composite (id int PRIMARY KEY, data reference_comp_key);
@ -1602,6 +1601,39 @@ BEGIN;
 ALTER TABLE reference_table_test ADD COLUMN value_dummy INT;
 INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
 ROLLBACK;
+-- Previous issue failed to rename reference tables in subqueries
+EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1
+HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0
+ORDER BY 1;
+                                            QUERY PLAN
+---------------------------------------------------------------------
+ Sort
+   Sort Key: remote_scan.value_1
+   InitPlan 1 (returns $0)
+     ->  Function Scan on read_intermediate_result intermediate_result
+   ->  HashAggregate
+         Group Key: remote_scan.value_1
+         ->  Result
+               One-Time Filter: ($0 > '0'::double precision)
+               ->  Custom Scan (Citus Adaptive)
+                     Filter: ($0 > '0'::double precision)
+                     ->  Distributed Subplan XXX_1
+                           ->  Custom Scan (Citus Adaptive)
+                                 Task Count: 1
+                                 Tasks Shown: All
+                                 ->  Task
+                                       Node: host=localhost port=xxxxx dbname=regression
+                                       ->  Seq Scan on reference_table_test_1250000 rt
+                                             Filter: (value_2 = '2'::double precision)
+                     Task Count: 6
+                     Tasks Shown: One of 6
+                     ->  Task
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  HashAggregate
+                                 Group Key: colocated_table_test.value_1
+                                 ->  Seq Scan on colocated_table_test_1250005 colocated_table_test
+(25 rows)
+
 -- clean up tables, ...
 SET client_min_messages TO ERROR;
 DROP SEQUENCE example_ref_value_seq;
--- a/src/test/regress/expected/multi_select_distinct.out
+++ b/src/test/regress/expected/multi_select_distinct.out
@ -10,6 +10,18 @@ SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
 ---------------------------------------------------------------------
 (0 rows)

+SELECT DISTINCT l_orderkey, avg(l_linenumber)
+FROM lineitem_hash_part
+GROUP BY l_orderkey
+HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
+LIMIT 10;
+ERROR:  Subqueries in HAVING cannot refer to outer query
+SELECT DISTINCT l_orderkey
+FROM lineitem_hash_part
+GROUP BY l_orderkey
+HAVING (select avg(distinct l_linenumber) = l_orderkey)
+LIMIT 10;
+ERROR:  Subqueries in HAVING cannot refer to outer query
 SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
 l_partkey | ?column?
 ---------------------------------------------------------------------
--- a/src/test/regress/expected/multi_subquery.out
+++ b/src/test/regress/expected/multi_subquery.out
@ -884,8 +884,165 @@ AS foo;
 ---------------------------------------------------------------------
 (0 rows)

-DROP TABLE subquery_pruning_varchar_test_table;
 RESET citus.enable_router_execution;
+-- Test https://github.com/citusdata/citus/issues/3424
+insert into subquery_pruning_varchar_test_table values ('1', '1'), (2, '1'), (3, '2'), (3, '1'), (4, '4'), (5, '6');
+WITH cte_1 AS (SELECT b max FROM subquery_pruning_varchar_test_table)
+SELECT a
+FROM subquery_pruning_varchar_test_table
+JOIN cte_1 ON a = max::text
+GROUP BY a HAVING a = (SELECT a)
+ORDER BY 1;
+ERROR:  Subqueries in HAVING cannot refer to outer query
+-- Test https://github.com/citusdata/citus/issues/3432
+SELECT t1.event_type FROM events_table t1
+GROUP BY t1.event_type HAVING t1.event_type > avg((SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
+ORDER BY 1;
+ERROR:  cannot handle unplanned sub-select
+SELECT t1.event_type FROM events_table t1
+GROUP BY t1.event_type HAVING t1.event_type > avg(2 + (SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
+ORDER BY 1;
+ERROR:  cannot handle unplanned sub-select
+-- Test https://github.com/citusdata/citus/issues/3433
+CREATE TABLE keyval1 (key int, value int);
+SELECT create_distributed_table('keyval1', 'key');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE TABLE keyval2 (key int, value int);
+SELECT create_distributed_table('keyval2', 'key');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE TABLE keyvalref (key int, value int);
+SELECT create_reference_table('keyvalref');
+ create_reference_table
+---------------------------------------------------------------------
+
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key);
+                                 QUERY PLAN
+---------------------------------------------------------------------
+ HashAggregate
+   Group Key: remote_scan.worker_column_2
+   Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
+   InitPlan 1 (returns $0)
+     ->  Function Scan on read_intermediate_result intermediate_result
+   ->  Custom Scan (Citus Adaptive)
+         ->  Distributed Subplan XXX_1
+               ->  Custom Scan (Citus Adaptive)
+                     Task Count: 1
+                     Tasks Shown: All
+                     ->  Task
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  HashAggregate
+                                 Group Key: key
+                                 ->  Seq Scan on keyvalref_xxxxxxx keyvalref
+         Task Count: 4
+         Tasks Shown: One of 4
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  HashAggregate
+                     Group Key: keyval1.key
+                     ->  Seq Scan on keyval1_xxxxxxx keyval1
+(22 rows)
+
+-- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1);
+                                       QUERY PLAN
+---------------------------------------------------------------------
+ HashAggregate
+   Group Key: remote_scan.worker_column_2
+   Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
+   InitPlan 1 (returns $0)
+     ->  Function Scan on read_intermediate_result intermediate_result
+   ->  Custom Scan (Citus Adaptive)
+         ->  Distributed Subplan XXX_1
+               ->  Custom Scan (Citus Adaptive)
+                     Task Count: 1
+                     Tasks Shown: All
+                     ->  Task
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit
+                                 ->  Sort
+                                       Sort Key: (sum(value)) DESC
+                                       ->  HashAggregate
+                                             Group Key: key
+                                             ->  Seq Scan on keyvalref_xxxxxxx keyvalref
+         Task Count: 4
+         Tasks Shown: One of 4
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  HashAggregate
+                     Group Key: keyval1.key
+                     ->  Seq Scan on keyval1_xxxxxxx keyval1
+(25 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
+                                           QUERY PLAN
+---------------------------------------------------------------------
+ HashAggregate
+   Group Key: remote_scan.worker_column_2
+   Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
+   InitPlan 1 (returns $0)
+     ->  Function Scan on read_intermediate_result intermediate_result
+   ->  Custom Scan (Citus Adaptive)
+         ->  Distributed Subplan XXX_1
+               ->  Limit
+                     ->  Sort
+                           Sort Key: remote_scan.sum DESC
+                           ->  Custom Scan (Citus Adaptive)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=xxxxx dbname=regression
+                                       ->  Limit
+                                             ->  Sort
+                                                   Sort Key: (sum(value)) DESC
+                                                   ->  HashAggregate
+                                                         Group Key: key
+                                                         ->  Seq Scan on keyval2_xxxxxxx keyval2
+         Task Count: 4
+         Tasks Shown: One of 4
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  HashAggregate
+                     Group Key: keyval1.key
+                     ->  Seq Scan on keyval1_xxxxxxx keyval1
+(28 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
+                             QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive)
+   Task Count: 1
+   Tasks Shown: All
+   ->  Task
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  GroupAggregate
+               Group Key: k1.key
+               Filter: (sum(k1.value) > $0)
+               InitPlan 1 (returns $0)
+                 ->  Limit
+                       ->  Sort
+                             Sort Key: (sum(k2.value)) DESC
+                             ->  GroupAggregate
+                                   Group Key: k2.key
+                                   ->  Seq Scan on keyval2_xxxxxxx k2
+                                         Filter: (key = 2)
+               ->  Seq Scan on keyval1_xxxxxxx k1
+                     Filter: (key = 2)
+(18 rows)
+
 -- Simple join subquery pushdown
 SELECT
 	avg(array_length(events, 1)) AS event_average
@ -1124,7 +1281,7 @@ LIMIT
 -- also set the min messages to WARNING to skip
 -- CASCADE NOTICE messagez
 SET client_min_messages TO WARNING;
-DROP TABLE users, events;
+DROP TABLE users, events, subquery_pruning_varchar_test_table, keyval1, keyval2, keyvalref;
 DROP TYPE user_composite_type CASCADE;
 SET client_min_messages TO DEFAULT;
 SET citus.subquery_pushdown to OFF;
--- a/src/test/regress/sql/multi_reference_table.sql
+++ b/src/test/regress/sql/multi_reference_table.sql
@ -281,7 +281,6 @@ GROUP BY
 	GROUPING sets ((value_4), (value_3))
 ORDER BY 1, 2, 3;

-
 -- distinct clauses also work fine
 SELECT DISTINCT
 	value_4
@ -620,7 +619,7 @@ SET citus.shard_count TO 6;
 SET citus.shard_replication_factor TO 2;

 CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp);
-SELECT create_distributed_table('colocated_table_test', 'value_1');
+    SELECT create_distributed_table('colocated_table_test', 'value_1');

 CREATE TABLE colocated_table_test_2 (value_1 int, value_2 float, value_3 text, value_4 timestamp);
 SELECT create_distributed_table('colocated_table_test_2', 'value_1');
@ -964,8 +963,7 @@ SELECT select_count_all();
 TRUNCATE reference_table_test;

 -- reference tables work with composite key
-- and we even do not need to create hash
-- function etc.
+-- and we even do not need to create hash function etc.

 -- first create the type on all nodes
 CREATE TYPE reference_comp_key as (key text, value text);
@ -1007,6 +1005,11 @@ ALTER TABLE reference_table_test ADD COLUMN value_dummy INT;
 INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
 ROLLBACK;

+-- Previous issue failed to rename reference tables in subqueries
+EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1
+HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0
+ORDER BY 1;
+
 -- clean up tables, ...
 SET client_min_messages TO ERROR;
 DROP SEQUENCE example_ref_value_seq;
--- a/src/test/regress/sql/multi_select_distinct.sql
+++ b/src/test/regress/sql/multi_select_distinct.sql
@ -9,6 +9,18 @@ ANALYZE lineitem_hash_part;
 -- function calls are supported
 SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;

+SELECT DISTINCT l_orderkey, avg(l_linenumber)
+FROM lineitem_hash_part
+GROUP BY l_orderkey
+HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
+LIMIT 10;
+
+SELECT DISTINCT l_orderkey
+FROM lineitem_hash_part
+GROUP BY l_orderkey
+HAVING (select avg(distinct l_linenumber) = l_orderkey)
+LIMIT 10;
+
 SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;

 -- const expressions are supported
--- a/src/test/regress/sql/multi_subquery.sql
+++ b/src/test/regress/sql/multi_subquery.sql
@ -620,10 +620,50 @@ SELECT * FROM
 		a_inner)
 AS foo;

-DROP TABLE subquery_pruning_varchar_test_table;
-
 RESET citus.enable_router_execution;

+-- Test https://github.com/citusdata/citus/issues/3424
+insert into subquery_pruning_varchar_test_table values ('1', '1'), (2, '1'), (3, '2'), (3, '1'), (4, '4'), (5, '6');
+
+WITH cte_1 AS (SELECT b max FROM subquery_pruning_varchar_test_table)
+SELECT a
+FROM subquery_pruning_varchar_test_table
+JOIN cte_1 ON a = max::text
+GROUP BY a HAVING a = (SELECT a)
+ORDER BY 1;
+
+-- Test https://github.com/citusdata/citus/issues/3432
+SELECT t1.event_type FROM events_table t1
+GROUP BY t1.event_type HAVING t1.event_type > avg((SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
+ORDER BY 1;
+
+SELECT t1.event_type FROM events_table t1
+GROUP BY t1.event_type HAVING t1.event_type > avg(2 + (SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
+ORDER BY 1;
+
+-- Test https://github.com/citusdata/citus/issues/3433
+CREATE TABLE keyval1 (key int, value int);
+SELECT create_distributed_table('keyval1', 'key');
+
+CREATE TABLE keyval2 (key int, value int);
+SELECT create_distributed_table('keyval2', 'key');
+
+CREATE TABLE keyvalref (key int, value int);
+SELECT create_reference_table('keyvalref');
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key);
+
+-- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1);
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
+
 -- Simple join subquery pushdown
 SELECT
 	avg(array_length(events, 1)) AS event_average
@ -843,7 +883,7 @@ LIMIT
 -- also set the min messages to WARNING to skip
 -- CASCADE NOTICE messagez
 SET client_min_messages TO WARNING;
-DROP TABLE users, events;
+DROP TABLE users, events, subquery_pruning_varchar_test_table, keyval1, keyval2, keyvalref;

 DROP TYPE user_composite_type CASCADE;