Merge pull request #3568 from citusdata/fix-having-subquery-ref

First phase of addressing HAVING subquery issues
pull/3601/head
Philip Dubé 2020-03-09 18:23:02 +00:00 committed by GitHub
commit 877687cc64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 367 additions and 50 deletions

View File

@ -52,6 +52,7 @@
#include "parser/parse_coerce.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
@ -1434,7 +1435,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
Expr *originalExpression = originalTargetEntry->expr;
Expr *newExpression = NULL;
bool hasAggregates = contain_agg_clause((Node *) originalExpression);
bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0);
bool hasWindowFunction = contain_window_function((Node *) originalExpression);
/*
@ -2340,7 +2341,7 @@ ProcessTargetListForWorkerQuery(List *targetEntryList,
TargetEntry *originalTargetEntry = (TargetEntry *) lfirst(targetEntryCell);
Expr *originalExpression = originalTargetEntry->expr;
List *newExpressionList = NIL;
bool hasAggregates = contain_agg_clause((Node *) originalExpression);
bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0);
bool hasWindowFunction = contain_window_function((Node *) originalExpression);
/* reset walker context */
@ -2682,7 +2683,7 @@ TargetListHasAggregates(List *targetEntryList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
Expr *targetExpr = targetEntry->expr;
bool hasAggregates = contain_agg_clause((Node *) targetExpr);
bool hasAggregates = contain_aggs_of_level((Node *) targetExpr, 0);
bool hasWindowFunction = contain_window_function((Node *) targetExpr);
/*
@ -4398,7 +4399,7 @@ GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
SortGroupClause *sgClause = (SortGroupClause *) lfirst(sortClauseCell);
TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList);
Expr *targetExpr = targetEntry->expr;
bool containsAggregate = contain_agg_clause((Node *) targetExpr);
bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0);
bool createNewTargetEntry = false;
/* we are only interested in target entries containing aggregates */
@ -4500,7 +4501,7 @@ HasOrderByAggregate(List *sortClauseList, List *targetList)
SortGroupClause *sortClause = (SortGroupClause *) lfirst(sortClauseCell);
Node *sortExpression = get_sortgroupclause_expr(sortClause, targetList);
bool containsAggregate = contain_agg_clause(sortExpression);
bool containsAggregate = contain_aggs_of_level(sortExpression, 0);
if (containsAggregate)
{
hasOrderByAggregate = true;
@ -4574,7 +4575,7 @@ HasOrderByComplexExpression(List *sortClauseList, List *targetList)
continue;
}
bool nestedAggregate = contain_agg_clause(sortExpression);
bool nestedAggregate = contain_aggs_of_level(sortExpression, 0);
if (nestedAggregate)
{
hasOrderByComplexExpression = true;

View File

@ -72,6 +72,7 @@ static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join
/* Local functions forward declarations */
static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
static Oid NodeTryGetRteRelid(Node *node);
static bool FullCompositeFieldList(List *compositeFieldList);
static bool HasUnsupportedJoinWalker(Node *node, void *context);
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
@ -412,37 +413,55 @@ QueryContainsDistributedTableRTE(Query *query)
/*
* IsDistributedTableRTE gets a node and returns true if the node
* is a range table relation entry that points to a distributed
* relation (i.e., excluding reference tables).
* NodeTryGetRteRelid returns the relid of the given RTE_RELATION RangeTableEntry.
* Returns InvalidOid if any of these assumptions fail for given node.
*/
bool
IsDistributedTableRTE(Node *node)
static Oid
NodeTryGetRteRelid(Node *node)
{
if (node == NULL)
{
return false;
return InvalidOid;
}
if (!IsA(node, RangeTblEntry))
{
return false;
return InvalidOid;
}
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node;
if (rangeTableEntry->rtekind != RTE_RELATION)
{
return false;
return InvalidOid;
}
Oid relationId = rangeTableEntry->relid;
if (!IsCitusTable(relationId) ||
PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
return rangeTableEntry->relid;
}
/*
* IsCitusTableRTE gets a node and returns true if the node is a
* range table relation entry that points to a distributed relation.
*/
bool
IsCitusTableRTE(Node *node)
{
return false;
Oid relationId = NodeTryGetRteRelid(node);
return relationId != InvalidOid && IsCitusTable(relationId);
}
return true;
/*
* IsDistributedTableRTE gets a node and returns true if the node
* is a range table relation entry that points to a distributed relation,
* returning false still if the relation is a reference table.
*/
bool
IsDistributedTableRTE(Node *node)
{
Oid relationId = NodeTryGetRteRelid(node);
return relationId != InvalidOid && IsCitusTable(relationId) &&
PartitionMethod(relationId) != DISTRIBUTE_BY_NONE;
}

View File

@ -68,6 +68,7 @@
#include "optimizer/tlist.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
#include "utils/fmgroids.h"
@ -719,8 +720,8 @@ BuildJobQuery(MultiNode *multiNode, List *dependentJobList)
jobQuery->limitOffset = limitOffset;
jobQuery->limitCount = limitCount;
jobQuery->havingQual = havingQual;
jobQuery->hasAggs = contain_agg_clause((Node *) targetList) ||
contain_agg_clause((Node *) havingQual);
jobQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0) ||
contain_aggs_of_level((Node *) havingQual, 0);
jobQuery->distinctClause = distinctClause;
jobQuery->hasDistinctOn = hasDistinctOn;
@ -805,7 +806,7 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependentJobList)
reduceQuery->limitOffset = extendedOpNode->limitOffset;
reduceQuery->limitCount = extendedOpNode->limitCount;
reduceQuery->havingQual = extendedOpNode->havingQual;
reduceQuery->hasAggs = contain_agg_clause((Node *) targetList);
reduceQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0);
return reduceQuery;
}
@ -1553,8 +1554,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
/* build the where clause list using select predicates */
List *whereClauseList = QuerySelectClauseList(multiNode);
if (contain_agg_clause((Node *) targetList) ||
contain_agg_clause((Node *) havingQual))
if (contain_aggs_of_level((Node *) targetList, 0) ||
contain_aggs_of_level((Node *) havingQual, 0))
{
hasAggregates = true;
}

View File

@ -59,6 +59,7 @@
#include "distributed/commands/multi_copy.h"
#include "distributed/distributed_planner.h"
#include "distributed/errormessage.h"
#include "distributed/listutils.h"
#include "distributed/log_utils.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_logical_planner.h"
@ -147,7 +148,7 @@ static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
colocatedJoinChecker,
RecursivePlanningContext *
recursivePlanningContext);
static List * SublinkList(Query *originalQuery);
static List * SublinkListFromWhere(Query *originalQuery);
static bool ExtractSublinkWalker(Node *node, List **sublinkList);
static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query);
static bool RecursivelyPlanAllSubqueries(Node *node,
@ -173,6 +174,7 @@ static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *contex
static bool ContainsReferencesToOuterQuery(Query *query);
static bool ContainsReferencesToOuterQueryWalker(Node *node,
VarLevelsUpWalkerContext *context);
static bool NodeContainsSubqueryReferencingOuterQuery(Node *node);
static void WrapFunctionsInSubqueries(Query *query);
static void TransformFunctionRTE(RangeTblEntry *rangeTblEntry);
static bool ShouldTransformRTE(RangeTblEntry *rangeTableEntry);
@ -314,6 +316,18 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context);
}
if (query->havingQual != NULL)
{
if (NodeContainsSubqueryReferencingOuterQuery(query->havingQual))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"Subqueries in HAVING cannot refer to outer query",
NULL, NULL);
}
RecursivelyPlanAllSubqueries(query->havingQual, context);
}
/*
* If the query doesn't have distribution key equality,
* recursively plan some of its subqueries.
@ -528,7 +542,7 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
RecursivePlanningContext *
recursivePlanningContext)
{
List *sublinkList = SublinkList(query);
List *sublinkList = SublinkListFromWhere(query);
ListCell *sublinkCell = NULL;
foreach(sublinkCell, sublinkList)
@ -551,12 +565,12 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
/*
* SublinkList finds the subquery nodes in the where clause of the given query. Note
* SublinkListFromWhere finds the subquery nodes in the where clause of the given query. Note
* that the function should be called on the original query given that postgres
* standard_planner() may convert the subqueries in WHERE clause to joins.
*/
static List *
SublinkList(Query *originalQuery)
SublinkListFromWhere(Query *originalQuery)
{
FromExpr *joinTree = originalQuery->jointree;
List *sublinkList = NIL;
@ -648,8 +662,7 @@ RecursivelyPlanAllSubqueries(Node *node, RecursivePlanningContext *planningConte
if (IsA(node, Query))
{
Query *query = (Query *) node;
if (FindNodeCheckInRangeTableList(query->rtable, IsDistributedTableRTE))
if (FindNodeCheckInRangeTableList(query->rtable, IsCitusTableRTE))
{
RecursivelyPlanSubquery(query, planningContext);
}
@ -1025,7 +1038,7 @@ RecursivelyPlanSetOperations(Query *query, Node *node,
Query *subquery = rangeTableEntry->subquery;
if (rangeTableEntry->rtekind == RTE_SUBQUERY &&
QueryContainsDistributedTableRTE(subquery))
FindNodeCheck((Node *) subquery, IsDistributedTableRTE))
{
RecursivelyPlanSubquery(subquery, context);
}
@ -1223,7 +1236,7 @@ CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context)
/*
* ContainsReferencesToOuterQuery determines whether the given query contains
* any Vars that point outside of the query itself. Such queries cannot be
* anything that points outside of the query itself. Such queries cannot be
* planned recursively.
*/
static bool
@ -1302,6 +1315,29 @@ ContainsReferencesToOuterQueryWalker(Node *node, VarLevelsUpWalkerContext *conte
}
/*
* NodeContainsSubqueryReferencingOuterQuery determines whether the given node
* contains anything that points outside of the query itself.
*/
static bool
NodeContainsSubqueryReferencingOuterQuery(Node *node)
{
List *sublinks = NIL;
ExtractSublinkWalker(node, &sublinks);
SubLink *sublink;
foreach_ptr(sublink, sublinks)
{
if (ContainsReferencesToOuterQuery(castNode(Query, sublink->subselect)))
{
return true;
}
}
return false;
}
/*
* WrapFunctionsInSubqueries iterates over all the immediate Range Table Entries
* of a query and wraps the functions inside (SELECT * FROM fnc() f)

View File

@ -190,6 +190,7 @@ extern bool FindNodeCheck(Node *node, bool (*check)(Node *));
extern bool SingleRelationRepartitionSubquery(Query *queryTree);
extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
extern bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *));
extern bool IsCitusTableRTE(Node *node);
extern bool IsDistributedTableRTE(Node *node);
extern bool QueryContainsDistributedTableRTE(Query *query);
extern bool IsCitusExtraDataContainerRelation(RangeTblEntry *rte);

View File

@ -28,6 +28,9 @@ s/\(ref_id\)=\([0-9]+\)/(ref_id)=(X)/g
# shard table names for multi_subtransactions
s/"t2_[0-9]+"/"t2_xxxxxxx"/g
# shard table names for multi_subquery
s/ keyval(1|2|ref)_[0-9]+ / keyval\1_xxxxxxx /g
# shard table names for custom_aggregate_support
s/ daily_uniques_[0-9]+ / daily_uniques_xxxxxxx /g

View File

@ -280,14 +280,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: Subqueries in HAVING cannot refer to outer query
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: Subqueries in HAVING cannot refer to outer query
DROP TABLE stock;
CREATE TABLE stock (
s_w_id int NOT NULL,

View File

@ -283,14 +283,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: Subqueries in HAVING cannot refer to outer query
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: Subqueries in HAVING cannot refer to outer query
\c - - - :master_port
SET citus.replication_model TO streaming;
SET citus.shard_replication_factor to 1;

View File

@ -1533,8 +1533,7 @@ SELECT select_count_all();
TRUNCATE reference_table_test;
-- reference tables work with composite key
-- and we even do not need to create hash
-- function etc.
-- and we even do not need to create hash function etc.
-- first create the type on all nodes
CREATE TYPE reference_comp_key as (key text, value text);
CREATE TABLE reference_table_composite (id int PRIMARY KEY, data reference_comp_key);
@ -1602,6 +1601,39 @@ BEGIN;
ALTER TABLE reference_table_test ADD COLUMN value_dummy INT;
INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
ROLLBACK;
-- Previous issue failed to rename reference tables in subqueries
EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1
HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Sort
Sort Key: remote_scan.value_1
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> HashAggregate
Group Key: remote_scan.value_1
-> Result
One-Time Filter: ($0 > '0'::double precision)
-> Custom Scan (Citus Adaptive)
Filter: ($0 > '0'::double precision)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on reference_table_test_1250000 rt
Filter: (value_2 = '2'::double precision)
Task Count: 6
Tasks Shown: One of 6
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: colocated_table_test.value_1
-> Seq Scan on colocated_table_test_1250005 colocated_table_test
(25 rows)
-- clean up tables, ...
SET client_min_messages TO ERROR;
DROP SEQUENCE example_ref_value_seq;

View File

@ -10,6 +10,18 @@ SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
---------------------------------------------------------------------
(0 rows)
SELECT DISTINCT l_orderkey, avg(l_linenumber)
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
LIMIT 10;
ERROR: Subqueries in HAVING cannot refer to outer query
SELECT DISTINCT l_orderkey
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING (select avg(distinct l_linenumber) = l_orderkey)
LIMIT 10;
ERROR: Subqueries in HAVING cannot refer to outer query
SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
l_partkey | ?column?
---------------------------------------------------------------------

View File

@ -884,8 +884,165 @@ AS foo;
---------------------------------------------------------------------
(0 rows)
DROP TABLE subquery_pruning_varchar_test_table;
RESET citus.enable_router_execution;
-- Test https://github.com/citusdata/citus/issues/3424
insert into subquery_pruning_varchar_test_table values ('1', '1'), (2, '1'), (3, '2'), (3, '1'), (4, '4'), (5, '6');
WITH cte_1 AS (SELECT b max FROM subquery_pruning_varchar_test_table)
SELECT a
FROM subquery_pruning_varchar_test_table
JOIN cte_1 ON a = max::text
GROUP BY a HAVING a = (SELECT a)
ORDER BY 1;
ERROR: Subqueries in HAVING cannot refer to outer query
-- Test https://github.com/citusdata/citus/issues/3432
SELECT t1.event_type FROM events_table t1
GROUP BY t1.event_type HAVING t1.event_type > avg((SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
ORDER BY 1;
ERROR: cannot handle unplanned sub-select
SELECT t1.event_type FROM events_table t1
GROUP BY t1.event_type HAVING t1.event_type > avg(2 + (SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
ORDER BY 1;
ERROR: cannot handle unplanned sub-select
-- Test https://github.com/citusdata/citus/issues/3433
CREATE TABLE keyval1 (key int, value int);
SELECT create_distributed_table('keyval1', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE keyval2 (key int, value int);
SELECT create_distributed_table('keyval2', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE keyvalref (key int, value int);
SELECT create_reference_table('keyvalref');
create_reference_table
---------------------------------------------------------------------
(1 row)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key);
QUERY PLAN
---------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.worker_column_2
Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: key
-> Seq Scan on keyvalref_xxxxxxx keyvalref
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: keyval1.key
-> Seq Scan on keyval1_xxxxxxx keyval1
(22 rows)
-- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1);
QUERY PLAN
---------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.worker_column_2
Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Sort
Sort Key: (sum(value)) DESC
-> HashAggregate
Group Key: key
-> Seq Scan on keyvalref_xxxxxxx keyvalref
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: keyval1.key
-> Seq Scan on keyval1_xxxxxxx keyval1
(25 rows)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
QUERY PLAN
---------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.worker_column_2
Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Limit
-> Sort
Sort Key: remote_scan.sum DESC
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Sort
Sort Key: (sum(value)) DESC
-> HashAggregate
Group Key: key
-> Seq Scan on keyval2_xxxxxxx keyval2
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: keyval1.key
-> Seq Scan on keyval1_xxxxxxx keyval1
(28 rows)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> GroupAggregate
Group Key: k1.key
Filter: (sum(k1.value) > $0)
InitPlan 1 (returns $0)
-> Limit
-> Sort
Sort Key: (sum(k2.value)) DESC
-> GroupAggregate
Group Key: k2.key
-> Seq Scan on keyval2_xxxxxxx k2
Filter: (key = 2)
-> Seq Scan on keyval1_xxxxxxx k1
Filter: (key = 2)
(18 rows)
-- Simple join subquery pushdown
SELECT
avg(array_length(events, 1)) AS event_average
@ -1124,7 +1281,7 @@ LIMIT
-- also set the min messages to WARNING to skip
-- CASCADE NOTICE messagez
SET client_min_messages TO WARNING;
DROP TABLE users, events;
DROP TABLE users, events, subquery_pruning_varchar_test_table, keyval1, keyval2, keyvalref;
DROP TYPE user_composite_type CASCADE;
SET client_min_messages TO DEFAULT;
SET citus.subquery_pushdown to OFF;

View File

@ -281,7 +281,6 @@ GROUP BY
GROUPING sets ((value_4), (value_3))
ORDER BY 1, 2, 3;
-- distinct clauses also work fine
SELECT DISTINCT
value_4
@ -964,8 +963,7 @@ SELECT select_count_all();
TRUNCATE reference_table_test;
-- reference tables work with composite key
-- and we even do not need to create hash
-- function etc.
-- and we even do not need to create hash function etc.
-- first create the type on all nodes
CREATE TYPE reference_comp_key as (key text, value text);
@ -1007,6 +1005,11 @@ ALTER TABLE reference_table_test ADD COLUMN value_dummy INT;
INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
ROLLBACK;
-- Previous issue failed to rename reference tables in subqueries
EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1
HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0
ORDER BY 1;
-- clean up tables, ...
SET client_min_messages TO ERROR;
DROP SEQUENCE example_ref_value_seq;

View File

@ -9,6 +9,18 @@ ANALYZE lineitem_hash_part;
-- function calls are supported
SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
SELECT DISTINCT l_orderkey, avg(l_linenumber)
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
LIMIT 10;
SELECT DISTINCT l_orderkey
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING (select avg(distinct l_linenumber) = l_orderkey)
LIMIT 10;
SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
-- const expressions are supported

View File

@ -620,10 +620,50 @@ SELECT * FROM
a_inner)
AS foo;
DROP TABLE subquery_pruning_varchar_test_table;
RESET citus.enable_router_execution;
-- Test https://github.com/citusdata/citus/issues/3424
insert into subquery_pruning_varchar_test_table values ('1', '1'), (2, '1'), (3, '2'), (3, '1'), (4, '4'), (5, '6');
WITH cte_1 AS (SELECT b max FROM subquery_pruning_varchar_test_table)
SELECT a
FROM subquery_pruning_varchar_test_table
JOIN cte_1 ON a = max::text
GROUP BY a HAVING a = (SELECT a)
ORDER BY 1;
-- Test https://github.com/citusdata/citus/issues/3432
SELECT t1.event_type FROM events_table t1
GROUP BY t1.event_type HAVING t1.event_type > avg((SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
ORDER BY 1;
SELECT t1.event_type FROM events_table t1
GROUP BY t1.event_type HAVING t1.event_type > avg(2 + (SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
ORDER BY 1;
-- Test https://github.com/citusdata/citus/issues/3433
CREATE TABLE keyval1 (key int, value int);
SELECT create_distributed_table('keyval1', 'key');
CREATE TABLE keyval2 (key int, value int);
SELECT create_distributed_table('keyval2', 'key');
CREATE TABLE keyvalref (key int, value int);
SELECT create_reference_table('keyvalref');
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key);
-- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1);
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
-- Simple join subquery pushdown
SELECT
avg(array_length(events, 1)) AS event_average
@ -843,7 +883,7 @@ LIMIT
-- also set the min messages to WARNING to skip
-- CASCADE NOTICE messagez
SET client_min_messages TO WARNING;
DROP TABLE users, events;
DROP TABLE users, events, subquery_pruning_varchar_test_table, keyval1, keyval2, keyvalref;
DROP TYPE user_composite_type CASCADE;