First phase of addressing HAVING subquery issues

Add failing tests, make changes to avoid crashes at least

Fix HAVING subquery pushdown ignoring reference table only subqueries,
also include HAVING in recursive planning

Given that we have a function IsDistributedTable which includes reference tables,
it seems best to have IsDistributedTableRTE & QueryContainsDistributedTableRTE
reflect that they do not include reference tables in their check

Similarly SublinkList's name should reflect that it only scans WHERE

contain_agg_clause asserts that we don't have SubLinks,
use contain_aggs_of_level as suggested by pg sourcecode
pull/3568/head
Philip Dubé 2020-03-04 03:18:15 +00:00
parent 7793d19b71
commit 81cfa05d3d
14 changed files with 367 additions and 50 deletions

View File

@ -52,6 +52,7 @@
#include "parser/parse_coerce.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
@ -1434,7 +1435,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
Expr *originalExpression = originalTargetEntry->expr;
Expr *newExpression = NULL;
bool hasAggregates = contain_agg_clause((Node *) originalExpression);
bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0);
bool hasWindowFunction = contain_window_function((Node *) originalExpression);
/*
@ -2340,7 +2341,7 @@ ProcessTargetListForWorkerQuery(List *targetEntryList,
TargetEntry *originalTargetEntry = (TargetEntry *) lfirst(targetEntryCell);
Expr *originalExpression = originalTargetEntry->expr;
List *newExpressionList = NIL;
bool hasAggregates = contain_agg_clause((Node *) originalExpression);
bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0);
bool hasWindowFunction = contain_window_function((Node *) originalExpression);
/* reset walker context */
@ -2682,7 +2683,7 @@ TargetListHasAggregates(List *targetEntryList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
Expr *targetExpr = targetEntry->expr;
bool hasAggregates = contain_agg_clause((Node *) targetExpr);
bool hasAggregates = contain_aggs_of_level((Node *) targetExpr, 0);
bool hasWindowFunction = contain_window_function((Node *) targetExpr);
/*
@ -4398,7 +4399,7 @@ GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
SortGroupClause *sgClause = (SortGroupClause *) lfirst(sortClauseCell);
TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList);
Expr *targetExpr = targetEntry->expr;
bool containsAggregate = contain_agg_clause((Node *) targetExpr);
bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0);
bool createNewTargetEntry = false;
/* we are only interested in target entries containing aggregates */
@ -4500,7 +4501,7 @@ HasOrderByAggregate(List *sortClauseList, List *targetList)
SortGroupClause *sortClause = (SortGroupClause *) lfirst(sortClauseCell);
Node *sortExpression = get_sortgroupclause_expr(sortClause, targetList);
bool containsAggregate = contain_agg_clause(sortExpression);
bool containsAggregate = contain_aggs_of_level(sortExpression, 0);
if (containsAggregate)
{
hasOrderByAggregate = true;
@ -4574,7 +4575,7 @@ HasOrderByComplexExpression(List *sortClauseList, List *targetList)
continue;
}
bool nestedAggregate = contain_agg_clause(sortExpression);
bool nestedAggregate = contain_aggs_of_level(sortExpression, 0);
if (nestedAggregate)
{
hasOrderByComplexExpression = true;

View File

@ -72,6 +72,7 @@ static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join
/* Local functions forward declarations */
static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
static Oid NodeTryGetRteRelid(Node *node);
static bool FullCompositeFieldList(List *compositeFieldList);
static bool HasUnsupportedJoinWalker(Node *node, void *context);
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
@ -412,37 +413,55 @@ QueryContainsDistributedTableRTE(Query *query)
/*
* IsDistributedTableRTE gets a node and returns true if the node
* is a range table relation entry that points to a distributed
* relation (i.e., excluding reference tables).
* NodeTryGetRteRelid returns the relid of the given RTE_RELATION RangeTableEntry.
* Returns InvalidOid if any of these assumptions fail for given node.
*/
bool
IsDistributedTableRTE(Node *node)
static Oid
NodeTryGetRteRelid(Node *node)
{
if (node == NULL)
{
return false;
return InvalidOid;
}
if (!IsA(node, RangeTblEntry))
{
return false;
return InvalidOid;
}
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node;
if (rangeTableEntry->rtekind != RTE_RELATION)
{
return false;
return InvalidOid;
}
Oid relationId = rangeTableEntry->relid;
if (!IsCitusTable(relationId) ||
PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
{
return false;
}
return rangeTableEntry->relid;
}
return true;
/*
* IsCitusTableRTE gets a node and returns true if the node is a
* range table relation entry that points to a distributed relation.
*/
bool
IsCitusTableRTE(Node *node)
{
Oid relationId = NodeTryGetRteRelid(node);
return relationId != InvalidOid && IsCitusTable(relationId);
}
/*
* IsDistributedTableRTE gets a node and returns true if the node
* is a range table relation entry that points to a distributed relation,
* returning false still if the relation is a reference table.
*/
bool
IsDistributedTableRTE(Node *node)
{
Oid relationId = NodeTryGetRteRelid(node);
return relationId != InvalidOid && IsCitusTable(relationId) &&
PartitionMethod(relationId) != DISTRIBUTE_BY_NONE;
}

View File

@ -68,6 +68,7 @@
#include "optimizer/tlist.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
#include "utils/fmgroids.h"
@ -719,8 +720,8 @@ BuildJobQuery(MultiNode *multiNode, List *dependentJobList)
jobQuery->limitOffset = limitOffset;
jobQuery->limitCount = limitCount;
jobQuery->havingQual = havingQual;
jobQuery->hasAggs = contain_agg_clause((Node *) targetList) ||
contain_agg_clause((Node *) havingQual);
jobQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0) ||
contain_aggs_of_level((Node *) havingQual, 0);
jobQuery->distinctClause = distinctClause;
jobQuery->hasDistinctOn = hasDistinctOn;
@ -805,7 +806,7 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependentJobList)
reduceQuery->limitOffset = extendedOpNode->limitOffset;
reduceQuery->limitCount = extendedOpNode->limitCount;
reduceQuery->havingQual = extendedOpNode->havingQual;
reduceQuery->hasAggs = contain_agg_clause((Node *) targetList);
reduceQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0);
return reduceQuery;
}
@ -1553,8 +1554,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
/* build the where clause list using select predicates */
List *whereClauseList = QuerySelectClauseList(multiNode);
if (contain_agg_clause((Node *) targetList) ||
contain_agg_clause((Node *) havingQual))
if (contain_aggs_of_level((Node *) targetList, 0) ||
contain_aggs_of_level((Node *) havingQual, 0))
{
hasAggregates = true;
}

View File

@ -59,6 +59,7 @@
#include "distributed/commands/multi_copy.h"
#include "distributed/distributed_planner.h"
#include "distributed/errormessage.h"
#include "distributed/listutils.h"
#include "distributed/log_utils.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_logical_planner.h"
@ -147,7 +148,7 @@ static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
colocatedJoinChecker,
RecursivePlanningContext *
recursivePlanningContext);
static List * SublinkList(Query *originalQuery);
static List * SublinkListFromWhere(Query *originalQuery);
static bool ExtractSublinkWalker(Node *node, List **sublinkList);
static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query);
static bool RecursivelyPlanAllSubqueries(Node *node,
@ -173,6 +174,7 @@ static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *contex
static bool ContainsReferencesToOuterQuery(Query *query);
static bool ContainsReferencesToOuterQueryWalker(Node *node,
VarLevelsUpWalkerContext *context);
static bool NodeContainsSubqueryReferencingOuterQuery(Node *node);
static void WrapFunctionsInSubqueries(Query *query);
static void TransformFunctionRTE(RangeTblEntry *rangeTblEntry);
static bool ShouldTransformRTE(RangeTblEntry *rangeTableEntry);
@ -314,6 +316,18 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context);
}
if (query->havingQual != NULL)
{
if (NodeContainsSubqueryReferencingOuterQuery(query->havingQual))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"Subqueries in HAVING cannot refer to outer query",
NULL, NULL);
}
RecursivelyPlanAllSubqueries(query->havingQual, context);
}
/*
* If the query doesn't have distribution key equality,
* recursively plan some of its subqueries.
@ -528,7 +542,7 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
RecursivePlanningContext *
recursivePlanningContext)
{
List *sublinkList = SublinkList(query);
List *sublinkList = SublinkListFromWhere(query);
ListCell *sublinkCell = NULL;
foreach(sublinkCell, sublinkList)
@ -551,12 +565,12 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
/*
* SublinkList finds the subquery nodes in the where clause of the given query. Note
* SublinkListFromWhere finds the subquery nodes in the where clause of the given query. Note
* that the function should be called on the original query given that postgres
* standard_planner() may convert the subqueries in WHERE clause to joins.
*/
static List *
SublinkList(Query *originalQuery)
SublinkListFromWhere(Query *originalQuery)
{
FromExpr *joinTree = originalQuery->jointree;
List *sublinkList = NIL;
@ -648,8 +662,7 @@ RecursivelyPlanAllSubqueries(Node *node, RecursivePlanningContext *planningConte
if (IsA(node, Query))
{
Query *query = (Query *) node;
if (FindNodeCheckInRangeTableList(query->rtable, IsDistributedTableRTE))
if (FindNodeCheckInRangeTableList(query->rtable, IsCitusTableRTE))
{
RecursivelyPlanSubquery(query, planningContext);
}
@ -1025,7 +1038,7 @@ RecursivelyPlanSetOperations(Query *query, Node *node,
Query *subquery = rangeTableEntry->subquery;
if (rangeTableEntry->rtekind == RTE_SUBQUERY &&
QueryContainsDistributedTableRTE(subquery))
FindNodeCheck((Node *) subquery, IsDistributedTableRTE))
{
RecursivelyPlanSubquery(subquery, context);
}
@ -1223,7 +1236,7 @@ CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context)
/*
* ContainsReferencesToOuterQuery determines whether the given query contains
* any Vars that point outside of the query itself. Such queries cannot be
* anything that points outside of the query itself. Such queries cannot be
* planned recursively.
*/
static bool
@ -1302,6 +1315,29 @@ ContainsReferencesToOuterQueryWalker(Node *node, VarLevelsUpWalkerContext *conte
}
/*
* NodeContainsSubqueryReferencingOuterQuery determines whether the given node
* contains anything that points outside of the query itself.
*/
static bool
NodeContainsSubqueryReferencingOuterQuery(Node *node)
{
List *sublinks = NIL;
ExtractSublinkWalker(node, &sublinks);
SubLink *sublink;
foreach_ptr(sublink, sublinks)
{
if (ContainsReferencesToOuterQuery(castNode(Query, sublink->subselect)))
{
return true;
}
}
return false;
}
/*
* WrapFunctionsInSubqueries iterates over all the immediate Range Table Entries
* of a query and wraps the functions inside (SELECT * FROM fnc() f)

View File

@ -190,6 +190,7 @@ extern bool FindNodeCheck(Node *node, bool (*check)(Node *));
extern bool SingleRelationRepartitionSubquery(Query *queryTree);
extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
extern bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *));
extern bool IsCitusTableRTE(Node *node);
extern bool IsDistributedTableRTE(Node *node);
extern bool QueryContainsDistributedTableRTE(Query *query);
extern bool IsCitusExtraDataContainerRelation(RangeTblEntry *rte);

View File

@ -28,6 +28,9 @@ s/\(ref_id\)=\([0-9]+\)/(ref_id)=(X)/g
# shard table names for multi_subtransactions
s/"t2_[0-9]+"/"t2_xxxxxxx"/g
# shard table names for multi_subquery
s/ keyval(1|2|ref)_[0-9]+ / keyval\1_xxxxxxx /g
# shard table names for custom_aggregate_support
s/ daily_uniques_[0-9]+ / daily_uniques_xxxxxxx /g

View File

@ -280,14 +280,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: Subqueries in HAVING cannot refer to outer query
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: Subqueries in HAVING cannot refer to outer query
DROP TABLE stock;
CREATE TABLE stock (
s_w_id int NOT NULL,

View File

@ -283,14 +283,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: Subqueries in HAVING cannot refer to outer query
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: Subqueries in HAVING cannot refer to outer query
\c - - - :master_port
SET citus.replication_model TO streaming;
SET citus.shard_replication_factor to 1;

View File

@ -990,7 +990,7 @@ INSERT INTO
SET citus.shard_count TO 6;
SET citus.shard_replication_factor TO 2;
CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp);
SELECT create_distributed_table('colocated_table_test', 'value_1');
SELECT create_distributed_table('colocated_table_test', 'value_1');
create_distributed_table
---------------------------------------------------------------------
@ -1533,8 +1533,7 @@ SELECT select_count_all();
TRUNCATE reference_table_test;
-- reference tables work with composite key
-- and we even do not need to create hash
-- function etc.
-- and we even do not need to create hash function etc.
-- first create the type on all nodes
CREATE TYPE reference_comp_key as (key text, value text);
CREATE TABLE reference_table_composite (id int PRIMARY KEY, data reference_comp_key);
@ -1602,6 +1601,39 @@ BEGIN;
ALTER TABLE reference_table_test ADD COLUMN value_dummy INT;
INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
ROLLBACK;
-- Previous issue failed to rename reference tables in subqueries
EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1
HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Sort
Sort Key: remote_scan.value_1
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> HashAggregate
Group Key: remote_scan.value_1
-> Result
One-Time Filter: ($0 > '0'::double precision)
-> Custom Scan (Citus Adaptive)
Filter: ($0 > '0'::double precision)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on reference_table_test_1250000 rt
Filter: (value_2 = '2'::double precision)
Task Count: 6
Tasks Shown: One of 6
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: colocated_table_test.value_1
-> Seq Scan on colocated_table_test_1250005 colocated_table_test
(25 rows)
-- clean up tables, ...
SET client_min_messages TO ERROR;
DROP SEQUENCE example_ref_value_seq;

View File

@ -10,6 +10,18 @@ SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
---------------------------------------------------------------------
(0 rows)
SELECT DISTINCT l_orderkey, avg(l_linenumber)
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
LIMIT 10;
ERROR: Subqueries in HAVING cannot refer to outer query
SELECT DISTINCT l_orderkey
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING (select avg(distinct l_linenumber) = l_orderkey)
LIMIT 10;
ERROR: Subqueries in HAVING cannot refer to outer query
SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
l_partkey | ?column?
---------------------------------------------------------------------

View File

@ -884,8 +884,165 @@ AS foo;
---------------------------------------------------------------------
(0 rows)
DROP TABLE subquery_pruning_varchar_test_table;
RESET citus.enable_router_execution;
-- Test https://github.com/citusdata/citus/issues/3424
insert into subquery_pruning_varchar_test_table values ('1', '1'), (2, '1'), (3, '2'), (3, '1'), (4, '4'), (5, '6');
WITH cte_1 AS (SELECT b max FROM subquery_pruning_varchar_test_table)
SELECT a
FROM subquery_pruning_varchar_test_table
JOIN cte_1 ON a = max::text
GROUP BY a HAVING a = (SELECT a)
ORDER BY 1;
ERROR: Subqueries in HAVING cannot refer to outer query
-- Test https://github.com/citusdata/citus/issues/3432
SELECT t1.event_type FROM events_table t1
GROUP BY t1.event_type HAVING t1.event_type > avg((SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
ORDER BY 1;
ERROR: cannot handle unplanned sub-select
SELECT t1.event_type FROM events_table t1
GROUP BY t1.event_type HAVING t1.event_type > avg(2 + (SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
ORDER BY 1;
ERROR: cannot handle unplanned sub-select
-- Test https://github.com/citusdata/citus/issues/3433
CREATE TABLE keyval1 (key int, value int);
SELECT create_distributed_table('keyval1', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE keyval2 (key int, value int);
SELECT create_distributed_table('keyval2', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE keyvalref (key int, value int);
SELECT create_reference_table('keyvalref');
create_reference_table
---------------------------------------------------------------------
(1 row)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key);
QUERY PLAN
---------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.worker_column_2
Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: key
-> Seq Scan on keyvalref_xxxxxxx keyvalref
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: keyval1.key
-> Seq Scan on keyval1_xxxxxxx keyval1
(22 rows)
-- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1);
QUERY PLAN
---------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.worker_column_2
Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Sort
Sort Key: (sum(value)) DESC
-> HashAggregate
Group Key: key
-> Seq Scan on keyvalref_xxxxxxx keyvalref
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: keyval1.key
-> Seq Scan on keyval1_xxxxxxx keyval1
(25 rows)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
QUERY PLAN
---------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.worker_column_2
Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0)
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Limit
-> Sort
Sort Key: remote_scan.sum DESC
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Sort
Sort Key: (sum(value)) DESC
-> HashAggregate
Group Key: key
-> Seq Scan on keyval2_xxxxxxx keyval2
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: keyval1.key
-> Seq Scan on keyval1_xxxxxxx keyval1
(28 rows)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> GroupAggregate
Group Key: k1.key
Filter: (sum(k1.value) > $0)
InitPlan 1 (returns $0)
-> Limit
-> Sort
Sort Key: (sum(k2.value)) DESC
-> GroupAggregate
Group Key: k2.key
-> Seq Scan on keyval2_xxxxxxx k2
Filter: (key = 2)
-> Seq Scan on keyval1_xxxxxxx k1
Filter: (key = 2)
(18 rows)
-- Simple join subquery pushdown
SELECT
avg(array_length(events, 1)) AS event_average
@ -1124,7 +1281,7 @@ LIMIT
-- also set the min messages to WARNING to skip
-- CASCADE NOTICE messagez
SET client_min_messages TO WARNING;
DROP TABLE users, events;
DROP TABLE users, events, subquery_pruning_varchar_test_table, keyval1, keyval2, keyvalref;
DROP TYPE user_composite_type CASCADE;
SET client_min_messages TO DEFAULT;
SET citus.subquery_pushdown to OFF;

View File

@ -281,7 +281,6 @@ GROUP BY
GROUPING sets ((value_4), (value_3))
ORDER BY 1, 2, 3;
-- distinct clauses also work fine
SELECT DISTINCT
value_4
@ -620,7 +619,7 @@ SET citus.shard_count TO 6;
SET citus.shard_replication_factor TO 2;
CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp);
SELECT create_distributed_table('colocated_table_test', 'value_1');
SELECT create_distributed_table('colocated_table_test', 'value_1');
CREATE TABLE colocated_table_test_2 (value_1 int, value_2 float, value_3 text, value_4 timestamp);
SELECT create_distributed_table('colocated_table_test_2', 'value_1');
@ -964,8 +963,7 @@ SELECT select_count_all();
TRUNCATE reference_table_test;
-- reference tables work with composite key
-- and we even do not need to create hash
-- function etc.
-- and we even do not need to create hash function etc.
-- first create the type on all nodes
CREATE TYPE reference_comp_key as (key text, value text);
@ -1007,6 +1005,11 @@ ALTER TABLE reference_table_test ADD COLUMN value_dummy INT;
INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
ROLLBACK;
-- Previous issue failed to rename reference tables in subqueries
EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1
HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0
ORDER BY 1;
-- clean up tables, ...
SET client_min_messages TO ERROR;
DROP SEQUENCE example_ref_value_seq;

View File

@ -9,6 +9,18 @@ ANALYZE lineitem_hash_part;
-- function calls are supported
SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
SELECT DISTINCT l_orderkey, avg(l_linenumber)
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
LIMIT 10;
SELECT DISTINCT l_orderkey
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING (select avg(distinct l_linenumber) = l_orderkey)
LIMIT 10;
SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
-- const expressions are supported

View File

@ -620,10 +620,50 @@ SELECT * FROM
a_inner)
AS foo;
DROP TABLE subquery_pruning_varchar_test_table;
RESET citus.enable_router_execution;
-- Test https://github.com/citusdata/citus/issues/3424
insert into subquery_pruning_varchar_test_table values ('1', '1'), (2, '1'), (3, '2'), (3, '1'), (4, '4'), (5, '6');
WITH cte_1 AS (SELECT b max FROM subquery_pruning_varchar_test_table)
SELECT a
FROM subquery_pruning_varchar_test_table
JOIN cte_1 ON a = max::text
GROUP BY a HAVING a = (SELECT a)
ORDER BY 1;
-- Test https://github.com/citusdata/citus/issues/3432
SELECT t1.event_type FROM events_table t1
GROUP BY t1.event_type HAVING t1.event_type > avg((SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
ORDER BY 1;
SELECT t1.event_type FROM events_table t1
GROUP BY t1.event_type HAVING t1.event_type > avg(2 + (SELECT t2.value_2 FROM users_table t2 ORDER BY 1 DESC LIMIT 1))
ORDER BY 1;
-- Test https://github.com/citusdata/citus/issues/3433
CREATE TABLE keyval1 (key int, value int);
SELECT create_distributed_table('keyval1', 'key');
CREATE TABLE keyval2 (key int, value int);
SELECT create_distributed_table('keyval2', 'key');
CREATE TABLE keyvalref (key int, value int);
SELECT create_reference_table('keyvalref');
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key);
-- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1);
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
EXPLAIN (COSTS OFF)
SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 GROUP BY key ORDER BY 1 DESC LIMIT 1);
-- Simple join subquery pushdown
SELECT
avg(array_length(events, 1)) AS event_average
@ -843,7 +883,7 @@ LIMIT
-- also set the min messages to WARNING to skip
-- CASCADE NOTICE messagez
SET client_min_messages TO WARNING;
DROP TABLE users, events;
DROP TABLE users, events, subquery_pruning_varchar_test_table, keyval1, keyval2, keyvalref;
DROP TYPE user_composite_type CASCADE;