Support subqueries in HAVING (#3098)

Areas for further optimization:
- Don't save subquery results to a local file on the coordinator when the subquery is not in the having clause
- Push the the HAVING with subquery to the workers if there's a group by on the distribution column
- Don't push down the results to the workers when we don't push down the HAVING clause, only the coordinator needs it

Fixes #520
Fixes #756
Closes #2047
pull/3112/head^2
Jelte Fennema 2019-10-16 16:40:14 +02:00 committed by GitHub
parent 3bfb2a078b
commit 7abedc38b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 1793 additions and 267 deletions

View File

@ -263,8 +263,6 @@ RemoteFileDestReceiverStartup(DestReceiver *dest, int operation,
fileName = QueryResultFileName(resultId);
elog(DEBUG1, "writing to local file \"%s\"", fileName);
resultDest->fileCompat = FileCompatFromFileStart(FileOpenForTransmit(fileName,
fileFlags,
fileMode));

View File

@ -36,7 +36,11 @@ ExecuteSubPlans(DistributedPlan *distributedPlan)
List *subPlanList = distributedPlan->subPlanList;
ListCell *subPlanCell = NULL;
List *nodeList = NIL;
bool writeLocalFile = false;
/* If you're not a worker node, you should write local file to make sure
* you have the data too */
bool writeLocalFile = GetLocalGroupId() == 0;
if (subPlanList == NIL)
{

View File

@ -1546,6 +1546,8 @@ MasterAggregateExpression(Aggref *originalAggregate,
columnIndex++;
}
columnToUpdate->varno = masterTableId;
columnToUpdate->varnoold = masterTableId;
columnToUpdate->varattno = startColumnCount + columnIndex;
columnToUpdate->varoattno = startColumnCount + columnIndex;
}

View File

@ -828,13 +828,13 @@ DeferErrorIfQueryNotSupported(Query *queryTree)
/*
* There could be Sublinks in the target list as well. To produce better
* error messages we're checking sublinks in the where clause.
* error messages we're checking if that's the case.
*/
if (queryTree->hasSubLinks && !WhereClauseContainsSubquery(queryTree))
if (queryTree->hasSubLinks && TargetListContainsSubquery(queryTree))
{
preconditionsSatisfied = false;
errorMessage = "could not run distributed query with subquery outside the "
"FROM and WHERE clauses";
"FROM, WHERE and HAVING clauses";
errorHint = filterHint;
}

View File

@ -30,6 +30,7 @@
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
#include "optimizer/tlist.h"
#include "optimizer/subselect.h"
#if PG_VERSION_NUM >= 120000
#include "optimizer/optimizer.h"
#else
@ -46,14 +47,14 @@
static List * MasterTargetList(List *workerTargetList);
static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTargetList,
CustomScan *remoteScan);
static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
static Agg * BuildAggregatePlan(PlannerInfo *root, Query *masterQuery, Plan *subPlan);
static bool HasDistinctAggregate(Query *masterQuery);
static bool UseGroupAggregateWithHLL(Query *masterQuery);
static bool QueryContainsAggregateWithHLL(Query *query);
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
static List * PrepareTargetListForNextPlan(List *targetList);
static Agg * makeAggNode(List *groupClauseList, List *havingQual,
AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan);
static void FinalizeStatement(PlannerInfo *root, PlannedStmt *stmt, Plan *topLevelPlan);
/*
@ -136,12 +137,21 @@ static PlannedStmt *
BuildSelectStatement(Query *masterQuery, List *masterTargetList, CustomScan *remoteScan)
{
PlannedStmt *selectStatement = NULL;
RangeTblEntry *customScanRangeTableEntry = NULL;
Agg *aggregationPlan = NULL;
Plan *topLevelPlan = NULL;
List *sortClauseList = copyObject(masterQuery->sortClause);
ListCell *targetEntryCell = NULL;
List *columnNameList = NULL;
List *sortClauseList = copyObject(masterQuery->sortClause);
RangeTblEntry *customScanRangeTableEntry = NULL;
PlannerGlobal *glob = makeNode(PlannerGlobal);
PlannerInfo *root = makeNode(PlannerInfo);
root->parse = masterQuery;
root->glob = glob;
root->query_level = 1;
root->planner_cxt = CurrentMemoryContext;
root->wt_param_id = -1;
/* (1) make PlannedStmt and set basic information */
selectStatement = makeNode(PlannedStmt);
@ -152,29 +162,29 @@ BuildSelectStatement(Query *masterQuery, List *masterTargetList, CustomScan *rem
/* top level select query should have only one range table entry */
Assert(list_length(masterQuery->rtable) == 1);
/* compute column names for the custom range table entry */
foreach(targetEntryCell, masterTargetList)
{
TargetEntry *targetEntry = lfirst(targetEntryCell);
columnNameList = lappend(columnNameList, makeString(targetEntry->resname));
}
customScanRangeTableEntry = RemoteScanRangeTableEntry(columnNameList);
/* set the single element range table list */
selectStatement->rtable = list_make1(customScanRangeTableEntry);
remoteScan->custom_scan_tlist = masterTargetList;
/* (2) add an aggregation plan if needed */
if (masterQuery->hasAggs || masterQuery->groupClause)
{
remoteScan->scan.plan.targetlist = masterTargetList;
aggregationPlan = BuildAggregatePlan(masterQuery, &remoteScan->scan.plan);
aggregationPlan = BuildAggregatePlan(root, masterQuery, &remoteScan->scan.plan);
topLevelPlan = (Plan *) aggregationPlan;
selectStatement->planTree = topLevelPlan;
}
else
{
/* otherwise set the final projections on the scan plan directly */
/*
* The masterTargetList contains all columns that we fetch from
* the worker as non-resjunk.
*
* Here the output of the plan node determines the output of the query.
* We therefore use the targetList of masterQuery, which has non-output
* columns set as resjunk.
*/
remoteScan->scan.plan.targetlist = masterQuery->targetList;
topLevelPlan = &remoteScan->scan.plan;
}
@ -252,30 +262,115 @@ BuildSelectStatement(Query *masterQuery, List *masterTargetList, CustomScan *rem
topLevelPlan = (Plan *) limitPlan;
}
/* (6) finally set our top level plan in the plan tree */
selectStatement->planTree = topLevelPlan;
/*
* (6) set top level plan in the plantree and copy over some things from
* PlannerInfo
*/
FinalizeStatement(root, selectStatement, topLevelPlan);
/*
* (7) Replace rangetable with one with nice names to show in EXPLAIN plans
*/
foreach(targetEntryCell, masterTargetList)
{
TargetEntry *targetEntry = lfirst(targetEntryCell);
columnNameList = lappend(columnNameList, makeString(targetEntry->resname));
}
customScanRangeTableEntry = linitial(selectStatement->rtable);
customScanRangeTableEntry->eref = makeAlias("remote_scan", columnNameList);
return selectStatement;
}
/*
* FinalizeStatement sets some necessary fields on the final statement and its
* plan to make it work with the regular postgres executor. This code is copied
* almost verbatim from standard_planner in the PG source code.
*
* Modifications from original code:
* - Added SS_attach_initplans call
*/
static void
FinalizeStatement(PlannerInfo *root, PlannedStmt *result, Plan *top_plan)
{
ListCell *lp,
*lr;
PlannerGlobal *glob = root->glob;
/* Taken from create_plan */
SS_attach_initplans(root, top_plan);
/*
* If any Params were generated, run through the plan tree and compute
* each plan node's extParam/allParam sets. Ideally we'd merge this into
* set_plan_references' tree traversal, but for now it has to be separate
* because we need to visit subplans before not after main plan.
*/
if (glob->paramExecTypes != NIL)
{
Assert(list_length(glob->subplans) == list_length(glob->subroots));
forboth(lp, glob->subplans, lr, glob->subroots)
{
Plan *subplan = (Plan *) lfirst(lp);
PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
SS_finalize_plan(subroot, subplan);
}
SS_finalize_plan(root, top_plan);
}
/* final cleanup of the plan */
Assert(glob->finalrtable == NIL);
Assert(glob->finalrowmarks == NIL);
Assert(glob->resultRelations == NIL);
Assert(glob->rootResultRelations == NIL);
top_plan = set_plan_references(root, top_plan);
/* ... and the subplans (both regular subplans and initplans) */
Assert(list_length(glob->subplans) == list_length(glob->subroots));
forboth(lp, glob->subplans, lr, glob->subroots)
{
Plan *subplan = (Plan *) lfirst(lp);
PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
lfirst(lp) = set_plan_references(subroot, subplan);
}
result->transientPlan = glob->transientPlan;
result->dependsOnRole = glob->dependsOnRole;
result->parallelModeNeeded = glob->parallelModeNeeded;
result->planTree = top_plan;
result->rtable = glob->finalrtable;
result->resultRelations = glob->resultRelations;
#if PG_VERSION_NUM < 120000
result->nonleafResultRelations = glob->nonleafResultRelations;
#endif
result->rootResultRelations = glob->rootResultRelations;
result->subplans = glob->subplans;
result->rewindPlanIDs = glob->rewindPlanIDs;
result->rowMarks = glob->finalrowmarks;
result->relationOids = glob->relationOids;
result->invalItems = glob->invalItems;
result->paramExecTypes = glob->paramExecTypes;
}
/*
* BuildAggregatePlan creates and returns an aggregate plan. This aggregate plan
* builds aggreation and grouping operators (if any) that are to be executed on
* the master node.
*/
static Agg *
BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
BuildAggregatePlan(PlannerInfo *root, Query *masterQuery, Plan *subPlan)
{
Agg *aggregatePlan = NULL;
AggStrategy aggregateStrategy = AGG_PLAIN;
AggClauseCosts aggregateCosts;
List *aggregateTargetList = NIL;
List *groupColumnList = NIL;
List *aggregateColumnList = NIL;
List *havingColumnList = NIL;
List *columnList = NIL;
ListCell *columnCell = NULL;
Node *havingQual = NULL;
uint32 groupColumnCount = 0;
@ -283,27 +378,31 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
Assert(masterQuery->hasAggs || masterQuery->groupClause);
aggregateTargetList = masterQuery->targetList;
havingQual = masterQuery->havingQual;
/*
* Replaces SubLink nodes with SubPlan nodes in the having section of the
* query. (and creates the subplans in root->subplans)
*
* Would be nice if we could use masterQuery->hasSubLinks to only call
* these when that is true. However, for some reason hasSubLinks is false
* even when there are SubLinks.
*/
havingQual = SS_process_sublinks(root, masterQuery->havingQual, true);
/*
* Right now this is not really needed, since we don't support correlated
* subqueries anyway. Once we do calling this is critical to do right after
* calling SS_process_sublinks, according to the postgres function comment.
*/
havingQual = SS_replace_correlation_vars(root, havingQual);
/* estimate aggregate execution costs */
memset(&aggregateCosts, 0, sizeof(AggClauseCosts));
get_agg_clause_costs(NULL, (Node *) aggregateTargetList, AGGSPLIT_SIMPLE,
get_agg_clause_costs(root, (Node *) aggregateTargetList, AGGSPLIT_SIMPLE,
&aggregateCosts);
get_agg_clause_costs(NULL, (Node *) havingQual, AGGSPLIT_SIMPLE, &aggregateCosts);
/*
* For upper level plans above the sequential scan, the planner expects the
* table id (varno) to be set to OUTER_VAR.
*/
aggregateColumnList = pull_var_clause_default((Node *) aggregateTargetList);
havingColumnList = pull_var_clause_default(havingQual);
columnList = list_concat(aggregateColumnList, havingColumnList);
foreach(columnCell, columnList)
{
Var *column = (Var *) lfirst(columnCell);
column->varno = OUTER_VAR;
}
get_agg_clause_costs(root, (Node *) havingQual, AGGSPLIT_SIMPLE, &aggregateCosts);
groupColumnList = masterQuery->groupClause;
groupColumnCount = list_length(groupColumnList);
@ -500,14 +599,6 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
return subPlan;
}
/*
* We need to adjust varno to OUTER_VAR, since planner expects that for upper
* level plans above the sequential scan. We also need to convert aggregations
* (if exists) to regular Vars since the aggregation would be applied by the
* previous aggregation plan and we don't want them to be applied again.
*/
targetList = PrepareTargetListForNextPlan(targetList);
Assert(masterQuery->distinctClause);
Assert(!masterQuery->hasDistinctOn);
@ -536,38 +627,6 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
}
/*
* PrepareTargetListForNextPlan handles both regular columns to have right varno
* and convert aggregates to regular Vars in the target list.
*/
static List *
PrepareTargetListForNextPlan(List *targetList)
{
List *newtargetList = NIL;
ListCell *targetEntryCell = NULL;
foreach(targetEntryCell, targetList)
{
TargetEntry *targetEntry = lfirst(targetEntryCell);
TargetEntry *newTargetEntry = NULL;
Var *newVar = NULL;
Assert(IsA(targetEntry, TargetEntry));
/*
* For upper level plans above the sequential scan, the planner expects the
* table id (varno) to be set to OUTER_VAR.
*/
newVar = makeVarFromTargetEntry(OUTER_VAR, targetEntry);
newTargetEntry = flatCopyTargetEntry(targetEntry);
newTargetEntry->expr = (Expr *) newVar;
newtargetList = lappend(newtargetList, newTargetEntry);
}
return newtargetList;
}
/*
* makeAggNode creates a "Agg" plan node. groupClauseList is a list of
* SortGroupClause's.

View File

@ -68,7 +68,8 @@ bool SubqueryPushdown = false; /* is subquery pushdown enabled */
/* Local functions forward declarations */
static bool JoinTreeContainsSubqueryWalker(Node *joinTreeNode, void *context);
static bool IsFunctionRTE(Node *node);
static bool IsNodeQuery(Node *node);
static bool IsNodeSubquery(Node *node);
static bool IsNodeSubqueryOrParamExec(Node *node);
static bool IsOuterJoinExpr(Node *node);
static bool WindowPartitionOnDistributionColumn(Query *query);
static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree);
@ -126,7 +127,7 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery)
* standard_planner() may replace the sublinks with anti/semi joins and
* MultiPlanTree() cannot plan such queries.
*/
if (WhereClauseContainsSubquery(originalQuery))
if (WhereOrHavingClauseContainsSubquery(originalQuery))
{
return true;
}
@ -259,15 +260,20 @@ JoinTreeContainsSubqueryWalker(Node *joinTreeNode, void *context)
/*
* WhereClauseContainsSubquery returns true if the input query contains
* any subqueries in the WHERE clause.
* WhereOrHavingClauseContainsSubquery returns true if the input query contains
* any subqueries in the WHERE or HAVING clause.
*/
bool
WhereClauseContainsSubquery(Query *query)
WhereOrHavingClauseContainsSubquery(Query *query)
{
FromExpr *joinTree = query->jointree;
Node *queryQuals = NULL;
if (FindNodeCheck(query->havingQual, IsNodeSubquery))
{
return true;
}
if (!joinTree)
{
return false;
@ -275,7 +281,18 @@ WhereClauseContainsSubquery(Query *query)
queryQuals = joinTree->quals;
return FindNodeCheck(queryQuals, IsNodeQuery);
return FindNodeCheck(queryQuals, IsNodeSubquery);
}
/*
* TargetList returns true if the input query contains
* any subqueries in the WHERE clause.
*/
bool
TargetListContainsSubquery(Query *query)
{
return FindNodeCheck((Node *) query->targetList, IsNodeSubqueryOrParamExec);
}
@ -300,17 +317,46 @@ IsFunctionRTE(Node *node)
/*
* IsNodeQuery returns true if the given node is a Query.
* IsNodeSubquery returns true if the given node is a Query or SubPlan.
*
* The check for SubPlan is needed whev this is used on a already rewritten
* query. Such a query has SubPlan nodes instead of SubLink nodes (which
* contain a Query node).
*/
static bool
IsNodeQuery(Node *node)
IsNodeSubquery(Node *node)
{
if (node == NULL)
{
return false;
}
return IsA(node, Query);
return IsA(node, Query) || IsA(node, SubPlan);
}
/*
* IsNodeSubqueryOrParamExec returns true if the given node is a subquery or a
* Param node with paramkind PARAM_EXEC.
*/
static bool
IsNodeSubqueryOrParamExec(Node *node)
{
if (node == NULL)
{
return false;
}
if (IsNodeSubquery(node))
{
return true;
}
if (!IsA(node, Param))
{
return false;
}
return ((Param *) node)->paramkind == PARAM_EXEC;
}

View File

@ -384,7 +384,7 @@ ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery,
static bool
ContainsSubquery(Query *query)
{
return JoinTreeContainsSubquery(query) || WhereClauseContainsSubquery(query);
return JoinTreeContainsSubquery(query) || WhereOrHavingClauseContainsSubquery(query);
}

View File

@ -25,7 +25,8 @@ extern bool SubqueryPushdown;
extern bool ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery);
extern bool JoinTreeContainsSubquery(Query *query);
extern bool HasEmptyJoinTree(Query *query);
extern bool WhereClauseContainsSubquery(Query *query);
extern bool WhereOrHavingClauseContainsSubquery(Query *query);
extern bool TargetListContainsSubquery(Query *query);
extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail);
extern MultiNode * SubqueryMultiNodeTree(Query *originalQuery,
Query *queryTree,

View File

@ -14,6 +14,9 @@
#ifndef WORKER_MANAGER_H
#define WORKER_MANAGER_H
#include "postgres.h"
#include "storage/lockdefs.h"
#include "nodes/pg_list.h"

View File

@ -0,0 +1,359 @@
SET citus.next_shard_id TO 1640000;
CREATE SCHEMA ch_bench_having;
SET search_path = ch_bench_having, public;
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_order_cnt int NOT NULL
);
SELECT create_distributed_table('stock','s_w_id');
create_distributed_table
--------------------------
(1 row)
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Sort
Sort Key: s_i_id
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> HashAggregate
Group Key: s_i_id
Filter: ((pg_catalog.sum(worker_column_3))::bigint > $0)
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan 1_1
-> Aggregate
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on stock_1640000 stock
-> Distributed Subplan 1_2
-> Aggregate
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on stock_1640000 stock
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: stock.s_i_id
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Seq Scan on stock_1640000 stock
Filter: ((s_order_cnt)::numeric > $0)
(36 rows)
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
QUERY PLAN
-----------------------------------------------------------------------------------------
Sort
Sort Key: s_i_id
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> HashAggregate
Group Key: s_i_id
Filter: ((pg_catalog.sum(worker_column_3))::bigint > $0)
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan 4_1
-> Aggregate
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on stock_1640000 stock
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: stock.s_i_id
-> Seq Scan on stock_1640000 stock
(24 rows)
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock);
QUERY PLAN
-----------------------------------------------------------------------------------
HashAggregate
Group Key: s_i_id
Filter: ((pg_catalog.sum(worker_column_3))::bigint > $0)
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan 6_1
-> Aggregate
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on stock_1640000 stock
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: stock.s_i_id
-> Seq Scan on stock_1640000 stock
(22 rows)
explain select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.s_i_id
InitPlan 1 (returns $0)
-> Result (cost=0.00..0.01 rows=1 width=1)
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.s_i_id
Filter: $0
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate (cost=40.60..42.60 rows=200 width=12)
Group Key: s.s_i_id
-> Seq Scan on stock_1640000 s (cost=0.00..30.40 rows=2040 width=8)
(15 rows)
explain select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true);
QUERY PLAN
-------------------------------------------------------------------------------------------
HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.s_i_id
Filter: $0
InitPlan 1 (returns $0)
-> Result (cost=0.00..0.01 rows=1 width=1)
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate (cost=40.60..42.60 rows=200 width=12)
Group Key: s.s_i_id
-> Seq Scan on stock_1640000 s (cost=0.00..30.40 rows=2040 width=8)
(13 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
s_i_id | ordercount
--------+------------
(0 rows)
INSERT INTO stock SELECT c, c, c FROM generate_series(1, 5) as c;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
s_i_id | ordercount
--------+------------
3 | 3
4 | 4
5 | 5
(3 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
s_i_id | ordercount
--------+------------
3 | 3
4 | 4
5 | 5
(3 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select true)
order by s_i_id;
s_i_id | ordercount
--------+------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select false)
order by s_i_id;
s_i_id | ordercount
--------+------------
(0 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
s_i_id | ordercount
--------+------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select false)
order by s_i_id;
s_i_id | ordercount
--------+------------
(0 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
s_i_id | ordercount
--------+------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
DROP TABLE stock;
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_quantity decimal(4,0) NOT NULL,
s_ytd decimal(8,2) NOT NULL,
s_order_cnt int NOT NULL,
s_remote_cnt int NOT NULL,
s_data varchar(50) NOT NULL,
s_dist_01 char(24) NOT NULL,
s_dist_02 char(24) NOT NULL,
s_dist_03 char(24) NOT NULL,
s_dist_04 char(24) NOT NULL,
s_dist_05 char(24) NOT NULL,
s_dist_06 char(24) NOT NULL,
s_dist_07 char(24) NOT NULL,
s_dist_08 char(24) NOT NULL,
s_dist_09 char(24) NOT NULL,
s_dist_10 char(24) NOT NULL,
PRIMARY KEY (s_w_id,s_i_id)
);
insert into stock VALUES
(1, 33, 1, 1, 1, 1, '', '','','','','','','','','',''),
(33, 1, 1, 1, 1, 1, '', '','','','','','','','','',''),
(32, 1, 1, 1, 1, 1, '', '','','','','','','','','','');
SELECT create_distributed_table('stock','s_w_id');
NOTICE: Copying data from local table...
create_distributed_table
--------------------------
(1 row)
select s_i_id, sum(s_order_cnt) as ordercount
from stock, supplier, nation
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by s_i_id
having sum(s_order_cnt) >
(select sum(s_order_cnt) * .005
from stock, supplier, nation
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY')
order by ordercount desc;
s_i_id | ordercount
--------+------------
33 | 1
1 | 1
(2 rows)
insert into stock VALUES
(10033, 1, 1, 1, 100000, 1, '', '','','','','','','','','','');
select s_i_id, sum(s_order_cnt) as ordercount
from stock, supplier, nation
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by s_i_id
having sum(s_order_cnt) >
(select sum(s_order_cnt) * .005
from stock, supplier, nation
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY')
order by ordercount desc;
s_i_id | ordercount
--------+------------
1 | 100001
(1 row)
BEGIN;
SET LOCAL client_min_messages TO WARNING;
DROP SCHEMA ch_bench_having CASCADE;
COMMIT;

View File

@ -0,0 +1,372 @@
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1640000;
SET citus.replication_model TO streaming;
SET citus.shard_replication_factor to 1;
SET citus.shard_count to 4;
CREATE SCHEMA ch_bench_having;
SET search_path = ch_bench_having;
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_order_cnt int NOT NULL
);
SELECT create_distributed_table('stock','s_w_id');
create_distributed_table
--------------------------
(1 row)
\c - - - :worker_1_port
SET search_path = ch_bench_having;
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Sort
Sort Key: s_i_id
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> HashAggregate
Group Key: s_i_id
Filter: ((pg_catalog.sum(worker_column_3))::bigint > $0)
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan 1_1
-> Aggregate
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on stock_1640000 stock
-> Distributed Subplan 1_2
-> Aggregate
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on stock_1640000 stock
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: stock.s_i_id
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Seq Scan on stock_1640000 stock
Filter: ((s_order_cnt)::numeric > $0)
(36 rows)
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
QUERY PLAN
-----------------------------------------------------------------------------------------
Sort
Sort Key: s_i_id
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> HashAggregate
Group Key: s_i_id
Filter: ((pg_catalog.sum(worker_column_3))::bigint > $0)
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan 4_1
-> Aggregate
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on stock_1640000 stock
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: stock.s_i_id
-> Seq Scan on stock_1640000 stock
(24 rows)
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock);
QUERY PLAN
-----------------------------------------------------------------------------------
HashAggregate
Group Key: s_i_id
Filter: ((pg_catalog.sum(worker_column_3))::bigint > $0)
InitPlan 1 (returns $0)
-> Function Scan on read_intermediate_result intermediate_result
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan 6_1
-> Aggregate
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on stock_1640000 stock
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: stock.s_i_id
-> Seq Scan on stock_1640000 stock
(22 rows)
explain select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.s_i_id
InitPlan 1 (returns $0)
-> Result (cost=0.00..0.01 rows=1 width=1)
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.s_i_id
Filter: $0
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate (cost=40.60..42.60 rows=200 width=12)
Group Key: s.s_i_id
-> Seq Scan on stock_1640000 s (cost=0.00..30.40 rows=2040 width=8)
(15 rows)
explain select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true);
QUERY PLAN
-------------------------------------------------------------------------------------------
HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.s_i_id
Filter: $0
InitPlan 1 (returns $0)
-> Result (cost=0.00..0.01 rows=1 width=1)
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate (cost=40.60..42.60 rows=200 width=12)
Group Key: s.s_i_id
-> Seq Scan on stock_1640000 s (cost=0.00..30.40 rows=2040 width=8)
(13 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
s_i_id | ordercount
--------+------------
(0 rows)
INSERT INTO stock SELECT c, c, c FROM generate_series(1, 5) as c;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
s_i_id | ordercount
--------+------------
3 | 3
4 | 4
5 | 5
(3 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
s_i_id | ordercount
--------+------------
3 | 3
4 | 4
5 | 5
(3 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select true)
order by s_i_id;
s_i_id | ordercount
--------+------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select false)
order by s_i_id;
s_i_id | ordercount
--------+------------
(0 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
s_i_id | ordercount
--------+------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select false)
order by s_i_id;
s_i_id | ordercount
--------+------------
(0 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
s_i_id | ordercount
--------+------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
\c - - - :master_port
SET citus.replication_model TO streaming;
SET citus.shard_replication_factor to 1;
SET citus.shard_count to 4;
SET search_path = ch_bench_having, public;
DROP TABLE stock;
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_quantity decimal(4,0) NOT NULL,
s_ytd decimal(8,2) NOT NULL,
s_order_cnt int NOT NULL,
s_remote_cnt int NOT NULL,
s_data varchar(50) NOT NULL,
s_dist_01 char(24) NOT NULL,
s_dist_02 char(24) NOT NULL,
s_dist_03 char(24) NOT NULL,
s_dist_04 char(24) NOT NULL,
s_dist_05 char(24) NOT NULL,
s_dist_06 char(24) NOT NULL,
s_dist_07 char(24) NOT NULL,
s_dist_08 char(24) NOT NULL,
s_dist_09 char(24) NOT NULL,
s_dist_10 char(24) NOT NULL,
PRIMARY KEY (s_w_id,s_i_id)
);
insert into stock VALUES
(1, 33, 1, 1, 1, 1, '', '','','','','','','','','',''),
(33, 1, 1, 1, 1, 1, '', '','','','','','','','','',''),
(32, 1, 1, 1, 1, 1, '', '','','','','','','','','','');
SELECT create_distributed_table('stock','s_w_id');
NOTICE: Copying data from local table...
create_distributed_table
--------------------------
(1 row)
\c - - - :worker_1_port
SET search_path = ch_bench_having, public;
select s_i_id, sum(s_order_cnt) as ordercount
from stock, supplier_mx, nation_mx
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by s_i_id
having sum(s_order_cnt) >
(select sum(s_order_cnt) * .005
from stock, supplier_mx, nation_mx
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY')
order by ordercount desc;
s_i_id | ordercount
--------+------------
33 | 1
1 | 1
(2 rows)
insert into stock VALUES
(10033, 1, 1, 1, 100000, 1, '', '','','','','','','','','','');
select s_i_id, sum(s_order_cnt) as ordercount
from stock, supplier_mx, nation_mx
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by s_i_id
having sum(s_order_cnt) >
(select sum(s_order_cnt) * .005
from stock, supplier_mx, nation_mx
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY')
order by ordercount desc;
s_i_id | ordercount
--------+------------
1 | 100001
(1 row)
\c - - - :master_port
BEGIN;
SET LOCAL client_min_messages TO WARNING;
DROP SCHEMA ch_bench_having CASCADE;
COMMIT;

View File

@ -35,7 +35,7 @@ EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
-> HashAggregate
Group Key: remote_scan.l_quantity
-> Custom Scan (Citus Adaptive)
@ -52,7 +52,7 @@ EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
-> GroupAggregate
Group Key: remote_scan.l_quantity
-> Sort
@ -75,7 +75,7 @@ EXPLAIN (COSTS FALSE, FORMAT JSON)
"Plan": {
"Node Type": "Sort",
"Parallel Aware": false,
"Sort Key": ["COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)", "remote_scan.l_quantity"],
"Sort Key": ["(COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))", "remote_scan.l_quantity"],
"Plans": [
{
"Node Type": "Aggregate",
@ -146,7 +146,7 @@ EXPLAIN (COSTS FALSE, FORMAT XML)
<Node-Type>Sort</Node-Type>
<Parallel-Aware>false</Parallel-Aware>
<Sort-Key>
<Item>COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)</Item>
<Item>(COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))</Item>
<Item>remote_scan.l_quantity</Item>
</Sort-Key>
<Plans>
@ -220,7 +220,7 @@ EXPLAIN (COSTS FALSE, FORMAT YAML)
Node Type: "Sort"
Parallel Aware: false
Sort Key:
- "COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)"
- "(COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))"
- "remote_scan.l_quantity"
Plans:
- Node Type: "Aggregate"
@ -261,7 +261,7 @@ EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
-> HashAggregate
Group Key: remote_scan.l_quantity
-> Custom Scan (Citus Adaptive)
@ -277,7 +277,7 @@ EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort (actual rows=50 loops=1)
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
Sort Method: quicksort Memory: 27kB
-> HashAggregate (actual rows=50 loops=1)
Group Key: remote_scan.l_quantity
@ -678,7 +678,53 @@ GROUP BY
count_pay
ORDER BY
count_pay;
ERROR: bogus varattno for OUTER_VAR var: 3
Sort
Sort Key: remote_scan.count_pay
-> HashAggregate
Group Key: remote_scan.count_pay
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> GroupAggregate
Group Key: subquery_top.count_pay
-> Sort
Sort Key: subquery_top.count_pay
-> Subquery Scan on subquery_top
-> GroupAggregate
Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.count_pay
Filter: (array_ndims(array_agg(('action=>1'::text) ORDER BY events.event_time)) > 0)
-> Sort
Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.count_pay
-> Hash Left Join
Hash Cond: (users.composite_id = subquery_2.composite_id)
-> HashAggregate
Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), users.composite_id, ('action=>1'::text), events.event_time
-> Append
-> Hash Join
Hash Cond: (users.composite_id = events.composite_id)
-> Seq Scan on users_1400289 users
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash
-> Seq Scan on events_1400285 events
Filter: ((event_type)::text = 'click'::text)
-> Hash Join
Hash Cond: (users_1.composite_id = events_1.composite_id)
-> Seq Scan on users_1400289 users_1
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash
-> Seq Scan on events_1400285 events_1
Filter: ((event_type)::text = 'submit'::text)
-> Hash
-> Subquery Scan on subquery_2
-> GroupAggregate
Group Key: events_2.composite_id
Filter: (count(*) > 2)
-> Sort
Sort Key: events_2.composite_id
-> Seq Scan on events_1400285 events_2
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text))
-- Lateral join subquery pushdown
-- set subquery_pushdown due to limit in the query
SET citus.subquery_pushdown to ON;
@ -845,7 +891,7 @@ EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort (actual rows=50 loops=1)
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
Sort Method: quicksort Memory: 27kB
-> HashAggregate (actual rows=50 loops=1)
Group Key: remote_scan.l_quantity
@ -1148,7 +1194,7 @@ Custom Scan (Citus INSERT ... SELECT via coordinator)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57638 dbname=regression
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Seq Scan on orders_hash_part_360045 orders_hash_part
SELECT true AS valid FROM explain_json($$
@ -1165,7 +1211,7 @@ Custom Scan (Citus INSERT ... SELECT via coordinator)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57638 dbname=regression
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Seq Scan on orders_hash_part_360045 orders_hash_part
EXPLAIN (COSTS OFF)

View File

@ -21,6 +21,18 @@ SELECT create_distributed_table('the_table', 'a');
INSERT INTO the_table (a, b) VALUES (1, 1);
INSERT INTO the_table (a, b) VALUES (1, 2);
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_order_cnt int NOT NULL
);
SELECT create_distributed_table('stock','s_w_id');
create_distributed_table
--------------------------
(1 row)
INSERT INTO stock SELECT c, c, c FROM generate_series(1, 5) as c;
-- connect to the follower and check that a simple select query works, the follower
-- is still in the default cluster and will send queries to the primary nodes
\c - - - :follower_master_port
@ -31,12 +43,24 @@ SELECT * FROM the_table;
1 | 2
(2 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
s_i_id | ordercount
--------+------------
3 | 3
4 | 4
5 | 5
(3 rows)
-- now, connect to the follower but tell it to use secondary nodes. There are no
-- secondary nodes so this should fail.
-- (this is :follower_master_port but substitution doesn't work here)
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'"
SELECT * FROM the_table;
ERROR: node group 2 does not have a secondary node
ERROR: node group 1 does not have a secondary node
-- add the secondary nodes and try again, the SELECT statement should work this time
\c - - - :master_port
SELECT 1 FROM master_add_node('localhost', :follower_worker_1_port,
@ -64,6 +88,18 @@ SELECT * FROM the_table;
1 | 2
(2 rows)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
s_i_id | ordercount
--------+------------
3 | 3
4 | 4
5 | 5
(3 rows)
SELECT
node_name, node_port
FROM
@ -81,7 +117,13 @@ ORDER BY
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
-- there are no secondary nodes in this cluster, so this should fail!
SELECT * FROM the_table;
ERROR: there is a shard placement in node group 2 but there are no nodes in that group
ERROR: there is a shard placement in node group 1 but there are no nodes in that group
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
ERROR: there is a shard placement in node group 1 but there are no nodes in that group
-- now move the secondary nodes into the new cluster and see that the follower, finally
-- correctly configured, can run select queries involving them
\c - - - :master_port
@ -97,3 +139,4 @@ SELECT * FROM the_table;
-- clean up after ourselves
\c - - - :master_port
DROP TABLE the_table;
DROP TABLE stock;

View File

@ -26,11 +26,11 @@ EXPLAIN (COSTS FALSE)
--------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.l_orderkey
Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_orderkey
-> HashAggregate
Group Key: remote_scan.l_orderkey
Filter: (sum(remote_scan.worker_column_3) > '24'::numeric)
-> Custom Scan (Citus Real-Time)
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
@ -50,15 +50,15 @@ EXPLAIN (COSTS FALSE)
FROM lineitem
GROUP BY l_orderkey HAVING sum(l_quantity) > 24
ORDER BY 2 DESC, 1 ASC LIMIT 3;
QUERY PLAN
--------------------------------------------------------------------------------
QUERY PLAN
-----------------------------------------------------------------------------
Limit
-> Sort
Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.l_orderkey
Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_orderkey
-> HashAggregate
Group Key: remote_scan.l_orderkey
Filter: (sum(remote_scan.worker_column_3) > '24'::numeric)
-> Custom Scan (Citus Real-Time)
-> Custom Scan (Citus Adaptive)
Task Count: 2
Tasks Shown: One of 2
-> Task
@ -78,11 +78,11 @@ EXPLAIN (COSTS FALSE)
------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.l_shipmode
Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_shipmode
-> HashAggregate
Group Key: remote_scan.l_shipmode
Filter: (sum(remote_scan.worker_column_3) > '24'::numeric)
-> Custom Scan (Citus Real-Time)
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
@ -102,11 +102,11 @@ EXPLAIN (COSTS FALSE)
--------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.l_shipmode, remote_scan.l_orderkey
Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_shipmode, remote_scan.l_orderkey
-> HashAggregate
Group Key: remote_scan.l_shipmode, remote_scan.l_orderkey
Filter: (sum(remote_scan.worker_column_4) > '24'::numeric)
-> Custom Scan (Citus Real-Time)
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
@ -131,11 +131,11 @@ EXPLAIN (COSTS FALSE)
-----------------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: sum((sum(remote_scan.revenue))) DESC
Sort Key: (sum(remote_scan.revenue)) DESC
-> HashAggregate
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3, remote_scan.worker_column_4
Filter: (sum(remote_scan.worker_column_5) > '24'::numeric)
-> Custom Scan (Citus Real-Time)
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
@ -163,11 +163,11 @@ EXPLAIN (COSTS FALSE)
------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: sum((sum(remote_scan.revenue))) DESC
Sort Key: (sum(remote_scan.revenue)) DESC
-> HashAggregate
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
Filter: (sum(remote_scan.worker_column_4) > '24'::numeric)
-> Custom Scan (Citus Real-Time)
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task

View File

@ -712,7 +712,7 @@ DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan 54_1 for CTE sub_cte: SELECT 1
DEBUG: Plan 54 query after replacing subqueries and CTEs: SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT intermediate_result."?column?" FROM read_intermediate_result('54_1'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer)) sub_cte) FROM public.raw_events_first
DEBUG: Router planner cannot handle multi-shard select queries
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- We support set operations via the coordinator
BEGIN;

View File

@ -711,7 +711,7 @@ DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan 54_1 for CTE sub_cte: SELECT 1
DEBUG: Plan 54 query after replacing subqueries and CTEs: SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT intermediate_result."?column?" FROM read_intermediate_result('54_1'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer)) sub_cte) FROM public.raw_events_first
DEBUG: Router planner cannot handle multi-shard select queries
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- We support set operations via the coordinator
BEGIN;

View File

@ -55,7 +55,7 @@ LOG: join order: [ "orders" ][ local partition join "lineitem" ][ single range
QUERY PLAN
------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.o_orderdate
Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.o_orderdate
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.l_orderkey, remote_scan.o_orderdate, remote_scan.o_shippriority
-> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0)
@ -98,7 +98,7 @@ LOG: join order: [ "orders" ][ local partition join "lineitem" ][ single range
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: sum((sum(remote_scan.revenue))) DESC
Sort Key: (sum(remote_scan.revenue)) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.c_custkey, remote_scan.c_name, remote_scan.c_acctbal, remote_scan.c_phone, remote_scan.n_name, remote_scan.c_address, remote_scan.c_comment
-> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0)

View File

@ -16,10 +16,10 @@ WHERE
and l_discount between 0.06 - 0.01 and 0.06 + 0.01
and l_quantity < 24;
LOG: join order: [ "lineitem" ]
QUERY PLAN
-----------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------
Aggregate (cost=0.00..0.00 rows=0 width=0)
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
explain statements for distributed queries are not enabled
(3 rows)
@ -50,10 +50,10 @@ LOG: join order: [ "orders" ][ reference join "customer" ][ local partition joi
QUERY PLAN
------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.o_orderdate
Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.o_orderdate
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.l_orderkey, remote_scan.o_orderdate, remote_scan.o_shippriority
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
explain statements for distributed queries are not enabled
(6 rows)
@ -93,10 +93,10 @@ LOG: join order: [ "orders" ][ reference join "customer" ][ reference join "nat
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: sum((sum(remote_scan.revenue))) DESC
Sort Key: (sum(remote_scan.revenue)) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.c_custkey, remote_scan.c_name, remote_scan.c_acctbal, remote_scan.c_phone, remote_scan.n_name, remote_scan.c_address, remote_scan.c_comment
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
explain statements for distributed queries are not enabled
(6 rows)
@ -131,10 +131,10 @@ WHERE
AND l_shipinstruct = 'DELIVER IN PERSON'
);
LOG: join order: [ "lineitem" ][ reference join "part" ]
QUERY PLAN
-----------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------
Aggregate (cost=0.00..0.00 rows=0 width=0)
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
explain statements for distributed queries are not enabled
(3 rows)

View File

@ -63,7 +63,7 @@ EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem_mx
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
-> HashAggregate
Group Key: remote_scan.l_quantity
-> Custom Scan (Citus Adaptive)
@ -83,7 +83,7 @@ EXPLAIN (COSTS FALSE, FORMAT JSON)
"Plan": {
"Node Type": "Sort",
"Parallel Aware": false,
"Sort Key": ["COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)", "remote_scan.l_quantity"],
"Sort Key": ["(COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))", "remote_scan.l_quantity"],
"Plans": [
{
"Node Type": "Aggregate",
@ -155,7 +155,7 @@ EXPLAIN (COSTS FALSE, FORMAT XML)
<Node-Type>Sort</Node-Type>
<Parallel-Aware>false</Parallel-Aware>
<Sort-Key>
<Item>COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)</Item>
<Item>(COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))</Item>
<Item>remote_scan.l_quantity</Item>
</Sort-Key>
<Plans>
@ -229,7 +229,7 @@ EXPLAIN (COSTS FALSE, FORMAT YAML)
Node Type: "Sort"
Parallel Aware: false
Sort Key:
- "COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)"
- "(COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))"
- "remote_scan.l_quantity"
Plans:
- Node Type: "Aggregate"
@ -270,7 +270,7 @@ EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem_mx
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
-> HashAggregate
Group Key: remote_scan.l_quantity
-> Custom Scan (Citus Adaptive)

View File

@ -512,7 +512,7 @@ SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHER
AS special_price FROM articles_hash_mx a;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- simple lookup query
SELECT *

View File

@ -34,7 +34,26 @@ FROM users_table
GROUP BY user_id
ORDER BY avg(value_1) DESC
LIMIT 1;
ERROR: bogus varattno for OUTER_VAR var: 3
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.00..0.00 rows=0 width=0)
-> Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit (cost=1.70..1.70 rows=1 width=52)
-> Sort (cost=1.70..1.70 rows=2 width=52)
Sort Key: (avg(value_1)) DESC
-> HashAggregate (cost=1.66..1.69 rows=2 width=52)
Group Key: user_id
-> Seq Scan on users_table_1400256 users_table (cost=0.00..1.33 rows=33 width=8)
(16 rows)
SELECT user_id, avg(value_1) + 1
FROM users_table
GROUP BY user_id
@ -84,7 +103,22 @@ SELECT user_id, avg(value_1) + count(value_2)
FROM users_table
GROUP BY user_id
ORDER BY 2 DESC;
ERROR: bogus varattno for OUTER_VAR var: 3
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: (((pg_catalog.sum(remote_scan."?column?") / pg_catalog.sum(remote_scan."?column?_1")) + (COALESCE((pg_catalog.sum(remote_scan."?column?_2"))::bigint, '0'::bigint))::numeric)) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate (cost=1.66..1.68 rows=2 width=28)
Group Key: user_id
-> Seq Scan on users_table_1400256 users_table (cost=0.00..1.33 rows=33 width=12)
(12 rows)
SELECT user_id, avg(value_1) + count(value_2)
FROM users_table
GROUP BY user_id
@ -187,14 +221,14 @@ FROM users_table
GROUP BY user_id
ORDER BY (10000 / (sum(value_1 + value_2))) DESC
LIMIT 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: (10000 / (pg_catalog.sum(((10000 / (pg_catalog.sum(remote_scan.worker_column_2))::bigint))))::bigint) DESC
Sort Key: ((10000 / (pg_catalog.sum(remote_scan.worker_column_2))::bigint)) DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
@ -255,14 +289,14 @@ FROM users_table
GROUP BY user_id
ORDER BY sum(value_1) DESC
LIMIT 2;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: (pg_catalog.sum(((pg_catalog.sum(remote_scan.worker_column_2))::bigint)))::bigint DESC
Sort Key: ((pg_catalog.sum(remote_scan.worker_column_2))::bigint) DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
@ -297,7 +331,31 @@ WHERE ut.user_id = et.user_id and et.value_2 < 5
GROUP BY ut.user_id
ORDER BY MAX(et.time), AVG(ut.value_1)
LIMIT 5;
ERROR: bogus varattno for OUTER_VAR var: 5
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: (max(remote_scan.worker_column_4)), ((pg_catalog.sum(remote_scan.worker_column_5) / pg_catalog.sum(remote_scan.worker_column_6)))
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: (max(et."time")), (avg(ut.value_1))
-> HashAggregate
Group Key: ut.user_id
-> Hash Join
Hash Cond: (ut.user_id = et.user_id)
-> Seq Scan on users_table_1400256 ut
-> Hash
-> Seq Scan on events_table_1400260 et
Filter: (value_2 < 5)
(21 rows)
SELECT ut.user_id, avg(et.value_2)
FROM users_table ut, events_table et
WHERE ut.user_id = et.user_id and et.value_2 < 5
@ -332,4 +390,30 @@ WHERE ut.user_id = et.user_id and et.value_2 < 5
GROUP BY ut.user_id
ORDER BY 2, AVG(ut.value_1), 1 DESC
LIMIT 5;
ERROR: bogus varattno for OUTER_VAR var: 4
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), ((pg_catalog.sum(remote_scan.worker_column_3) / pg_catalog.sum(remote_scan.worker_column_4))), remote_scan.user_id DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: (count(DISTINCT ut.value_2)), (avg(ut.value_1)), ut.user_id DESC
-> GroupAggregate
Group Key: ut.user_id
-> Sort
Sort Key: ut.user_id DESC
-> Hash Join
Hash Cond: (ut.user_id = et.user_id)
-> Seq Scan on users_table_1400256 ut
-> Hash
-> Seq Scan on events_table_1400260 et
Filter: (value_2 < 5)
(23 rows)

View File

@ -671,7 +671,7 @@ SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash a2 WHERE a
AS special_price FROM articles_hash a;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- simple lookup query
SELECT *

View File

@ -452,7 +452,7 @@ SELECT a.title AS name, (SELECT a2.id FROM articles_hash a2 WHERE a.id = a2.id
AS special_price FROM articles_hash a;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- simple lookup query just works
SELECT *

View File

@ -197,10 +197,10 @@ EXPLAIN (COSTS FALSE)
GROUP BY 1
HAVING count(*) > 5
ORDER BY 2 DESC, 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey
-> HashAggregate
Group Key: remote_scan.l_orderkey
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 5)
@ -225,10 +225,10 @@ EXPLAIN (COSTS FALSE)
GROUP BY 1
HAVING count(*) > 5
ORDER BY 2 DESC, 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey
-> GroupAggregate
Group Key: remote_scan.l_orderkey
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 5)
@ -296,13 +296,13 @@ EXPLAIN (COSTS FALSE)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> Unique
-> Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> GroupAggregate
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
-> Sort
@ -375,14 +375,14 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.l_suppkey
-> Unique
-> Sort
Sort Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> GroupAggregate
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
-> Sort
@ -456,7 +456,28 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_suppkey, l_linenumber
ORDER BY 1,2
LIMIT 10;
ERROR: bogus varattno for OUTER_VAR var: 4
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
-> Unique
-> Sort
Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
-> GroupAggregate
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
-> Sort
Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(18 rows)
SET enable_hashagg TO on;
-- Similar to the above query but with distinct on
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
@ -487,12 +508,12 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_suppkey, l_linenumber
ORDER BY l_suppkey,1
LIMIT 10;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
-> HashAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
@ -514,12 +535,12 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_suppkey, l_linenumber
ORDER BY l_suppkey,1
LIMIT 10;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
-> GroupAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Sort
@ -591,7 +612,28 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
ERROR: bogus varattno for OUTER_VAR var: 4
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
-> Unique
-> Sort
Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
-> GroupAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Sort
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(18 rows)
SET enable_hashagg TO on;
-- expression among aggregations.
@ -650,7 +692,28 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
ERROR: bogus varattno for OUTER_VAR var: 4
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
-> Unique
-> Sort
Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
-> GroupAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Sort
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(18 rows)
SET enable_hashagg TO on;
@ -809,13 +872,13 @@ EXPLAIN (COSTS FALSE)
FROM lineitem_hash_part
GROUP BY l_orderkey
ORDER BY 1,2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
-> Unique
-> Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
-> GroupAggregate
Group Key: remote_scan.worker_column_3
-> Sort
@ -880,13 +943,13 @@ EXPLAIN (COSTS FALSE)
FROM lineitem_hash_part
GROUP BY l_suppkey
ORDER BY 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
-> Unique
-> Sort
Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
-> GroupAggregate
Group Key: remote_scan.worker_column_2
-> Sort
@ -940,14 +1003,14 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_orderkey
ORDER BY 2
LIMIT 15;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1))), 1))), 1)
Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1))
-> Unique
-> Sort
Sort Key: array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1), array_cat_agg((array_cat_agg(remote_scan.array_agg)))
Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1)), (array_cat_agg(remote_scan.array_agg))
-> GroupAggregate
Group Key: remote_scan.worker_column_3
-> Sort

View File

@ -279,7 +279,7 @@ ORDER BY articles.id;
-- subqueries are not supported in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles a;
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- joins are not supported between local and distributed tables
SELECT title, authors.name FROM authors, articles WHERE authors.id = articles.author_id;

View File

@ -223,7 +223,7 @@ ERROR: Complex subqueries and CTEs are not supported when task_executor_type is
-- subqueries are not supported in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles a;
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- joins are not supported between local and distributed tables
SELECT title, authors.name FROM authors, articles WHERE authors.id = articles.author_id;

View File

@ -24,11 +24,11 @@ FROM
-- Update metadata in order to make all shards equal
-- note that the table is created on multi_insert_select_create_table.sql
UPDATE
pg_dist_shard
SET
shardmaxvalue = '14947'
WHERE
UPDATE
pg_dist_shard
SET
shardmaxvalue = '14947'
WHERE
shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'orders_subquery'::regclass ORDER BY shardid DESC LIMIT 1);
SET client_min_messages TO DEBUG1;
-- If group by is not on partition column then we recursively plan
@ -394,7 +394,7 @@ ERROR: complex joins are only supported when all distributed tables are joined
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
user_id | value_1 | value_2 | value_3
---------+---------+---------+---------
@ -409,7 +409,7 @@ LIMIT 5;
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
LEFT JOIN users_reference_table t2 ON t1.user_id > t2.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
user_id | value_1 | value_2 | value_3
---------+---------+---------+---------
@ -424,14 +424,14 @@ LIMIT 5;
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
LEFT JOIN users_table t2 ON t1.user_id > t2.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- outer joins on reference tables with expressions should work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
LEFT JOIN users_reference_table t2 ON t1.user_id = (CASE WHEN t2.user_id > 3 THEN 3 ELSE t2.user_id END)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
user_id | value_1 | value_2 | value_3
---------+---------+---------+---------
@ -444,11 +444,11 @@ LIMIT 5;
-- outer joins on distributed tables and reference tables with expressions should work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM
FROM
users_table t0 LEFT JOIN
events_table t1 ON t0.user_id = t1.user_id
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
user_id | value_1 | value_2 | value_3
---------+---------+---------+---------
@ -461,11 +461,11 @@ SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
-- outer joins on distributed tables with expressions should not work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM
FROM
users_table t0 LEFT JOIN
events_table t1 ON t0.user_id = trunc(t1.user_id)
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- outer joins as subqueries should work
@ -491,7 +491,7 @@ LIMIT 5;
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
user_id | value_1 | value_2 | value_3
---------+---------+---------+---------
@ -703,6 +703,11 @@ WHERE
4968.4946466804019323
(1 row)
-- Check unsupported subqueries in target list
SELECT (SELECT 1) FROM orders_subquery;
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
SELECT sum((SELECT 1)) FROM orders_subquery;
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
-- Check that if subquery is pulled, we don't error and run query properly.
SELECT count(*) FROM
(
@ -1055,7 +1060,6 @@ ORDER BY
3.0000000000000000 | 0
(1 row)
-- Lateral join subquery pushdown
-- set subquery_pushdown since there is limit in the query
SET citus.subquery_pushdown to ON;

View File

@ -870,7 +870,7 @@ EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USIN
-> Distributed Subplan 98_1
-> Limit
-> Sort
Sort Key: max((max(remote_scan.lastseen))) DESC
Sort Key: (max(remote_scan.lastseen)) DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)

View File

@ -939,10 +939,10 @@ FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
Sort Key: remote_scan.user_id, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) DESC
-> HashAggregate
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Adaptive)
@ -1014,11 +1014,11 @@ FROM
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
Sort Key: remote_scan.user_id, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) DESC
-> HashAggregate
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Adaptive)
@ -1044,11 +1044,11 @@ FROM
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
Sort Key: remote_scan.user_id, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) DESC
-> HashAggregate
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Adaptive)
@ -1074,11 +1074,11 @@ FROM
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
Sort Key: remote_scan.user_id, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) DESC
-> HashAggregate
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Adaptive)
@ -1104,11 +1104,11 @@ FROM
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
Sort Key: remote_scan.user_id, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) DESC
-> HashAggregate
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Adaptive)

View File

@ -108,7 +108,7 @@ SELECT
(SELECT user_id FROM cte_basic), user_id
FROM
users_table;
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- single-row sublink is acceptable when there is no FROM
WITH cte AS (

View File

@ -27,7 +27,7 @@ test: multi_mx_copy_data multi_mx_router_planner
test: multi_mx_schema_support multi_mx_tpch_query1 multi_mx_tpch_query10
test: multi_mx_tpch_query12 multi_mx_tpch_query14 multi_mx_tpch_query19
test: multi_mx_tpch_query3 multi_mx_tpch_query6 multi_mx_tpch_query7
test: multi_mx_tpch_query7_nested multi_mx_ddl
test: multi_mx_tpch_query7_nested multi_mx_ddl ch_bench_having_mx
test: recursive_dml_queries_mx multi_mx_truncate_from_worker
test: multi_mx_repartition_udt_prepare mx_foreign_key_to_reference_table
test: multi_mx_repartition_join_w1 multi_mx_repartition_join_w2 multi_mx_repartition_udt_w1 multi_mx_repartition_udt_w2

View File

@ -54,7 +54,7 @@ test: multi_partitioning_utils multi_partitioning replicated_partitioned_table
# Tests for recursive subquery planning
# ----------
test: subquery_basics subquery_local_tables subquery_executors subquery_and_cte set_operations set_operation_and_local_tables
test: subqueries_deep subquery_view subquery_partitioning subquery_complex_target_list subqueries_not_supported subquery_in_where
test: subqueries_deep subquery_view subquery_partitioning subquery_complex_target_list subqueries_not_supported subquery_in_where
test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins non_colocated_join_order
test: subquery_prepared_statements pg12
@ -73,7 +73,7 @@ test: multi_reference_table multi_select_for_update relation_access_tracking
test: custom_aggregate_support
test: multi_average_expression multi_working_columns multi_having_pushdown
test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg
test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg ch_bench_having
test: multi_agg_type_conversion multi_count_type_conversion
test: multi_partition_pruning single_hash_repartition_join
test: multi_join_pruning multi_hash_pruning
@ -129,7 +129,7 @@ test: multi_shard_modify
# ----------
# Tests around DDL statements run on distributed tables
# ----------
test: multi_index_statements
test: multi_index_statements
test: multi_alter_table_statements
test: multi_alter_table_add_constraints
@ -174,7 +174,7 @@ test: multi_create_fdw
test: multi_complex_count_distinct multi_select_distinct
test: multi_modifications
test: multi_distribution_metadata
test: multi_generate_ddl_commands multi_create_shards multi_prune_shard_list multi_repair_shards
test: multi_generate_ddl_commands multi_create_shards multi_prune_shard_list multi_repair_shards
test: multi_upsert multi_simple_queries multi_data_types
test: multi_utilities foreign_key_to_reference_table validate_constraint
test: multi_modifying_xacts

View File

@ -65,10 +65,10 @@ SELECT
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> Sort
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey DESC
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey DESC
-> HashAggregate
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
Group Key: remote_scan.l_orderkey
@ -110,10 +110,10 @@ SELECT
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: count(DISTINCT (count(DISTINCT (count(DISTINCT remote_scan.count)))))
Output: (count(DISTINCT remote_scan.count))
-> Sort
Output: count(DISTINCT (count(DISTINCT remote_scan.count)))
Sort Key: count(DISTINCT (count(DISTINCT remote_scan.count))) DESC
Output: (count(DISTINCT remote_scan.count))
Sort Key: (count(DISTINCT remote_scan.count)) DESC
-> Aggregate
Output: count(DISTINCT remote_scan.count)
-> Custom Scan (Citus Task-Tracker)
@ -156,10 +156,10 @@ SELECT
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: remote_scan.l_shipmode, count(DISTINCT (count(DISTINCT (count(DISTINCT remote_scan.count)))))
Output: remote_scan.l_shipmode, (count(DISTINCT remote_scan.count))
-> Sort
Output: remote_scan.l_shipmode, count(DISTINCT (count(DISTINCT remote_scan.count)))
Sort Key: count(DISTINCT (count(DISTINCT remote_scan.count))) DESC, remote_scan.l_shipmode DESC
Output: remote_scan.l_shipmode, (count(DISTINCT remote_scan.count))
Sort Key: (count(DISTINCT remote_scan.count)) DESC, remote_scan.l_shipmode DESC
-> GroupAggregate
Output: remote_scan.l_shipmode, count(DISTINCT remote_scan.count)
Group Key: remote_scan.l_shipmode
@ -207,13 +207,13 @@ SELECT
GROUP BY l_orderkey
ORDER BY 3 DESC, 2 DESC, 1
LIMIT 10;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
-> Sort
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) DESC, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey
-> HashAggregate
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
Group Key: remote_scan.l_orderkey
@ -367,8 +367,8 @@ SELECT
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: remote_scan.l_shipmode, count(DISTINCT (count(DISTINCT remote_scan.count)))
Sort Key: remote_scan.l_shipmode, count(DISTINCT (count(DISTINCT remote_scan.count))) DESC
Output: remote_scan.l_shipmode, (count(DISTINCT remote_scan.count))
Sort Key: remote_scan.l_shipmode, (count(DISTINCT remote_scan.count)) DESC
-> GroupAggregate
Output: remote_scan.l_shipmode, count(DISTINCT remote_scan.count)
Group Key: remote_scan.l_shipmode
@ -491,13 +491,13 @@ SELECT
GROUP BY l_orderkey
ORDER BY 2 DESC, 3 DESC, 1
LIMIT 10;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
-> Sort
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey
-> HashAggregate
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
Group Key: remote_scan.l_orderkey
@ -548,7 +548,32 @@ SELECT
GROUP BY l_suppkey
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
ERROR: bogus varattno for OUTER_VAR var: 3
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: remote_scan.l_suppkey, (count(DISTINCT remote_scan.count) FILTER (WHERE (remote_scan.count_1 = 'AIR'::bpchar)))
-> Sort
Output: remote_scan.l_suppkey, (count(DISTINCT remote_scan.count) FILTER (WHERE (remote_scan.count_1 = 'AIR'::bpchar)))
Sort Key: (count(DISTINCT remote_scan.count) FILTER (WHERE (remote_scan.count_1 = 'AIR'::bpchar))) DESC, remote_scan.l_suppkey DESC
-> GroupAggregate
Output: remote_scan.l_suppkey, count(DISTINCT remote_scan.count) FILTER (WHERE (remote_scan.count_1 = 'AIR'::bpchar))
Group Key: remote_scan.l_suppkey
-> Sort
Output: remote_scan.l_suppkey, remote_scan.count, remote_scan.count_1
Sort Key: remote_scan.l_suppkey DESC
-> Custom Scan (Citus Task-Tracker)
Output: remote_scan.l_suppkey, remote_scan.count, remote_scan.count_1
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Output: l_suppkey, l_partkey, l_shipmode
Group Key: lineitem_hash.l_suppkey, lineitem_hash.l_partkey, lineitem_hash.l_shipmode
-> Seq Scan on public.lineitem_hash_240000 lineitem_hash
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
(22 rows)
-- without group by, on partition column
SELECT
count(DISTINCT l_orderkey) FILTER (WHERE l_shipmode = 'AIR')

View File

@ -0,0 +1,182 @@
SET citus.next_shard_id TO 1640000;
CREATE SCHEMA ch_bench_having;
SET search_path = ch_bench_having, public;
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_order_cnt int NOT NULL
);
SELECT create_distributed_table('stock','s_w_id');
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock);
explain select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
explain select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true);
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
INSERT INTO stock SELECT c, c, c FROM generate_series(1, 5) as c;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select true)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select false)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select false)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
DROP TABLE stock;
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_quantity decimal(4,0) NOT NULL,
s_ytd decimal(8,2) NOT NULL,
s_order_cnt int NOT NULL,
s_remote_cnt int NOT NULL,
s_data varchar(50) NOT NULL,
s_dist_01 char(24) NOT NULL,
s_dist_02 char(24) NOT NULL,
s_dist_03 char(24) NOT NULL,
s_dist_04 char(24) NOT NULL,
s_dist_05 char(24) NOT NULL,
s_dist_06 char(24) NOT NULL,
s_dist_07 char(24) NOT NULL,
s_dist_08 char(24) NOT NULL,
s_dist_09 char(24) NOT NULL,
s_dist_10 char(24) NOT NULL,
PRIMARY KEY (s_w_id,s_i_id)
);
insert into stock VALUES
(1, 33, 1, 1, 1, 1, '', '','','','','','','','','',''),
(33, 1, 1, 1, 1, 1, '', '','','','','','','','','',''),
(32, 1, 1, 1, 1, 1, '', '','','','','','','','','','');
SELECT create_distributed_table('stock','s_w_id');
select s_i_id, sum(s_order_cnt) as ordercount
from stock, supplier, nation
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by s_i_id
having sum(s_order_cnt) >
(select sum(s_order_cnt) * .005
from stock, supplier, nation
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY')
order by ordercount desc;
insert into stock VALUES
(10033, 1, 1, 1, 100000, 1, '', '','','','','','','','','','');
select s_i_id, sum(s_order_cnt) as ordercount
from stock, supplier, nation
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by s_i_id
having sum(s_order_cnt) >
(select sum(s_order_cnt) * .005
from stock, supplier, nation
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY')
order by ordercount desc;
BEGIN;
SET LOCAL client_min_messages TO WARNING;
DROP SCHEMA ch_bench_having CASCADE;
COMMIT;

View File

@ -0,0 +1,199 @@
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1640000;
SET citus.replication_model TO streaming;
SET citus.shard_replication_factor to 1;
SET citus.shard_count to 4;
CREATE SCHEMA ch_bench_having;
SET search_path = ch_bench_having;
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_order_cnt int NOT NULL
);
SELECT create_distributed_table('stock','s_w_id');
\c - - - :worker_1_port
SET search_path = ch_bench_having;
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
explain (costs false, summary false, timing false)
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock);
explain select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
explain select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true);
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
INSERT INTO stock SELECT c, c, c FROM generate_series(1, 5) as c;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select true)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select false)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select false)
order by s_i_id;
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select true)
order by s_i_id;
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
\c - - - :master_port
SET citus.replication_model TO streaming;
SET citus.shard_replication_factor to 1;
SET citus.shard_count to 4;
SET search_path = ch_bench_having, public;
DROP TABLE stock;
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_quantity decimal(4,0) NOT NULL,
s_ytd decimal(8,2) NOT NULL,
s_order_cnt int NOT NULL,
s_remote_cnt int NOT NULL,
s_data varchar(50) NOT NULL,
s_dist_01 char(24) NOT NULL,
s_dist_02 char(24) NOT NULL,
s_dist_03 char(24) NOT NULL,
s_dist_04 char(24) NOT NULL,
s_dist_05 char(24) NOT NULL,
s_dist_06 char(24) NOT NULL,
s_dist_07 char(24) NOT NULL,
s_dist_08 char(24) NOT NULL,
s_dist_09 char(24) NOT NULL,
s_dist_10 char(24) NOT NULL,
PRIMARY KEY (s_w_id,s_i_id)
);
insert into stock VALUES
(1, 33, 1, 1, 1, 1, '', '','','','','','','','','',''),
(33, 1, 1, 1, 1, 1, '', '','','','','','','','','',''),
(32, 1, 1, 1, 1, 1, '', '','','','','','','','','','');
SELECT create_distributed_table('stock','s_w_id');
\c - - - :worker_1_port
SET search_path = ch_bench_having, public;
select s_i_id, sum(s_order_cnt) as ordercount
from stock, supplier_mx, nation_mx
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by s_i_id
having sum(s_order_cnt) >
(select sum(s_order_cnt) * .005
from stock, supplier_mx, nation_mx
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY')
order by ordercount desc;
insert into stock VALUES
(10033, 1, 1, 1, 100000, 1, '', '','','','','','','','','','');
select s_i_id, sum(s_order_cnt) as ordercount
from stock, supplier_mx, nation_mx
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by s_i_id
having sum(s_order_cnt) >
(select sum(s_order_cnt) * .005
from stock, supplier_mx, nation_mx
where mod((s_w_id * s_i_id),10000) = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY')
order by ordercount desc;
\c - - - :master_port
BEGIN;
SET LOCAL client_min_messages TO WARNING;
DROP SCHEMA ch_bench_having CASCADE;
COMMIT;

View File

@ -11,6 +11,17 @@ SELECT create_distributed_table('the_table', 'a');
INSERT INTO the_table (a, b) VALUES (1, 1);
INSERT INTO the_table (a, b) VALUES (1, 2);
CREATE TABLE stock (
s_w_id int NOT NULL,
s_i_id int NOT NULL,
s_order_cnt int NOT NULL
);
SELECT create_distributed_table('stock','s_w_id');
INSERT INTO stock SELECT c, c, c FROM generate_series(1, 5) as c;
-- connect to the follower and check that a simple select query works, the follower
-- is still in the default cluster and will send queries to the primary nodes
@ -18,6 +29,13 @@ INSERT INTO the_table (a, b) VALUES (1, 2);
SELECT * FROM the_table;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
-- now, connect to the follower but tell it to use secondary nodes. There are no
-- secondary nodes so this should fail.
@ -42,6 +60,13 @@ SELECT 1 FROM master_add_node('localhost', :follower_worker_2_port,
-- now that we've added secondaries this should work
SELECT * FROM the_table;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
SELECT
node_name, node_port
FROM
@ -57,6 +82,12 @@ ORDER BY
-- there are no secondary nodes in this cluster, so this should fail!
SELECT * FROM the_table;
select s_i_id, sum(s_order_cnt) as ordercount
from stock
group by s_i_id
having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock)
order by s_i_id;
-- now move the secondary nodes into the new cluster and see that the follower, finally
-- correctly configured, can run select queries involving them
@ -68,3 +99,4 @@ SELECT * FROM the_table;
-- clean up after ourselves
\c - - - :master_port
DROP TABLE the_table;
DROP TABLE stock;

View File

@ -22,11 +22,11 @@ FROM
-- Update metadata in order to make all shards equal
-- note that the table is created on multi_insert_select_create_table.sql
UPDATE
pg_dist_shard
SET
shardmaxvalue = '14947'
WHERE
UPDATE
pg_dist_shard
SET
shardmaxvalue = '14947'
WHERE
shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'orders_subquery'::regclass ORDER BY shardid DESC LIMIT 1);
SET client_min_messages TO DEBUG1;
@ -272,46 +272,46 @@ LIMIT 10;
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
-- outer joins on reference tables with simple expressions should work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
LEFT JOIN users_reference_table t2 ON t1.user_id > t2.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
-- outer joins on distributed tables with simple expressions should not work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
LEFT JOIN users_table t2 ON t1.user_id > t2.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
-- outer joins on reference tables with expressions should work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
LEFT JOIN users_reference_table t2 ON t1.user_id = (CASE WHEN t2.user_id > 3 THEN 3 ELSE t2.user_id END)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
-- outer joins on distributed tables and reference tables with expressions should work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM
FROM
users_table t0 LEFT JOIN
events_table t1 ON t0.user_id = t1.user_id
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
-- outer joins on distributed tables with expressions should not work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM
FROM
users_table t0 LEFT JOIN
events_table t1 ON t0.user_id = trunc(t1.user_id)
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
-- outer joins as subqueries should work
@ -329,7 +329,7 @@ LIMIT 5;
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
-- distinct queries work
@ -467,6 +467,10 @@ WHERE
unit_price > 1000 AND
unit_price < 10000;
-- Check unsupported subqueries in target list
SELECT (SELECT 1) FROM orders_subquery;
SELECT sum((SELECT 1)) FROM orders_subquery;
-- Check that if subquery is pulled, we don't error and run query properly.
SELECT count(*) FROM
@ -780,7 +784,7 @@ GROUP BY
count_pay
ORDER BY
count_pay;
-- Lateral join subquery pushdown
-- set subquery_pushdown since there is limit in the query
SET citus.subquery_pushdown to ON;