mirror of https://github.com/citusdata/citus.git
Add planner changes and tests for subquery on reference tables
parent
45717dd013
commit
c4e3b8b5e1
|
@ -464,6 +464,13 @@ RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInter
|
||||||
}
|
}
|
||||||
|
|
||||||
shardOpExpressions = ShardIntervalOpExpressions(shardInterval, rteIndex);
|
shardOpExpressions = ShardIntervalOpExpressions(shardInterval, rteIndex);
|
||||||
|
|
||||||
|
/* means it is a reference table and do not add any shard interval information */
|
||||||
|
if (shardOpExpressions == NIL)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
shardRestrictionList = make_simple_restrictinfo((Expr *) shardOpExpressions);
|
shardRestrictionList = make_simple_restrictinfo((Expr *) shardOpExpressions);
|
||||||
extendedBaseRestrictInfo = lappend(extendedBaseRestrictInfo,
|
extendedBaseRestrictInfo = lappend(extendedBaseRestrictInfo,
|
||||||
shardRestrictionList);
|
shardRestrictionList);
|
||||||
|
|
|
@ -997,6 +997,23 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
Expr *targetExpression = targetEntry->expr;
|
Expr *targetExpression = targetEntry->expr;
|
||||||
|
|
||||||
bool isPartitionColumn = IsPartitionColumn(targetExpression, query);
|
bool isPartitionColumn = IsPartitionColumn(targetExpression, query);
|
||||||
|
Oid relationId = InvalidOid;
|
||||||
|
Var *column = NULL;
|
||||||
|
|
||||||
|
FindReferencedTableColumn(targetExpression, NIL, query, &relationId, &column);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the expression belongs to reference table directly returns true,
|
||||||
|
* since logic of caller function checks whether it can find the necessaary
|
||||||
|
* data from each node.
|
||||||
|
*/
|
||||||
|
if (IsDistributedTable(relationId) && PartitionMethod(relationId) ==
|
||||||
|
DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
targetListOnPartitionColumn = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (isPartitionColumn)
|
if (isPartitionColumn)
|
||||||
{
|
{
|
||||||
FieldSelect *compositeField = CompositeFieldRecursive(targetExpression,
|
FieldSelect *compositeField = CompositeFieldRecursive(targetExpression,
|
||||||
|
@ -2728,6 +2745,50 @@ ExtractRangeTableRelationWalker(Node *node, List **rangeTableRelationList)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Get the list of relations from the given node. Note that the difference between
|
||||||
|
* this function and ExtractRangeTableRelationWalker is that this one recursively
|
||||||
|
* walk into range table entries if it can.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
ExtractRTRelationFromNode(Node *node, List **rangeTableList)
|
||||||
|
{
|
||||||
|
bool walkIsComplete = false;
|
||||||
|
if (node == NULL)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsA(node, RangeTblEntry))
|
||||||
|
{
|
||||||
|
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node;
|
||||||
|
|
||||||
|
if (rangeTableEntry->rtekind == RTE_RELATION &&
|
||||||
|
rangeTableEntry->relkind != RELKIND_VIEW)
|
||||||
|
{
|
||||||
|
(*rangeTableList) = lappend(*rangeTableList, rangeTableEntry);
|
||||||
|
}
|
||||||
|
else if (rangeTableEntry->rtekind == RTE_SUBQUERY)
|
||||||
|
{
|
||||||
|
walkIsComplete = query_tree_walker(rangeTableEntry->subquery,
|
||||||
|
ExtractRTRelationFromNode,
|
||||||
|
rangeTableList, QTW_EXAMINE_RTES);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsA(node, Query))
|
||||||
|
{
|
||||||
|
walkIsComplete = query_tree_walker((Query *) node, ExtractRTRelationFromNode,
|
||||||
|
rangeTableList, QTW_EXAMINE_RTES);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
walkIsComplete = expression_tree_walker(node, ExtractRTRelationFromNode,
|
||||||
|
rangeTableList);
|
||||||
|
}
|
||||||
|
|
||||||
|
return walkIsComplete;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ExtractRangeTableEntryWalker walks over a query tree, and finds all range
|
* ExtractRangeTableEntryWalker walks over a query tree, and finds all range
|
||||||
* table entries. For recursing into the query tree, this function uses the
|
* table entries. For recursing into the query tree, this function uses the
|
||||||
|
|
|
@ -127,6 +127,8 @@ static Job * BuildJobTreeTaskList(Job *jobTree,
|
||||||
static List * SubquerySqlTaskList(Job *job,
|
static List * SubquerySqlTaskList(Job *job,
|
||||||
PlannerRestrictionContext *plannerRestrictionContext);
|
PlannerRestrictionContext *plannerRestrictionContext);
|
||||||
static void ErrorIfUnsupportedShardDistribution(Query *query);
|
static void ErrorIfUnsupportedShardDistribution(Query *query);
|
||||||
|
static void ErrorIfUnsupportedJoinReferenceTable(
|
||||||
|
PlannerRestrictionContext *plannerRestrictionContext);
|
||||||
static bool CoPartitionedTables(Oid firstRelationId, Oid secondRelationId);
|
static bool CoPartitionedTables(Oid firstRelationId, Oid secondRelationId);
|
||||||
static bool ShardIntervalsEqual(FmgrInfo *comparisonFunction,
|
static bool ShardIntervalsEqual(FmgrInfo *comparisonFunction,
|
||||||
ShardInterval *firstInterval,
|
ShardInterval *firstInterval,
|
||||||
|
@ -195,6 +197,7 @@ static StringInfo MergeTableQueryString(uint32 taskIdIndex, List *targetEntryLis
|
||||||
static StringInfo IntermediateTableQueryString(uint64 jobId, uint32 taskIdIndex,
|
static StringInfo IntermediateTableQueryString(uint64 jobId, uint32 taskIdIndex,
|
||||||
Query *reduceQuery);
|
Query *reduceQuery);
|
||||||
static uint32 FinalTargetEntryCount(List *targetEntryList);
|
static uint32 FinalTargetEntryCount(List *targetEntryList);
|
||||||
|
static bool ReferenceTableExist(PlannerInfo *plannerInfo, RelOptInfo *relationInfo);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2038,6 +2041,9 @@ SubquerySqlTaskList(Job *job, PlannerRestrictionContext *plannerRestrictionConte
|
||||||
/* error if shards are not co-partitioned */
|
/* error if shards are not co-partitioned */
|
||||||
ErrorIfUnsupportedShardDistribution(subquery);
|
ErrorIfUnsupportedShardDistribution(subquery);
|
||||||
|
|
||||||
|
/* error if unsupported join on reference tables */
|
||||||
|
ErrorIfUnsupportedJoinReferenceTable(plannerRestrictionContext);
|
||||||
|
|
||||||
/* get list of all range tables in subquery tree */
|
/* get list of all range tables in subquery tree */
|
||||||
ExtractRangeTableRelationWalker((Node *) subquery, &rangeTableList);
|
ExtractRangeTableRelationWalker((Node *) subquery, &rangeTableList);
|
||||||
|
|
||||||
|
@ -2061,7 +2067,16 @@ SubquerySqlTaskList(Job *job, PlannerRestrictionContext *plannerRestrictionConte
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert(targetCacheEntry != NULL);
|
/*
|
||||||
|
* That means all table are reference table and we can assign any reference
|
||||||
|
* table as an anchor one .
|
||||||
|
*/
|
||||||
|
if (targetCacheEntry == NULL)
|
||||||
|
{
|
||||||
|
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) linitial(rangeTableList);
|
||||||
|
relationId = rangeTableEntry->relid;
|
||||||
|
targetCacheEntry = DistributedTableCacheEntry(relationId);
|
||||||
|
}
|
||||||
|
|
||||||
shardCount = targetCacheEntry->shardIntervalArrayLength;
|
shardCount = targetCacheEntry->shardIntervalArrayLength;
|
||||||
for (shardOffset = 0; shardOffset < shardCount; shardOffset++)
|
for (shardOffset = 0; shardOffset < shardCount; shardOffset++)
|
||||||
|
@ -2088,6 +2103,146 @@ SubquerySqlTaskList(Job *job, PlannerRestrictionContext *plannerRestrictionConte
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ErrorIfUnsupportedJoinReferenceTable errors out if there exists a outer join
|
||||||
|
* exist between reference table and distributed tables.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
ErrorIfUnsupportedJoinReferenceTable(PlannerRestrictionContext *plannerRestrictionContext)
|
||||||
|
{
|
||||||
|
List *joinRestrictionList =
|
||||||
|
plannerRestrictionContext->joinRestrictionContext->joinRestrictionList;
|
||||||
|
ListCell *joinRestrictionCell = NULL;
|
||||||
|
|
||||||
|
foreach(joinRestrictionCell, joinRestrictionList)
|
||||||
|
{
|
||||||
|
JoinRestriction *joinRestriction = (JoinRestriction *) lfirst(
|
||||||
|
joinRestrictionCell);
|
||||||
|
JoinType joinType = joinRestriction->joinType;
|
||||||
|
PlannerInfo *plannerInfo = joinRestriction->plannerInfo;
|
||||||
|
RelOptInfo *innerrel = joinRestriction->innerrel;
|
||||||
|
RelOptInfo *outerrel = joinRestriction->outerrel;
|
||||||
|
|
||||||
|
switch (joinType)
|
||||||
|
{
|
||||||
|
case JOIN_SEMI:
|
||||||
|
{
|
||||||
|
if (ReferenceTableExist(plannerInfo, outerrel))
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("can not plan query having reference table on"
|
||||||
|
" the left part of semi join")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case JOIN_ANTI:
|
||||||
|
{
|
||||||
|
if (ReferenceTableExist(plannerInfo, innerrel))
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("can not plan query having reference table on"
|
||||||
|
" the left part of anti join")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case JOIN_LEFT:
|
||||||
|
{
|
||||||
|
if (ReferenceTableExist(plannerInfo, outerrel))
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("can not plan query having reference table on"
|
||||||
|
" the left part of left join")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case JOIN_RIGHT:
|
||||||
|
{
|
||||||
|
if (ReferenceTableExist(plannerInfo, innerrel))
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("can not plan query having reference table on"
|
||||||
|
" the right part of right join")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case JOIN_FULL:
|
||||||
|
{
|
||||||
|
if (ReferenceTableExist(plannerInfo, innerrel) || ReferenceTableExist(
|
||||||
|
plannerInfo, outerrel))
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg(
|
||||||
|
"can not plan query having reference table as a"
|
||||||
|
" part of full join")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
{ }
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ReferenceTableExist check whether the relationInfo has reference table.
|
||||||
|
* Since relation ids of relationInfo indexes to the range table entry list of
|
||||||
|
* query, query is also passed.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
ReferenceTableExist(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
|
||||||
|
{
|
||||||
|
Relids relids = bms_copy(relationInfo->relids);
|
||||||
|
int relationId = -1;
|
||||||
|
|
||||||
|
while ((relationId = bms_first_member(relids)) >= 0)
|
||||||
|
{
|
||||||
|
RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];
|
||||||
|
|
||||||
|
/* relationInfo has this range table entry */
|
||||||
|
if (RTEContainsReferenceTable(rangeTableEntry))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RTEContainsReferenceTable checks whether there exist a reference table in the
|
||||||
|
* given range table entry.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
RTEContainsReferenceTable(RangeTblEntry *rangeTableEntry)
|
||||||
|
{
|
||||||
|
List *relationList = NIL;
|
||||||
|
ListCell *relationCell = NULL;
|
||||||
|
ExtractRTRelationFromNode((Node *) rangeTableEntry, &relationList);
|
||||||
|
|
||||||
|
foreach(relationCell, relationList)
|
||||||
|
{
|
||||||
|
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(relationCell);
|
||||||
|
Oid relationId = rangeTableEntry->relid;
|
||||||
|
|
||||||
|
if (IsDistributedTable(relationId) && PartitionMethod(relationId) ==
|
||||||
|
DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ErrorIfUnsupportedShardDistribution gets list of relations in the given query
|
* ErrorIfUnsupportedShardDistribution gets list of relations in the given query
|
||||||
* and checks if two conditions below hold for them, otherwise it errors out.
|
* and checks if two conditions below hold for them, otherwise it errors out.
|
||||||
|
@ -2121,10 +2276,8 @@ ErrorIfUnsupportedShardDistribution(Query *query)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
/* do not need to handle reference tables */
|
||||||
errmsg("cannot push down this subquery"),
|
continue;
|
||||||
errdetail("Currently range and hash partitioned "
|
|
||||||
"relations are supported")));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2141,6 +2294,13 @@ ErrorIfUnsupportedShardDistribution(Query *query)
|
||||||
Oid relationId = lfirst_oid(relationIdCell);
|
Oid relationId = lfirst_oid(relationIdCell);
|
||||||
bool coPartitionedTables = false;
|
bool coPartitionedTables = false;
|
||||||
Oid currentRelationId = relationId;
|
Oid currentRelationId = relationId;
|
||||||
|
char partitionMethod = PartitionMethod(relationId);
|
||||||
|
|
||||||
|
/* do not need to check reference tables */
|
||||||
|
if (partitionMethod == DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* get shard list of first relation and continue for the next relation */
|
/* get shard list of first relation and continue for the next relation */
|
||||||
if (relationIndex == 0)
|
if (relationIndex == 0)
|
||||||
|
@ -2298,9 +2458,6 @@ SubqueryTaskCreate(Query *originalQuery, ShardInterval *shardInterval,
|
||||||
RestrictInfo *shardRestrictionList = NULL;
|
RestrictInfo *shardRestrictionList = NULL;
|
||||||
DeferredErrorMessage *planningError = NULL;
|
DeferredErrorMessage *planningError = NULL;
|
||||||
|
|
||||||
/* such queries should go through router planner */
|
|
||||||
Assert(!restrictionContext->allReferenceTables);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add the restriction qual parameter value in all baserestrictinfos.
|
* Add the restriction qual parameter value in all baserestrictinfos.
|
||||||
* Note that this has to be done on a copy, as the originals are needed
|
* Note that this has to be done on a copy, as the originals are needed
|
||||||
|
@ -2315,6 +2472,12 @@ SubqueryTaskCreate(Query *originalQuery, ShardInterval *shardInterval,
|
||||||
|
|
||||||
shardOpExpressions = ShardIntervalOpExpressions(shardInterval, rteIndex);
|
shardOpExpressions = ShardIntervalOpExpressions(shardInterval, rteIndex);
|
||||||
|
|
||||||
|
/* means it is a reference table and do not add any shard interval info */
|
||||||
|
if (shardOpExpressions == NIL)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
shardRestrictionList = make_simple_restrictinfo((Expr *) shardOpExpressions);
|
shardRestrictionList = make_simple_restrictinfo((Expr *) shardOpExpressions);
|
||||||
extendedBaseRestrictInfo = lappend(extendedBaseRestrictInfo,
|
extendedBaseRestrictInfo = lappend(extendedBaseRestrictInfo,
|
||||||
shardRestrictionList);
|
shardRestrictionList);
|
||||||
|
|
|
@ -740,6 +740,8 @@ multi_join_restriction_hook(PlannerInfo *root,
|
||||||
joinRestriction->joinType = jointype;
|
joinRestriction->joinType = jointype;
|
||||||
joinRestriction->joinRestrictInfoList = restrictInfoList;
|
joinRestriction->joinRestrictInfoList = restrictInfoList;
|
||||||
joinRestriction->plannerInfo = root;
|
joinRestriction->plannerInfo = root;
|
||||||
|
joinRestriction->innerrel = innerrel;
|
||||||
|
joinRestriction->outerrel = outerrel;
|
||||||
|
|
||||||
joinRestrictionContext->joinRestrictionList =
|
joinRestrictionContext->joinRestrictionList =
|
||||||
lappend(joinRestrictionContext->joinRestrictionList, joinRestriction);
|
lappend(joinRestrictionContext->joinRestrictionList, joinRestriction);
|
||||||
|
|
|
@ -300,10 +300,8 @@ ShardIntervalOpExpressions(ShardInterval *shardInterval, Index rteIndex)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
/* do not add any shard range interval for reference tables */
|
||||||
errmsg("cannot create shard interval operator expression for "
|
return NIL;
|
||||||
"distributed relations other than hash, range and append distributed "
|
|
||||||
"relations")));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* build the base expression for constraint */
|
/* build the base expression for constraint */
|
||||||
|
|
|
@ -206,6 +206,7 @@ extern List * TableEntryList(List *rangeTableList);
|
||||||
extern List * UsedTableEntryList(Query *query);
|
extern List * UsedTableEntryList(Query *query);
|
||||||
extern bool ExtractRangeTableRelationWalker(Node *node, List **rangeTableList);
|
extern bool ExtractRangeTableRelationWalker(Node *node, List **rangeTableList);
|
||||||
extern bool ExtractRangeTableEntryWalker(Node *node, List **rangeTableList);
|
extern bool ExtractRangeTableEntryWalker(Node *node, List **rangeTableList);
|
||||||
|
extern bool ExtractRTRelationFromNode(Node *node, List **rangeTableList);
|
||||||
extern List * pull_var_clause_default(Node *node);
|
extern List * pull_var_clause_default(Node *node);
|
||||||
extern bool OperatorImplementsEquality(Oid opno);
|
extern bool OperatorImplementsEquality(Oid opno);
|
||||||
|
|
||||||
|
|
|
@ -286,6 +286,7 @@ extern Const * MakeInt4Constant(Datum constantValue);
|
||||||
extern int CompareShardPlacements(const void *leftElement, const void *rightElement);
|
extern int CompareShardPlacements(const void *leftElement, const void *rightElement);
|
||||||
extern bool ShardIntervalsOverlap(ShardInterval *firstInterval,
|
extern bool ShardIntervalsOverlap(ShardInterval *firstInterval,
|
||||||
ShardInterval *secondInterval);
|
ShardInterval *secondInterval);
|
||||||
|
extern bool RTEContainsReferenceTable(RangeTblEntry *rangeTableEntry);
|
||||||
|
|
||||||
/* function declarations for Task and Task list operations */
|
/* function declarations for Task and Task list operations */
|
||||||
extern bool TasksEqual(const Task *a, const Task *b);
|
extern bool TasksEqual(const Task *a, const Task *b);
|
||||||
|
|
|
@ -52,6 +52,8 @@ typedef struct JoinRestriction
|
||||||
JoinType joinType;
|
JoinType joinType;
|
||||||
List *joinRestrictInfoList;
|
List *joinRestrictInfoList;
|
||||||
PlannerInfo *plannerInfo;
|
PlannerInfo *plannerInfo;
|
||||||
|
RelOptInfo *innerrel;
|
||||||
|
RelOptInfo *outerrel;
|
||||||
} JoinRestriction;
|
} JoinRestriction;
|
||||||
|
|
||||||
typedef struct PlannerRestrictionContext
|
typedef struct PlannerRestrictionContext
|
||||||
|
|
|
@ -0,0 +1,376 @@
|
||||||
|
--
|
||||||
|
-- multi subquery complex queries aims to expand existing subquery pushdown
|
||||||
|
-- regression tests to cover more caeses
|
||||||
|
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
|
||||||
|
--
|
||||||
|
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
||||||
|
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||||
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||||
|
|
||||||
|
SET citus.enable_router_execution TO FALSE;
|
||||||
|
CREATE TABLE events_reference_table as SELECT * FROM events_table;
|
||||||
|
CREATE TABLE users_reference_table as SELECT * FROM users_table;
|
||||||
|
SELECT create_reference_table('events_reference_table');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_reference_table('users_reference_table');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||||
|
SET citus.subquery_pushdown to ON;
|
||||||
|
SELECT user_id, lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"some_users_data".user_id, lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
filter_users_1.user_id, time AS lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_where_1_1.user_id
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
|
||||||
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
|
||||||
|
filter_users_1
|
||||||
|
JOIN LATERAL
|
||||||
|
(SELECT
|
||||||
|
user_id, time
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 12 and user_id < 16 AND
|
||||||
|
user_id = filter_users_1.user_id
|
||||||
|
ORDER BY
|
||||||
|
time DESC
|
||||||
|
LIMIT 1) "last_events_1"
|
||||||
|
ON TRUE
|
||||||
|
ORDER BY
|
||||||
|
time DESC
|
||||||
|
LIMIT 10) "some_recent_users"
|
||||||
|
JOIN LATERAL
|
||||||
|
(SELECT
|
||||||
|
"users".user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||||
|
"users"."value_2" > 70
|
||||||
|
LIMIT 1) "some_users_data"
|
||||||
|
ON TRUE
|
||||||
|
ORDER BY
|
||||||
|
lastseen DESC
|
||||||
|
LIMIT 10) "some_users"
|
||||||
|
ORDER BY
|
||||||
|
user_id DESC
|
||||||
|
LIMIT 10;
|
||||||
|
user_id | lastseen
|
||||||
|
---------+---------------------------------
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
14 | Tue Jan 21 05:46:51.286381 2014
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
-- NESTED INNER JOINs with reference tables
|
||||||
|
SELECT
|
||||||
|
count(*) AS value, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
*
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id", "events"."value_2"
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
user_where_1_1.real_user_id
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" as real_user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 and value_2 > 50 ) user_where_1_1
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 and value_3 > 50 ) user_where_1_join_1
|
||||||
|
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
||||||
|
GROUP BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
generated_group_field DESC, value DESC;
|
||||||
|
value | generated_group_field
|
||||||
|
-------+-----------------------
|
||||||
|
1 | 966
|
||||||
|
1 | 917
|
||||||
|
1 | 905
|
||||||
|
1 | 868
|
||||||
|
1 | 836
|
||||||
|
1 | 791
|
||||||
|
1 | 671
|
||||||
|
1 | 642
|
||||||
|
1 | 358
|
||||||
|
1 | 317
|
||||||
|
1 | 307
|
||||||
|
1 | 302
|
||||||
|
1 | 214
|
||||||
|
1 | 166
|
||||||
|
1 | 116
|
||||||
|
1 | 1
|
||||||
|
(16 rows)
|
||||||
|
|
||||||
|
-- single level inner joins with reference tables
|
||||||
|
SELECT
|
||||||
|
"value_3", count(*) AS cnt
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"value_3", "user_id", random()
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_in_segment_1.user_id, value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, value_3 * 2 as value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id", value_3
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 and value_2 > 30
|
||||||
|
) simple_user_where_1
|
||||||
|
) all_buckets_1
|
||||||
|
) users_in_segment_1
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 and value_2 > 60
|
||||||
|
) some_users_data
|
||||||
|
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
||||||
|
) segmentalias_1) "tempQuery"
|
||||||
|
GROUP BY "value_3"
|
||||||
|
ORDER BY cnt, value_3 DESC LIMIT 10;
|
||||||
|
value_3 | cnt
|
||||||
|
---------+-----
|
||||||
|
556 | 75
|
||||||
|
228 | 75
|
||||||
|
146 | 75
|
||||||
|
70 | 75
|
||||||
|
1442 | 79
|
||||||
|
1232 | 79
|
||||||
|
1090 | 79
|
||||||
|
1012 | 79
|
||||||
|
886 | 79
|
||||||
|
674 | 79
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
-- nested LATERAL JOINs with reference tables
|
||||||
|
SET citus.subquery_pushdown to ON;
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
filter_users_1.user_id, value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 20 and user_id < 70 and users.value_2 = 200) filter_users_1
|
||||||
|
JOIN LATERAL
|
||||||
|
(SELECT
|
||||||
|
user_id, value_3
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 20 and user_id < 70 AND
|
||||||
|
("events".user_id = "filter_users_1".user_id)
|
||||||
|
ORDER BY
|
||||||
|
value_3 DESC
|
||||||
|
LIMIT 1) "last_events_1" ON true
|
||||||
|
ORDER BY value_3 DESC
|
||||||
|
LIMIT 10) "some_recent_users"
|
||||||
|
JOIN LATERAL
|
||||||
|
(SELECT
|
||||||
|
"users".user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||||
|
users.value_2 > 200
|
||||||
|
LIMIT 1) "some_users_data" ON true
|
||||||
|
ORDER BY
|
||||||
|
value_3 DESC
|
||||||
|
LIMIT 10) "some_users"
|
||||||
|
ORDER BY
|
||||||
|
value_3 DESC
|
||||||
|
LIMIT 10;
|
||||||
|
user_id | value_3
|
||||||
|
---------+---------
|
||||||
|
44 | 998
|
||||||
|
65 | 996
|
||||||
|
66 | 996
|
||||||
|
37 | 995
|
||||||
|
57 | 989
|
||||||
|
21 | 985
|
||||||
|
(6 rows)
|
||||||
|
|
||||||
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||||
|
-- left side of the LEFT JOIN.
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
ERROR: can not plan query having reference table on the left part of left join
|
||||||
|
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||||
|
-- right side of the RIGHT JOIN.
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users") "right_group_by_1"
|
||||||
|
ON ("right_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
ERROR: can not plan query having reference table on the left part of left join
|
||||||
|
-- Outer subquery with reference table
|
||||||
|
SELECT "some_users_data".user_id, lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, max(time) AS lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, time
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, time
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40) "events_1"
|
||||||
|
ORDER BY
|
||||||
|
time DESC) "recent_events_1"
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
ORDER BY
|
||||||
|
max(TIME) DESC) "some_recent_users"
|
||||||
|
FULL JOIN
|
||||||
|
(SELECT
|
||||||
|
"users".user_id
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
users.value_2 > 50 and users.value_2 < 55) "some_users_data"
|
||||||
|
ON "some_users_data"."user_id" = "some_recent_users"."user_id"
|
||||||
|
ORDER BY
|
||||||
|
user_id
|
||||||
|
limit 50;
|
||||||
|
ERROR: can not plan query having reference table as a part of full join
|
|
@ -0,0 +1,144 @@
|
||||||
|
--
|
||||||
|
-- queries to test the subquery pushdown on reference tables
|
||||||
|
-- subqueries in WHERE with greater operator
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
49
|
||||||
|
55
|
||||||
|
56
|
||||||
|
63
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
-- have reference table without any equality, should error out
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
-- users that appeared more than 118 times, should run since the reference table
|
||||||
|
-- on the right side of the semi join
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE 118 <=
|
||||||
|
(SELECT
|
||||||
|
count(*)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
ORDER BY
|
||||||
|
user_id;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
13
|
||||||
|
17
|
||||||
|
23
|
||||||
|
25
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
-- should error out since reference table exist on the left side
|
||||||
|
-- of the left lateral join
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND user_id IN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
e1.user_id
|
||||||
|
FROM (
|
||||||
|
-- Get the first time each user viewed the homepage.
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS view_homepage,
|
||||||
|
min(time) AS view_homepage_time
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||||
|
GROUP BY user_id
|
||||||
|
) e1 LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS use_demo,
|
||||||
|
time AS use_demo_time
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e1.user_id AND
|
||||||
|
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||||
|
ORDER BY time
|
||||||
|
) e2 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS enter_credit_card,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e2.user_id AND
|
||||||
|
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||||
|
ORDER BY time
|
||||||
|
) e3 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS submit_card_info,
|
||||||
|
user_id,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e3.user_id AND
|
||||||
|
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||||
|
ORDER BY time
|
||||||
|
) e4 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS see_bought_screen
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e4.user_id AND
|
||||||
|
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||||
|
ORDER BY time
|
||||||
|
) e5 ON true
|
||||||
|
group by e1.user_id
|
||||||
|
HAVING sum(submit_card_info) > 0
|
||||||
|
)
|
||||||
|
ORDER BY 1, 2;
|
||||||
|
ERROR: can not plan query having reference table on the left part of left join
|
||||||
|
DROP TABLE events_reference_table;
|
||||||
|
DROP TABLE users_reference_table;
|
|
@ -51,6 +51,8 @@ test: multi_deparse_shard_query multi_distributed_transaction_id
|
||||||
test: multi_basic_queries multi_complex_expressions
|
test: multi_basic_queries multi_complex_expressions
|
||||||
test: multi_explain
|
test: multi_explain
|
||||||
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
||||||
|
test: multi_subquery_complex_reference_clause
|
||||||
|
test: multi_subquery_in_where_reference_clause
|
||||||
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
|
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
|
||||||
test: multi_reference_table
|
test: multi_reference_table
|
||||||
test: multi_outer_join_reference
|
test: multi_outer_join_reference
|
||||||
|
|
|
@ -0,0 +1,313 @@
|
||||||
|
--
|
||||||
|
-- multi subquery complex queries aims to expand existing subquery pushdown
|
||||||
|
-- regression tests to cover more caeses
|
||||||
|
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
|
||||||
|
--
|
||||||
|
|
||||||
|
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
||||||
|
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||||
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||||
|
|
||||||
|
SET citus.enable_router_execution TO FALSE;
|
||||||
|
|
||||||
|
CREATE TABLE events_reference_table as SELECT * FROM events_table;
|
||||||
|
CREATE TABLE users_reference_table as SELECT * FROM users_table;
|
||||||
|
|
||||||
|
SELECT create_reference_table('events_reference_table');
|
||||||
|
SELECT create_reference_table('users_reference_table');
|
||||||
|
|
||||||
|
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||||
|
SET citus.subquery_pushdown to ON;
|
||||||
|
SELECT user_id, lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"some_users_data".user_id, lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
filter_users_1.user_id, time AS lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_where_1_1.user_id
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
|
||||||
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
|
||||||
|
filter_users_1
|
||||||
|
JOIN LATERAL
|
||||||
|
(SELECT
|
||||||
|
user_id, time
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 12 and user_id < 16 AND
|
||||||
|
user_id = filter_users_1.user_id
|
||||||
|
ORDER BY
|
||||||
|
time DESC
|
||||||
|
LIMIT 1) "last_events_1"
|
||||||
|
ON TRUE
|
||||||
|
ORDER BY
|
||||||
|
time DESC
|
||||||
|
LIMIT 10) "some_recent_users"
|
||||||
|
JOIN LATERAL
|
||||||
|
(SELECT
|
||||||
|
"users".user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||||
|
"users"."value_2" > 70
|
||||||
|
LIMIT 1) "some_users_data"
|
||||||
|
ON TRUE
|
||||||
|
ORDER BY
|
||||||
|
lastseen DESC
|
||||||
|
LIMIT 10) "some_users"
|
||||||
|
ORDER BY
|
||||||
|
user_id DESC
|
||||||
|
LIMIT 10;
|
||||||
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
|
||||||
|
-- NESTED INNER JOINs with reference tables
|
||||||
|
SELECT
|
||||||
|
count(*) AS value, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
*
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id", "events"."value_2"
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
user_where_1_1.real_user_id
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" as real_user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 and value_2 > 50 ) user_where_1_1
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 and value_3 > 50 ) user_where_1_join_1
|
||||||
|
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
||||||
|
GROUP BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
generated_group_field DESC, value DESC;
|
||||||
|
|
||||||
|
-- single level inner joins with reference tables
|
||||||
|
SELECT
|
||||||
|
"value_3", count(*) AS cnt
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"value_3", "user_id", random()
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_in_segment_1.user_id, value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, value_3 * 2 as value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id", value_3
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 and value_2 > 30
|
||||||
|
) simple_user_where_1
|
||||||
|
) all_buckets_1
|
||||||
|
) users_in_segment_1
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40 and value_2 > 60
|
||||||
|
) some_users_data
|
||||||
|
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
||||||
|
) segmentalias_1) "tempQuery"
|
||||||
|
GROUP BY "value_3"
|
||||||
|
ORDER BY cnt, value_3 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- nested LATERAL JOINs with reference tables
|
||||||
|
SET citus.subquery_pushdown to ON;
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
filter_users_1.user_id, value_3
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 20 and user_id < 70 and users.value_2 = 200) filter_users_1
|
||||||
|
JOIN LATERAL
|
||||||
|
(SELECT
|
||||||
|
user_id, value_3
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 20 and user_id < 70 AND
|
||||||
|
("events".user_id = "filter_users_1".user_id)
|
||||||
|
ORDER BY
|
||||||
|
value_3 DESC
|
||||||
|
LIMIT 1) "last_events_1" ON true
|
||||||
|
ORDER BY value_3 DESC
|
||||||
|
LIMIT 10) "some_recent_users"
|
||||||
|
JOIN LATERAL
|
||||||
|
(SELECT
|
||||||
|
"users".user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||||
|
users.value_2 > 200
|
||||||
|
LIMIT 1) "some_users_data" ON true
|
||||||
|
ORDER BY
|
||||||
|
value_3 DESC
|
||||||
|
LIMIT 10) "some_users"
|
||||||
|
ORDER BY
|
||||||
|
value_3 DESC
|
||||||
|
LIMIT 10;
|
||||||
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
|
||||||
|
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||||
|
-- left side of the LEFT JOIN.
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||||
|
-- right side of the RIGHT JOIN.
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users") "right_group_by_1"
|
||||||
|
ON ("right_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- Outer subquery with reference table
|
||||||
|
SELECT "some_users_data".user_id, lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, max(time) AS lastseen
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, time
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, time
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 10 and user_id < 40) "events_1"
|
||||||
|
ORDER BY
|
||||||
|
time DESC) "recent_events_1"
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
ORDER BY
|
||||||
|
max(TIME) DESC) "some_recent_users"
|
||||||
|
FULL JOIN
|
||||||
|
(SELECT
|
||||||
|
"users".user_id
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
users.value_2 > 50 and users.value_2 < 55) "some_users_data"
|
||||||
|
ON "some_users_data"."user_id" = "some_recent_users"."user_id"
|
||||||
|
ORDER BY
|
||||||
|
user_id
|
||||||
|
limit 50;
|
|
@ -0,0 +1,130 @@
|
||||||
|
--
|
||||||
|
-- queries to test the subquery pushdown on reference tables
|
||||||
|
|
||||||
|
-- subqueries in WHERE with greater operator
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- have reference table without any equality, should error out
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- users that appeared more than 118 times, should run since the reference table
|
||||||
|
-- on the right side of the semi join
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE 118 <=
|
||||||
|
(SELECT
|
||||||
|
count(*)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
ORDER BY
|
||||||
|
user_id;
|
||||||
|
|
||||||
|
-- should error out since reference table exist on the left side
|
||||||
|
-- of the left lateral join
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND user_id IN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
e1.user_id
|
||||||
|
FROM (
|
||||||
|
-- Get the first time each user viewed the homepage.
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS view_homepage,
|
||||||
|
min(time) AS view_homepage_time
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||||
|
GROUP BY user_id
|
||||||
|
) e1 LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS use_demo,
|
||||||
|
time AS use_demo_time
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e1.user_id AND
|
||||||
|
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||||
|
ORDER BY time
|
||||||
|
) e2 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS enter_credit_card,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e2.user_id AND
|
||||||
|
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||||
|
ORDER BY time
|
||||||
|
) e3 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS submit_card_info,
|
||||||
|
user_id,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e3.user_id AND
|
||||||
|
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||||
|
ORDER BY time
|
||||||
|
) e4 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS see_bought_screen
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e4.user_id AND
|
||||||
|
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||||
|
ORDER BY time
|
||||||
|
) e5 ON true
|
||||||
|
group by e1.user_id
|
||||||
|
HAVING sum(submit_card_info) > 0
|
||||||
|
)
|
||||||
|
ORDER BY 1, 2;
|
||||||
|
|
||||||
|
DROP TABLE events_reference_table;
|
||||||
|
DROP TABLE users_reference_table;
|
Loading…
Reference in New Issue