mirror of https://github.com/citusdata/citus.git
Enable top level subquery join queries
This work enables - Top level subquery joins - Joins between subqueries and relations - Joins involving more than 2 range table entries A new regression test file is added to reflect enabled test casespull/1347/head
parent
a3cadbb1e1
commit
60fa537e3f
|
@ -3051,8 +3051,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
|
|||
* ErrorIfUnsupportedTableCombination checks if the given query tree contains any
|
||||
* unsupported range table combinations. For this, the function walks over all
|
||||
* range tables in the join tree, and checks if they correspond to simple relations
|
||||
* or subqueries. It also checks if there is a join between a regular table and
|
||||
* a subquery and if join is on more than two range table entries.
|
||||
* or subqueries.
|
||||
*/
|
||||
static void
|
||||
ErrorIfUnsupportedTableCombination(Query *queryTree)
|
||||
|
@ -3062,8 +3061,6 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
|
|||
ListCell *joinTreeTableIndexCell = NULL;
|
||||
bool unsupporteTableCombination = false;
|
||||
char *errorDetail = NULL;
|
||||
uint32 relationRangeTableCount = 0;
|
||||
uint32 subqueryRangeTableCount = 0;
|
||||
|
||||
/*
|
||||
* Extract all range table indexes from the join tree. Note that sub-queries
|
||||
|
@ -3086,15 +3083,8 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
|
|||
* Check if the range table in the join tree is a simple relation or a
|
||||
* subquery.
|
||||
*/
|
||||
if (rangeTableEntry->rtekind == RTE_RELATION)
|
||||
{
|
||||
relationRangeTableCount++;
|
||||
}
|
||||
else if (rangeTableEntry->rtekind == RTE_SUBQUERY)
|
||||
{
|
||||
subqueryRangeTableCount++;
|
||||
}
|
||||
else
|
||||
if (rangeTableEntry->rtekind != RTE_RELATION &&
|
||||
rangeTableEntry->rtekind != RTE_SUBQUERY)
|
||||
{
|
||||
unsupporteTableCombination = true;
|
||||
errorDetail = "Table expressions other than simple relations and "
|
||||
|
@ -3103,19 +3093,6 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
|
|||
}
|
||||
}
|
||||
|
||||
if ((subqueryRangeTableCount > 0) && (relationRangeTableCount > 0))
|
||||
{
|
||||
unsupporteTableCombination = true;
|
||||
errorDetail = "Joins between regular tables and subqueries are unsupported";
|
||||
}
|
||||
|
||||
if ((relationRangeTableCount > 2) || (subqueryRangeTableCount > 2))
|
||||
{
|
||||
unsupporteTableCombination = true;
|
||||
errorDetail = "Joins between more than two relations and subqueries are "
|
||||
"unsupported";
|
||||
}
|
||||
|
||||
/* finally check and error out if not satisfied */
|
||||
if (unsupporteTableCombination)
|
||||
{
|
||||
|
|
|
@ -105,8 +105,12 @@ static MultiNode * ApplyCartesianProduct(MultiNode *leftNode, MultiNode *rightNo
|
|||
* functions will be removed with upcoming subqery changes.
|
||||
*/
|
||||
static MultiNode * SubqueryPushdownMultiPlanTree(Query *queryTree);
|
||||
|
||||
static void ErrorIfSubqueryJoin(Query *queryTree);
|
||||
static MultiTable * MultiSubqueryPushdownTable(RangeTblEntry *subqueryRangeTableEntry);
|
||||
static List * CreateSubqueryTargetEntryList(List *columnList);
|
||||
static void UpdateVarMappingsForExtendedOpNode(List *columnList,
|
||||
List *subqueryTargetEntryList);
|
||||
static MultiTable * MultiSubqueryPushdownTable(Query *subquery);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -1987,67 +1991,109 @@ ApplyCartesianProduct(MultiNode *leftNode, MultiNode *rightNode,
|
|||
* SubqueryPushdownMultiTree creates logical plan for subquery pushdown logic.
|
||||
* Note that this logic will be changed in next iterations, so we decoupled it
|
||||
* from other parts of code although it causes some code duplication.
|
||||
*
|
||||
* Current subquery pushdown support in MultiTree logic requires a single range
|
||||
* table entry in the top most from clause. Therefore we inject an synthetic
|
||||
* query derived from the top level query and make it the only range table
|
||||
* entry for the top level query. This way we can push down any subquery joins
|
||||
* down to workers without invoking join order planner.
|
||||
*/
|
||||
static MultiNode *
|
||||
SubqueryPushdownMultiPlanTree(Query *queryTree)
|
||||
{
|
||||
List *targetEntryList = queryTree->targetList;
|
||||
List *qualifierList = NIL;
|
||||
List *qualifierColumnList = NIL;
|
||||
List *targetListColumnList = NIL;
|
||||
List *columnList = NIL;
|
||||
ListCell *columnCell = NULL;
|
||||
List *targetColumnList = NIL;
|
||||
MultiCollect *subqueryCollectNode = CitusMakeNode(MultiCollect);
|
||||
MultiTable *subqueryNode = NULL;
|
||||
MultiSelect *selectNode = NULL;
|
||||
MultiProject *projectNode = NULL;
|
||||
MultiExtendedOp *extendedOpNode = NULL;
|
||||
MultiNode *currentTopNode = NULL;
|
||||
RangeTblEntry *subqueryRangeTableEntry = NULL;
|
||||
List *subqueryEntryList = SubqueryEntryList(queryTree);
|
||||
Query *pushedDownQuery = NULL;
|
||||
List *subqueryTargetEntryList = NIL;
|
||||
List *havingClauseColumnList = NIL;
|
||||
|
||||
/* verify we can perform distributed planning on this query */
|
||||
ErrorIfQueryNotSupported(queryTree);
|
||||
ErrorIfSubqueryJoin(queryTree);
|
||||
|
||||
/* extract qualifiers and verify we can plan for them */
|
||||
qualifierList = QualifierList(queryTree->jointree);
|
||||
ValidateClauseList(qualifierList);
|
||||
|
||||
/*
|
||||
* We disregard pulled subqueries. This changes order of range table list.
|
||||
* We do not allow subquery joins, so we will have only one range table
|
||||
* entry in range table list after dropping pulled subquery. For this reason,
|
||||
* here we are updating columns in the most outer query for where clause
|
||||
* list and target list accordingly.
|
||||
* We would be creating a new Query and pushing down top level query's
|
||||
* contents down to it. Join and filter clauses in higher level query would
|
||||
* be transferred to lower query. Therefore after this function we would
|
||||
* only have a single range table entry in the top level query. We need to
|
||||
* create a target list entry in lower query for each column reference in
|
||||
* upper level query's target list and having clauses. Any column reference
|
||||
* in the upper query will be updated to have varno=1, and varattno=<resno>
|
||||
* of matching target entry in pushed down query.
|
||||
* Consider query
|
||||
* SELECT s1.a, sum(s2.c)
|
||||
* FROM (some subquery) s1, (some subquery) s2
|
||||
* WHERE s1.a = s2.a
|
||||
* GROUP BY s1.a
|
||||
* HAVING avg(s2.b);
|
||||
*
|
||||
* We want to prepare a multi tree to avoid subquery joins at top level,
|
||||
* therefore above query is converted to an equivalent
|
||||
* SELECT worker_column_0, sum(worker_column_1)
|
||||
* FROM (
|
||||
* SELECT
|
||||
* s1.a AS worker_column_0,
|
||||
* s2.c AS worker_column_1,
|
||||
* s2.b AS as worker_column_2
|
||||
* FROM (some subquery) s1, (some subquery) s2
|
||||
* WHERE s1.a = s2.a) worker_subquery
|
||||
* GROUP BY worker_column_0
|
||||
* HAVING avg(worker_column_2);
|
||||
* After this conversion MultiTree is created as follows
|
||||
*
|
||||
* MultiExtendedOpNode(
|
||||
* targetList : worker_column_0, sum(worker_column_1)
|
||||
* groupBy : worker_column_0
|
||||
* having : avg(worker_column_2))
|
||||
* --->MultiProject (worker_column_0, worker_column_1, worker_column_2)
|
||||
* --->---> MultiTable (subquery : worker_subquery)
|
||||
*
|
||||
* Master and worker queries will be created out of this MultiTree at later stages.
|
||||
*/
|
||||
Assert(list_length(subqueryEntryList) == 1);
|
||||
|
||||
qualifierColumnList = pull_var_clause_default((Node *) qualifierList);
|
||||
targetListColumnList = pull_var_clause_default((Node *) targetEntryList);
|
||||
/*
|
||||
* uniqueColumnList contains all columns returned by subquery. Subquery target
|
||||
* entry list, subquery range table entry's column name list are derived from
|
||||
* uniqueColumnList. Columns mentioned in multiProject node and multiExtendedOp
|
||||
* node are indexed with their respective position in uniqueColumnList.
|
||||
*/
|
||||
targetColumnList = pull_var_clause_default((Node *) targetEntryList);
|
||||
havingClauseColumnList = pull_var_clause_default(queryTree->havingQual);
|
||||
columnList = list_concat(targetColumnList, havingClauseColumnList);
|
||||
|
||||
columnList = list_concat(qualifierColumnList, targetListColumnList);
|
||||
foreach(columnCell, columnList)
|
||||
{
|
||||
Var *column = (Var *) lfirst(columnCell);
|
||||
column->varno = 1;
|
||||
}
|
||||
/* create a target entry for each unique column */
|
||||
subqueryTargetEntryList = CreateSubqueryTargetEntryList(columnList);
|
||||
|
||||
/* create multi node for the subquery */
|
||||
subqueryRangeTableEntry = (RangeTblEntry *) linitial(subqueryEntryList);
|
||||
subqueryNode = MultiSubqueryPushdownTable(subqueryRangeTableEntry);
|
||||
/*
|
||||
* Update varno/varattno fields of columns in columnList to
|
||||
* point to corresponding target entry in subquery target entry list.
|
||||
*/
|
||||
UpdateVarMappingsForExtendedOpNode(columnList, subqueryTargetEntryList);
|
||||
|
||||
/* new query only has target entries, join tree, and rtable*/
|
||||
pushedDownQuery = makeNode(Query);
|
||||
pushedDownQuery->commandType = queryTree->commandType;
|
||||
pushedDownQuery->targetList = subqueryTargetEntryList;
|
||||
pushedDownQuery->jointree = copyObject(queryTree->jointree);
|
||||
pushedDownQuery->rtable = copyObject(queryTree->rtable);
|
||||
pushedDownQuery->setOperations = copyObject(queryTree->setOperations);
|
||||
pushedDownQuery->querySource = queryTree->querySource;
|
||||
|
||||
subqueryNode = MultiSubqueryPushdownTable(pushedDownQuery);
|
||||
|
||||
SetChild((MultiUnaryNode *) subqueryCollectNode, (MultiNode *) subqueryNode);
|
||||
currentTopNode = (MultiNode *) subqueryCollectNode;
|
||||
|
||||
/* build select node if the query has selection criteria */
|
||||
selectNode = MultiSelectNode(qualifierList);
|
||||
if (selectNode != NULL)
|
||||
{
|
||||
SetChild((MultiUnaryNode *) selectNode, currentTopNode);
|
||||
currentTopNode = (MultiNode *) selectNode;
|
||||
}
|
||||
|
||||
/* build project node for the columns to project */
|
||||
projectNode = MultiProjectNode(targetEntryList);
|
||||
SetChild((MultiUnaryNode *) projectNode, currentTopNode);
|
||||
|
@ -2060,6 +2106,20 @@ SubqueryPushdownMultiPlanTree(Query *queryTree)
|
|||
* in the logical optimizer.
|
||||
*/
|
||||
extendedOpNode = MultiExtendedOpNode(queryTree);
|
||||
|
||||
/*
|
||||
* Postgres standard planner converts having qual node to a list of and
|
||||
* clauses and expects havingQual to be of type List when executing the
|
||||
* query later. This function is called on an original query, therefore
|
||||
* havingQual has not been converted yet. Perform conversion here.
|
||||
*/
|
||||
if (extendedOpNode->havingQual != NULL &&
|
||||
!IsA(extendedOpNode->havingQual, List))
|
||||
{
|
||||
extendedOpNode->havingQual =
|
||||
(Node *) make_ands_implicit((Expr *) extendedOpNode->havingQual);
|
||||
}
|
||||
|
||||
SetChild((MultiUnaryNode *) extendedOpNode, currentTopNode);
|
||||
currentTopNode = (MultiNode *) extendedOpNode;
|
||||
|
||||
|
@ -2094,22 +2154,105 @@ ErrorIfSubqueryJoin(Query *queryTree)
|
|||
|
||||
|
||||
/*
|
||||
* MultiSubqueryPushdownTable creates a MultiTable from the given subquery range
|
||||
* table entry and returns it. Note that this sets subquery field of MultiTable
|
||||
* to subquery of the given range table entry.
|
||||
* CreateSubqueryTargetEntryList creates a target entry for each unique column
|
||||
* in the column list and returns the target entry list.
|
||||
*/
|
||||
static List *
|
||||
CreateSubqueryTargetEntryList(List *columnList)
|
||||
{
|
||||
AttrNumber resNo = 1;
|
||||
ListCell *columnCell = NULL;
|
||||
List *uniqueColumnList = NIL;
|
||||
List *subqueryTargetEntryList = NIL;
|
||||
|
||||
foreach(columnCell, columnList)
|
||||
{
|
||||
Var *column = (Var *) lfirst(columnCell);
|
||||
uniqueColumnList = list_append_unique(uniqueColumnList, copyObject(column));
|
||||
}
|
||||
|
||||
foreach(columnCell, uniqueColumnList)
|
||||
{
|
||||
Var *column = (Var *) lfirst(columnCell);
|
||||
TargetEntry *newTargetEntry = makeNode(TargetEntry);
|
||||
StringInfo columnNameString = makeStringInfo();
|
||||
|
||||
newTargetEntry->expr = (Expr *) copyObject(column);
|
||||
appendStringInfo(columnNameString, WORKER_COLUMN_FORMAT, resNo);
|
||||
newTargetEntry->resname = columnNameString->data;
|
||||
newTargetEntry->resjunk = false;
|
||||
newTargetEntry->resno = resNo;
|
||||
|
||||
subqueryTargetEntryList = lappend(subqueryTargetEntryList, newTargetEntry);
|
||||
resNo++;
|
||||
}
|
||||
|
||||
return subqueryTargetEntryList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* UpdateVarMappingsForExtendedOpNode updates varno/varattno fields of columns
|
||||
* in columnList to point to corresponding target in subquery target entry
|
||||
* list.
|
||||
*/
|
||||
static void
|
||||
UpdateVarMappingsForExtendedOpNode(List *columnList, List *subqueryTargetEntryList)
|
||||
{
|
||||
ListCell *columnCell = NULL;
|
||||
foreach(columnCell, columnList)
|
||||
{
|
||||
Var *columnOnTheExtendedNode = (Var *) lfirst(columnCell);
|
||||
ListCell *targetEntryCell = NULL;
|
||||
foreach(targetEntryCell, subqueryTargetEntryList)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
Var *targetColumn = NULL;
|
||||
|
||||
Assert(IsA(targetEntry->expr, Var));
|
||||
targetColumn = (Var *) targetEntry->expr;
|
||||
if (columnOnTheExtendedNode->varno == targetColumn->varno &&
|
||||
columnOnTheExtendedNode->varattno == targetColumn->varattno)
|
||||
{
|
||||
columnOnTheExtendedNode->varno = 1;
|
||||
columnOnTheExtendedNode->varattno = targetEntry->resno;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* MultiSubqueryPushdownTable creates a MultiTable from the given subquery,
|
||||
* populates column list and returns the multitable.
|
||||
*/
|
||||
static MultiTable *
|
||||
MultiSubqueryPushdownTable(RangeTblEntry *subqueryRangeTableEntry)
|
||||
MultiSubqueryPushdownTable(Query *subquery)
|
||||
{
|
||||
Query *subquery = subqueryRangeTableEntry->subquery;
|
||||
MultiTable *subqueryTableNode = NULL;
|
||||
StringInfo rteName = makeStringInfo();
|
||||
List *columnNamesList = NIL;
|
||||
ListCell *targetEntryCell = NULL;
|
||||
|
||||
MultiTable *subqueryTableNode = CitusMakeNode(MultiTable);
|
||||
appendStringInfo(rteName, "worker_subquery");
|
||||
|
||||
foreach(targetEntryCell, subquery->targetList)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
columnNamesList = lappend(columnNamesList, makeString(targetEntry->resname));
|
||||
}
|
||||
|
||||
subqueryTableNode = CitusMakeNode(MultiTable);
|
||||
subqueryTableNode->subquery = subquery;
|
||||
subqueryTableNode->relationId = HEAP_ANALYTICS_SUBQUERY_RELATION_ID;
|
||||
subqueryTableNode->relationId = SUBQUERY_PUSHDOWN_RELATION_ID;
|
||||
subqueryTableNode->rangeTableId = SUBQUERY_RANGE_TABLE_ID;
|
||||
subqueryTableNode->partitionColumn = NULL;
|
||||
subqueryTableNode->alias = subqueryRangeTableEntry->alias;
|
||||
subqueryTableNode->referenceNames = subqueryRangeTableEntry->eref;
|
||||
subqueryTableNode->alias = makeNode(Alias);
|
||||
subqueryTableNode->alias->aliasname = rteName->data;
|
||||
subqueryTableNode->referenceNames = makeNode(Alias);
|
||||
subqueryTableNode->referenceNames->aliasname = rteName->data;
|
||||
subqueryTableNode->referenceNames->colnames = columnNamesList;
|
||||
|
||||
return subqueryTableNode;
|
||||
}
|
||||
|
|
|
@ -817,7 +817,7 @@ BaseRangeTableList(MultiNode *multiNode)
|
|||
*/
|
||||
MultiTable *multiTable = (MultiTable *) multiNode;
|
||||
if (multiTable->relationId != SUBQUERY_RELATION_ID &&
|
||||
multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID)
|
||||
multiTable->relationId != SUBQUERY_PUSHDOWN_RELATION_ID)
|
||||
{
|
||||
RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry);
|
||||
rangeTableEntry->inFromCl = true;
|
||||
|
@ -1397,6 +1397,7 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
|
|||
List *sortClauseList = NIL;
|
||||
List *groupClauseList = NIL;
|
||||
List *whereClauseList = NIL;
|
||||
Node *havingQual = NULL;
|
||||
Node *limitCount = NULL;
|
||||
Node *limitOffset = NULL;
|
||||
FromExpr *joinTree = NULL;
|
||||
|
@ -1436,7 +1437,7 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
|
|||
targetList = QueryTargetList(multiNode);
|
||||
}
|
||||
|
||||
/* extract limit count/offset and sort clauses */
|
||||
/* extract limit count/offset, sort and having clauses */
|
||||
if (extendedOpNodeList != NIL)
|
||||
{
|
||||
MultiExtendedOp *extendedOp = (MultiExtendedOp *) linitial(extendedOpNodeList);
|
||||
|
@ -1444,6 +1445,7 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
|
|||
limitCount = extendedOp->limitCount;
|
||||
limitOffset = extendedOp->limitOffset;
|
||||
sortClauseList = extendedOp->sortClauseList;
|
||||
havingQual = extendedOp->havingQual;
|
||||
}
|
||||
|
||||
/* build group clauses */
|
||||
|
@ -1473,7 +1475,9 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
|
|||
jobQuery->groupClause = groupClauseList;
|
||||
jobQuery->limitOffset = limitOffset;
|
||||
jobQuery->limitCount = limitCount;
|
||||
jobQuery->hasAggs = contain_agg_clause((Node *) targetList);
|
||||
jobQuery->havingQual = havingQual;
|
||||
jobQuery->hasAggs = contain_agg_clause((Node *) targetList) ||
|
||||
contain_agg_clause((Node *) havingQual);
|
||||
|
||||
return jobQuery;
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
#define SUBQUERY_RANGE_TABLE_ID -1
|
||||
#define SUBQUERY_RELATION_ID 10000
|
||||
#define HEAP_ANALYTICS_SUBQUERY_RELATION_ID 10001
|
||||
#define SUBQUERY_PUSHDOWN_RELATION_ID 10001
|
||||
|
||||
|
||||
/*
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -208,6 +208,41 @@ FROM
|
|||
34 | Tue Jan 21 04:15:03.874341 2014
|
||||
(6 rows)
|
||||
|
||||
-- same query with subuqery joins in topmost select
|
||||
SELECT "some_users_data".user_id, lastseen
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
Max(TIME) AS lastseen
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
TIME
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
TIME
|
||||
FROM events_table as "events"
|
||||
WHERE user_id > 10 and user_id < 40) "events_1"
|
||||
ORDER BY TIME DESC
|
||||
LIMIT 1000) "recent_events_1"
|
||||
GROUP BY user_id
|
||||
ORDER BY max(TIME) DESC) "some_recent_users"
|
||||
JOIN LATERAL
|
||||
(SELECT "users".user_id
|
||||
FROM users_table as "users"
|
||||
WHERE "users"."user_id" = "some_recent_users"."user_id"
|
||||
AND users.value_2 > 50 and users.value_2 < 55
|
||||
LIMIT 1) "some_users_data" ON TRUE
|
||||
ORDER BY user_id
|
||||
limit 50;
|
||||
user_id | lastseen
|
||||
---------+---------------------------------
|
||||
19 | Tue Jan 21 05:23:09.26298 2014
|
||||
22 | Tue Jan 21 05:22:28.223506 2014
|
||||
25 | Tue Jan 21 01:10:29.315788 2014
|
||||
31 | Tue Jan 21 02:43:24.591489 2014
|
||||
33 | Tue Jan 21 04:23:35.623056 2014
|
||||
34 | Tue Jan 21 04:15:03.874341 2014
|
||||
(6 rows)
|
||||
|
||||
-- not supported since JOIN is not on the partition key
|
||||
SELECT *
|
||||
FROM
|
||||
|
@ -321,6 +356,50 @@ limit 10;
|
|||
15 | Tue Jan 21 02:25:36.136461 2014
|
||||
(10 rows)
|
||||
|
||||
--
|
||||
-- A similar query with topmost select is dropped
|
||||
-- and replaced by aggregation. Notice the heavy use of limit
|
||||
--
|
||||
SELECT "some_users_data".user_id, MAX(lastseen), count(*)
|
||||
FROM
|
||||
(SELECT filter_users_1.user_id,
|
||||
TIME AS lastseen
|
||||
FROM
|
||||
(SELECT user_where_1_1.user_id
|
||||
FROM
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
|
||||
INNER JOIN
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
|
||||
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
|
||||
JOIN LATERAL
|
||||
(SELECT user_id,
|
||||
TIME
|
||||
FROM events_table as "events"
|
||||
WHERE user_id > 12 and user_id < 16 and user_id = filter_users_1.user_id
|
||||
ORDER BY TIME DESC
|
||||
LIMIT 1) "last_events_1" ON TRUE
|
||||
ORDER BY TIME DESC
|
||||
LIMIT 10) "some_recent_users"
|
||||
JOIN LATERAL
|
||||
(SELECT "users".user_id
|
||||
FROM users_table as "users"
|
||||
WHERE "users"."user_id" = "some_recent_users"."user_id"
|
||||
AND "users"."value_2" > 70
|
||||
LIMIT 1) "some_users_data" ON TRUE
|
||||
GROUP BY 1
|
||||
ORDER BY 2, 1 DESC
|
||||
limit 10;
|
||||
user_id | max | count
|
||||
---------+---------------------------------+-------
|
||||
15 | Tue Jan 21 02:25:36.136461 2014 | 10
|
||||
13 | Tue Jan 21 05:06:48.989766 2014 | 10
|
||||
14 | Tue Jan 21 05:46:51.286381 2014 | 10
|
||||
(3 rows)
|
||||
|
||||
-- not supported since the inner JOIN is not equi join
|
||||
SELECT user_id, lastseen
|
||||
FROM
|
||||
|
@ -586,23 +665,29 @@ DETAIL: Each relation should be joined with at least one another relation using
|
|||
SELECT
|
||||
"value_3", count(*) AS cnt
|
||||
FROM
|
||||
(SELECT "value_3", "user_id", random()
|
||||
FROM
|
||||
(SELECT users_in_segment_1.user_id, value_3
|
||||
FROM
|
||||
(SELECT user_id, value_3 * 2 as value_3
|
||||
FROM
|
||||
(SELECT user_id, value_3
|
||||
FROM
|
||||
(SELECT "users"."user_id", value_3
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 10 and user_id < 40 and value_2 > 30) simple_user_where_1) all_buckets_1) users_in_segment_1
|
||||
JOIN
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 10 and user_id < 40 and value_2 > 60) some_users_data
|
||||
ON ("users_in_segment_1".user_id = "some_users_data".user_id)) segmentalias_1) "tempQuery"
|
||||
GROUP BY "value_3" ORDER BY cnt, value_3 DESC LIMIT 10;
|
||||
(SELECT "value_3", "user_id", random()
|
||||
FROM
|
||||
(SELECT users_in_segment_1.user_id, value_3
|
||||
FROM
|
||||
(SELECT user_id, value_3 * 2 as value_3
|
||||
FROM
|
||||
(SELECT user_id, value_3
|
||||
FROM
|
||||
(SELECT "users"."user_id", value_3
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 10 and user_id < 40 and value_2 > 30
|
||||
) simple_user_where_1
|
||||
) all_buckets_1
|
||||
) users_in_segment_1
|
||||
JOIN
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 10 and user_id < 40 and value_2 > 60
|
||||
) some_users_data
|
||||
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
||||
) segmentalias_1) "tempQuery"
|
||||
GROUP BY "value_3"
|
||||
ORDER BY cnt, value_3 DESC LIMIT 10;
|
||||
value_3 | cnt
|
||||
---------+-----
|
||||
556 | 75
|
||||
|
@ -683,6 +768,45 @@ FROM
|
|||
21 | 985
|
||||
(6 rows)
|
||||
|
||||
-- nested lateral join at top most level
|
||||
SELECT "some_users_data".user_id, "some_recent_users".value_3
|
||||
FROM
|
||||
(SELECT filter_users_1.user_id, value_3
|
||||
FROM
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 20 and user_id < 70 and users.value_2 = 200
|
||||
) filter_users_1
|
||||
JOIN LATERAL
|
||||
(SELECT user_id, value_3
|
||||
FROM events_table as "events"
|
||||
WHERE user_id > 20 and user_id < 70
|
||||
AND ("events".user_id = "filter_users_1".user_id)
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 1
|
||||
) "last_events_1" ON TRUE
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 10
|
||||
) "some_recent_users"
|
||||
JOIN LATERAL
|
||||
(SELECT "users".user_id
|
||||
FROM users_table as "users"
|
||||
WHERE "users"."user_id" = "some_recent_users"."user_id"
|
||||
AND users.value_2 > 200
|
||||
LIMIT 1
|
||||
) "some_users_data" ON TRUE
|
||||
ORDER BY value_3 DESC, user_id ASC
|
||||
LIMIT 10;
|
||||
user_id | value_3
|
||||
---------+---------
|
||||
44 | 998
|
||||
65 | 996
|
||||
66 | 996
|
||||
37 | 995
|
||||
57 | 989
|
||||
21 | 985
|
||||
(6 rows)
|
||||
|
||||
-- longer nested lateral joins
|
||||
SELECT *
|
||||
FROM
|
||||
|
@ -726,6 +850,45 @@ FROM
|
|||
21 | 985
|
||||
(6 rows)
|
||||
|
||||
-- longer nested lateral join wth top level join
|
||||
SELECT "some_users_data".user_id, "some_recent_users".value_3
|
||||
FROM
|
||||
(SELECT filter_users_1.user_id, value_3
|
||||
FROM
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 20 and user_id < 70 and users.value_2 = 200
|
||||
) filter_users_1
|
||||
JOIN LATERAL
|
||||
(SELECT user_id, value_3
|
||||
FROM events_table as "events"
|
||||
WHERE user_id > 20 and user_id < 70
|
||||
AND ("events".user_id = "filter_users_1".user_id)
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 1
|
||||
) "last_events_1" ON TRUE
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 10
|
||||
) "some_recent_users"
|
||||
JOIN LATERAL
|
||||
(SELECT "users".user_id
|
||||
FROM users_table as "users"
|
||||
WHERE "users"."user_id" = "some_recent_users"."user_id"
|
||||
AND users.value_2 > 200
|
||||
LIMIT 1
|
||||
) "some_users_data" ON TRUE
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 10;
|
||||
user_id | value_3
|
||||
---------+---------
|
||||
44 | 998
|
||||
65 | 996
|
||||
66 | 996
|
||||
37 | 995
|
||||
57 | 989
|
||||
21 | 985
|
||||
(6 rows)
|
||||
|
||||
-- LEFT JOINs used with INNER JOINs
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
|
|
|
@ -197,9 +197,7 @@ SELECT count(*) FROM
|
|||
-- (SELECT l_orderkey FROM lineitem_subquery) UNION
|
||||
-- (SELECT l_orderkey FROM lineitem_subquery)
|
||||
--) b;
|
||||
|
||||
-- Check that we error out if the outermost query has subquery join.
|
||||
|
||||
-- Check that we error out if inner query has limit but outer quers has not.
|
||||
SELECT
|
||||
avg(o_totalprice/l_quantity)
|
||||
FROM
|
||||
|
|
|
@ -40,7 +40,7 @@ test: multi_insert_select
|
|||
test: multi_deparse_shard_query
|
||||
test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery
|
||||
test: multi_explain
|
||||
test: multi_subquery multi_subquery_complex_queries
|
||||
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
||||
test: multi_reference_table
|
||||
test: multi_outer_join_reference
|
||||
test: multi_single_relation_subquery
|
||||
|
|
|
@ -194,7 +194,7 @@ DETAIL: Union All clauses are currently unsupported
|
|||
-- (SELECT l_orderkey FROM lineitem_subquery) UNION
|
||||
-- (SELECT l_orderkey FROM lineitem_subquery)
|
||||
--) b;
|
||||
-- Check that we error out if the outermost query has subquery join.
|
||||
-- Check that we error out if inner query has limit but outer quers has not.
|
||||
SELECT
|
||||
avg(o_totalprice/l_quantity)
|
||||
FROM
|
||||
|
@ -214,8 +214,8 @@ FROM
|
|||
orders_subquery
|
||||
WHERE
|
||||
lineitem_quantities.l_orderkey = o_orderkey) orders_price ON true;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Join in subqueries is not supported yet
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Limit in subquery without limit in the outer query is unsupported
|
||||
-- Check that we error out if the outermost query is a distinct clause.
|
||||
SELECT
|
||||
count(DISTINCT a)
|
||||
|
|
|
@ -194,7 +194,7 @@ DETAIL: Union All clauses are currently unsupported
|
|||
-- (SELECT l_orderkey FROM lineitem_subquery) UNION
|
||||
-- (SELECT l_orderkey FROM lineitem_subquery)
|
||||
--) b;
|
||||
-- Check that we error out if the outermost query has subquery join.
|
||||
-- Check that we error out if inner query has limit but outer quers has not.
|
||||
SELECT
|
||||
avg(o_totalprice/l_quantity)
|
||||
FROM
|
||||
|
@ -214,8 +214,8 @@ FROM
|
|||
orders_subquery
|
||||
WHERE
|
||||
lineitem_quantities.l_orderkey = o_orderkey) orders_price ON true;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Join in subqueries is not supported yet
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Limit in subquery without limit in the outer query is unsupported
|
||||
-- Check that we error out if the outermost query is a distinct clause.
|
||||
SELECT
|
||||
count(DISTINCT a)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -206,6 +206,32 @@ FROM
|
|||
order BY user_id
|
||||
limit 50;
|
||||
|
||||
-- same query with subuqery joins in topmost select
|
||||
SELECT "some_users_data".user_id, lastseen
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
Max(TIME) AS lastseen
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
TIME
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
TIME
|
||||
FROM events_table as "events"
|
||||
WHERE user_id > 10 and user_id < 40) "events_1"
|
||||
ORDER BY TIME DESC
|
||||
LIMIT 1000) "recent_events_1"
|
||||
GROUP BY user_id
|
||||
ORDER BY max(TIME) DESC) "some_recent_users"
|
||||
JOIN LATERAL
|
||||
(SELECT "users".user_id
|
||||
FROM users_table as "users"
|
||||
WHERE "users"."user_id" = "some_recent_users"."user_id"
|
||||
AND users.value_2 > 50 and users.value_2 < 55
|
||||
LIMIT 1) "some_users_data" ON TRUE
|
||||
ORDER BY user_id
|
||||
limit 50;
|
||||
|
||||
-- not supported since JOIN is not on the partition key
|
||||
SELECT *
|
||||
FROM
|
||||
|
@ -304,6 +330,44 @@ FROM
|
|||
order BY user_id DESC
|
||||
limit 10;
|
||||
|
||||
--
|
||||
-- A similar query with topmost select is dropped
|
||||
-- and replaced by aggregation. Notice the heavy use of limit
|
||||
--
|
||||
SELECT "some_users_data".user_id, MAX(lastseen), count(*)
|
||||
FROM
|
||||
(SELECT filter_users_1.user_id,
|
||||
TIME AS lastseen
|
||||
FROM
|
||||
(SELECT user_where_1_1.user_id
|
||||
FROM
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
|
||||
INNER JOIN
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
|
||||
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
|
||||
JOIN LATERAL
|
||||
(SELECT user_id,
|
||||
TIME
|
||||
FROM events_table as "events"
|
||||
WHERE user_id > 12 and user_id < 16 and user_id = filter_users_1.user_id
|
||||
ORDER BY TIME DESC
|
||||
LIMIT 1) "last_events_1" ON TRUE
|
||||
ORDER BY TIME DESC
|
||||
LIMIT 10) "some_recent_users"
|
||||
JOIN LATERAL
|
||||
(SELECT "users".user_id
|
||||
FROM users_table as "users"
|
||||
WHERE "users"."user_id" = "some_recent_users"."user_id"
|
||||
AND "users"."value_2" > 70
|
||||
LIMIT 1) "some_users_data" ON TRUE
|
||||
GROUP BY 1
|
||||
ORDER BY 2, 1 DESC
|
||||
limit 10;
|
||||
|
||||
-- not supported since the inner JOIN is not equi join
|
||||
SELECT user_id, lastseen
|
||||
FROM
|
||||
|
@ -545,24 +609,29 @@ SELECT
|
|||
SELECT
|
||||
"value_3", count(*) AS cnt
|
||||
FROM
|
||||
(SELECT "value_3", "user_id", random()
|
||||
FROM
|
||||
(SELECT users_in_segment_1.user_id, value_3
|
||||
FROM
|
||||
(SELECT user_id, value_3 * 2 as value_3
|
||||
FROM
|
||||
(SELECT user_id, value_3
|
||||
FROM
|
||||
(SELECT "users"."user_id", value_3
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 10 and user_id < 40 and value_2 > 30) simple_user_where_1) all_buckets_1) users_in_segment_1
|
||||
JOIN
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 10 and user_id < 40 and value_2 > 60) some_users_data
|
||||
|
||||
ON ("users_in_segment_1".user_id = "some_users_data".user_id)) segmentalias_1) "tempQuery"
|
||||
GROUP BY "value_3" ORDER BY cnt, value_3 DESC LIMIT 10;
|
||||
(SELECT "value_3", "user_id", random()
|
||||
FROM
|
||||
(SELECT users_in_segment_1.user_id, value_3
|
||||
FROM
|
||||
(SELECT user_id, value_3 * 2 as value_3
|
||||
FROM
|
||||
(SELECT user_id, value_3
|
||||
FROM
|
||||
(SELECT "users"."user_id", value_3
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 10 and user_id < 40 and value_2 > 30
|
||||
) simple_user_where_1
|
||||
) all_buckets_1
|
||||
) users_in_segment_1
|
||||
JOIN
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 10 and user_id < 40 and value_2 > 60
|
||||
) some_users_data
|
||||
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
||||
) segmentalias_1) "tempQuery"
|
||||
GROUP BY "value_3"
|
||||
ORDER BY cnt, value_3 DESC LIMIT 10;
|
||||
|
||||
|
||||
-- not supported since there is no partition column equality at all
|
||||
|
@ -622,6 +691,36 @@ FROM
|
|||
value_3 DESC
|
||||
limit 10;
|
||||
|
||||
-- nested lateral join at top most level
|
||||
SELECT "some_users_data".user_id, "some_recent_users".value_3
|
||||
FROM
|
||||
(SELECT filter_users_1.user_id, value_3
|
||||
FROM
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 20 and user_id < 70 and users.value_2 = 200
|
||||
) filter_users_1
|
||||
JOIN LATERAL
|
||||
(SELECT user_id, value_3
|
||||
FROM events_table as "events"
|
||||
WHERE user_id > 20 and user_id < 70
|
||||
AND ("events".user_id = "filter_users_1".user_id)
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 1
|
||||
) "last_events_1" ON TRUE
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 10
|
||||
) "some_recent_users"
|
||||
JOIN LATERAL
|
||||
(SELECT "users".user_id
|
||||
FROM users_table as "users"
|
||||
WHERE "users"."user_id" = "some_recent_users"."user_id"
|
||||
AND users.value_2 > 200
|
||||
LIMIT 1
|
||||
) "some_users_data" ON TRUE
|
||||
ORDER BY value_3 DESC, user_id ASC
|
||||
LIMIT 10;
|
||||
|
||||
-- longer nested lateral joins
|
||||
SELECT *
|
||||
FROM
|
||||
|
@ -656,7 +755,35 @@ FROM
|
|||
value_3 DESC
|
||||
limit 10;
|
||||
|
||||
|
||||
-- longer nested lateral join wth top level join
|
||||
SELECT "some_users_data".user_id, "some_recent_users".value_3
|
||||
FROM
|
||||
(SELECT filter_users_1.user_id, value_3
|
||||
FROM
|
||||
(SELECT "users"."user_id"
|
||||
FROM users_table as "users"
|
||||
WHERE user_id > 20 and user_id < 70 and users.value_2 = 200
|
||||
) filter_users_1
|
||||
JOIN LATERAL
|
||||
(SELECT user_id, value_3
|
||||
FROM events_table as "events"
|
||||
WHERE user_id > 20 and user_id < 70
|
||||
AND ("events".user_id = "filter_users_1".user_id)
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 1
|
||||
) "last_events_1" ON TRUE
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 10
|
||||
) "some_recent_users"
|
||||
JOIN LATERAL
|
||||
(SELECT "users".user_id
|
||||
FROM users_table as "users"
|
||||
WHERE "users"."user_id" = "some_recent_users"."user_id"
|
||||
AND users.value_2 > 200
|
||||
LIMIT 1
|
||||
) "some_users_data" ON TRUE
|
||||
ORDER BY value_3 DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- LEFT JOINs used with INNER JOINs
|
||||
SELECT
|
||||
|
|
Loading…
Reference in New Issue