Enable top level subquery join queries

This work enables
- Top level subquery joins
- Joins between subqueries and relations
- Joins involving more than 2 range table entries

A new regression test file is added to reflect enabled test cases
pull/1347/head
Murat Tuncer 2017-03-29 10:49:07 +03:00 committed by Onder Kalaci
parent a3cadbb1e1
commit 60fa537e3f
12 changed files with 3404 additions and 118 deletions

View File

@ -3051,8 +3051,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
* ErrorIfUnsupportedTableCombination checks if the given query tree contains any
* unsupported range table combinations. For this, the function walks over all
* range tables in the join tree, and checks if they correspond to simple relations
* or subqueries. It also checks if there is a join between a regular table and
* a subquery and if join is on more than two range table entries.
* or subqueries.
*/
static void
ErrorIfUnsupportedTableCombination(Query *queryTree)
@ -3062,8 +3061,6 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
ListCell *joinTreeTableIndexCell = NULL;
bool unsupporteTableCombination = false;
char *errorDetail = NULL;
uint32 relationRangeTableCount = 0;
uint32 subqueryRangeTableCount = 0;
/*
* Extract all range table indexes from the join tree. Note that sub-queries
@ -3086,15 +3083,8 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
* Check if the range table in the join tree is a simple relation or a
* subquery.
*/
if (rangeTableEntry->rtekind == RTE_RELATION)
{
relationRangeTableCount++;
}
else if (rangeTableEntry->rtekind == RTE_SUBQUERY)
{
subqueryRangeTableCount++;
}
else
if (rangeTableEntry->rtekind != RTE_RELATION &&
rangeTableEntry->rtekind != RTE_SUBQUERY)
{
unsupporteTableCombination = true;
errorDetail = "Table expressions other than simple relations and "
@ -3103,19 +3093,6 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
}
}
if ((subqueryRangeTableCount > 0) && (relationRangeTableCount > 0))
{
unsupporteTableCombination = true;
errorDetail = "Joins between regular tables and subqueries are unsupported";
}
if ((relationRangeTableCount > 2) || (subqueryRangeTableCount > 2))
{
unsupporteTableCombination = true;
errorDetail = "Joins between more than two relations and subqueries are "
"unsupported";
}
/* finally check and error out if not satisfied */
if (unsupporteTableCombination)
{

View File

@ -105,8 +105,12 @@ static MultiNode * ApplyCartesianProduct(MultiNode *leftNode, MultiNode *rightNo
* functions will be removed with upcoming subqery changes.
*/
static MultiNode * SubqueryPushdownMultiPlanTree(Query *queryTree);
static void ErrorIfSubqueryJoin(Query *queryTree);
static MultiTable * MultiSubqueryPushdownTable(RangeTblEntry *subqueryRangeTableEntry);
static List * CreateSubqueryTargetEntryList(List *columnList);
static void UpdateVarMappingsForExtendedOpNode(List *columnList,
List *subqueryTargetEntryList);
static MultiTable * MultiSubqueryPushdownTable(Query *subquery);
/*
@ -1987,67 +1991,109 @@ ApplyCartesianProduct(MultiNode *leftNode, MultiNode *rightNode,
* SubqueryPushdownMultiTree creates logical plan for subquery pushdown logic.
* Note that this logic will be changed in next iterations, so we decoupled it
* from other parts of code although it causes some code duplication.
*
* Current subquery pushdown support in MultiTree logic requires a single range
* table entry in the top most from clause. Therefore we inject an synthetic
* query derived from the top level query and make it the only range table
* entry for the top level query. This way we can push down any subquery joins
* down to workers without invoking join order planner.
*/
static MultiNode *
SubqueryPushdownMultiPlanTree(Query *queryTree)
{
List *targetEntryList = queryTree->targetList;
List *qualifierList = NIL;
List *qualifierColumnList = NIL;
List *targetListColumnList = NIL;
List *columnList = NIL;
ListCell *columnCell = NULL;
List *targetColumnList = NIL;
MultiCollect *subqueryCollectNode = CitusMakeNode(MultiCollect);
MultiTable *subqueryNode = NULL;
MultiSelect *selectNode = NULL;
MultiProject *projectNode = NULL;
MultiExtendedOp *extendedOpNode = NULL;
MultiNode *currentTopNode = NULL;
RangeTblEntry *subqueryRangeTableEntry = NULL;
List *subqueryEntryList = SubqueryEntryList(queryTree);
Query *pushedDownQuery = NULL;
List *subqueryTargetEntryList = NIL;
List *havingClauseColumnList = NIL;
/* verify we can perform distributed planning on this query */
ErrorIfQueryNotSupported(queryTree);
ErrorIfSubqueryJoin(queryTree);
/* extract qualifiers and verify we can plan for them */
qualifierList = QualifierList(queryTree->jointree);
ValidateClauseList(qualifierList);
/*
* We disregard pulled subqueries. This changes order of range table list.
* We do not allow subquery joins, so we will have only one range table
* entry in range table list after dropping pulled subquery. For this reason,
* here we are updating columns in the most outer query for where clause
* list and target list accordingly.
* We would be creating a new Query and pushing down top level query's
* contents down to it. Join and filter clauses in higher level query would
* be transferred to lower query. Therefore after this function we would
* only have a single range table entry in the top level query. We need to
* create a target list entry in lower query for each column reference in
* upper level query's target list and having clauses. Any column reference
* in the upper query will be updated to have varno=1, and varattno=<resno>
* of matching target entry in pushed down query.
* Consider query
* SELECT s1.a, sum(s2.c)
* FROM (some subquery) s1, (some subquery) s2
* WHERE s1.a = s2.a
* GROUP BY s1.a
* HAVING avg(s2.b);
*
* We want to prepare a multi tree to avoid subquery joins at top level,
* therefore above query is converted to an equivalent
* SELECT worker_column_0, sum(worker_column_1)
* FROM (
* SELECT
* s1.a AS worker_column_0,
* s2.c AS worker_column_1,
* s2.b AS as worker_column_2
* FROM (some subquery) s1, (some subquery) s2
* WHERE s1.a = s2.a) worker_subquery
* GROUP BY worker_column_0
* HAVING avg(worker_column_2);
* After this conversion MultiTree is created as follows
*
* MultiExtendedOpNode(
* targetList : worker_column_0, sum(worker_column_1)
* groupBy : worker_column_0
* having : avg(worker_column_2))
* --->MultiProject (worker_column_0, worker_column_1, worker_column_2)
* --->---> MultiTable (subquery : worker_subquery)
*
* Master and worker queries will be created out of this MultiTree at later stages.
*/
Assert(list_length(subqueryEntryList) == 1);
qualifierColumnList = pull_var_clause_default((Node *) qualifierList);
targetListColumnList = pull_var_clause_default((Node *) targetEntryList);
/*
* uniqueColumnList contains all columns returned by subquery. Subquery target
* entry list, subquery range table entry's column name list are derived from
* uniqueColumnList. Columns mentioned in multiProject node and multiExtendedOp
* node are indexed with their respective position in uniqueColumnList.
*/
targetColumnList = pull_var_clause_default((Node *) targetEntryList);
havingClauseColumnList = pull_var_clause_default(queryTree->havingQual);
columnList = list_concat(targetColumnList, havingClauseColumnList);
columnList = list_concat(qualifierColumnList, targetListColumnList);
foreach(columnCell, columnList)
{
Var *column = (Var *) lfirst(columnCell);
column->varno = 1;
}
/* create a target entry for each unique column */
subqueryTargetEntryList = CreateSubqueryTargetEntryList(columnList);
/* create multi node for the subquery */
subqueryRangeTableEntry = (RangeTblEntry *) linitial(subqueryEntryList);
subqueryNode = MultiSubqueryPushdownTable(subqueryRangeTableEntry);
/*
* Update varno/varattno fields of columns in columnList to
* point to corresponding target entry in subquery target entry list.
*/
UpdateVarMappingsForExtendedOpNode(columnList, subqueryTargetEntryList);
/* new query only has target entries, join tree, and rtable*/
pushedDownQuery = makeNode(Query);
pushedDownQuery->commandType = queryTree->commandType;
pushedDownQuery->targetList = subqueryTargetEntryList;
pushedDownQuery->jointree = copyObject(queryTree->jointree);
pushedDownQuery->rtable = copyObject(queryTree->rtable);
pushedDownQuery->setOperations = copyObject(queryTree->setOperations);
pushedDownQuery->querySource = queryTree->querySource;
subqueryNode = MultiSubqueryPushdownTable(pushedDownQuery);
SetChild((MultiUnaryNode *) subqueryCollectNode, (MultiNode *) subqueryNode);
currentTopNode = (MultiNode *) subqueryCollectNode;
/* build select node if the query has selection criteria */
selectNode = MultiSelectNode(qualifierList);
if (selectNode != NULL)
{
SetChild((MultiUnaryNode *) selectNode, currentTopNode);
currentTopNode = (MultiNode *) selectNode;
}
/* build project node for the columns to project */
projectNode = MultiProjectNode(targetEntryList);
SetChild((MultiUnaryNode *) projectNode, currentTopNode);
@ -2060,6 +2106,20 @@ SubqueryPushdownMultiPlanTree(Query *queryTree)
* in the logical optimizer.
*/
extendedOpNode = MultiExtendedOpNode(queryTree);
/*
* Postgres standard planner converts having qual node to a list of and
* clauses and expects havingQual to be of type List when executing the
* query later. This function is called on an original query, therefore
* havingQual has not been converted yet. Perform conversion here.
*/
if (extendedOpNode->havingQual != NULL &&
!IsA(extendedOpNode->havingQual, List))
{
extendedOpNode->havingQual =
(Node *) make_ands_implicit((Expr *) extendedOpNode->havingQual);
}
SetChild((MultiUnaryNode *) extendedOpNode, currentTopNode);
currentTopNode = (MultiNode *) extendedOpNode;
@ -2094,22 +2154,105 @@ ErrorIfSubqueryJoin(Query *queryTree)
/*
* MultiSubqueryPushdownTable creates a MultiTable from the given subquery range
* table entry and returns it. Note that this sets subquery field of MultiTable
* to subquery of the given range table entry.
* CreateSubqueryTargetEntryList creates a target entry for each unique column
* in the column list and returns the target entry list.
*/
static List *
CreateSubqueryTargetEntryList(List *columnList)
{
AttrNumber resNo = 1;
ListCell *columnCell = NULL;
List *uniqueColumnList = NIL;
List *subqueryTargetEntryList = NIL;
foreach(columnCell, columnList)
{
Var *column = (Var *) lfirst(columnCell);
uniqueColumnList = list_append_unique(uniqueColumnList, copyObject(column));
}
foreach(columnCell, uniqueColumnList)
{
Var *column = (Var *) lfirst(columnCell);
TargetEntry *newTargetEntry = makeNode(TargetEntry);
StringInfo columnNameString = makeStringInfo();
newTargetEntry->expr = (Expr *) copyObject(column);
appendStringInfo(columnNameString, WORKER_COLUMN_FORMAT, resNo);
newTargetEntry->resname = columnNameString->data;
newTargetEntry->resjunk = false;
newTargetEntry->resno = resNo;
subqueryTargetEntryList = lappend(subqueryTargetEntryList, newTargetEntry);
resNo++;
}
return subqueryTargetEntryList;
}
/*
* UpdateVarMappingsForExtendedOpNode updates varno/varattno fields of columns
* in columnList to point to corresponding target in subquery target entry
* list.
*/
static void
UpdateVarMappingsForExtendedOpNode(List *columnList, List *subqueryTargetEntryList)
{
ListCell *columnCell = NULL;
foreach(columnCell, columnList)
{
Var *columnOnTheExtendedNode = (Var *) lfirst(columnCell);
ListCell *targetEntryCell = NULL;
foreach(targetEntryCell, subqueryTargetEntryList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
Var *targetColumn = NULL;
Assert(IsA(targetEntry->expr, Var));
targetColumn = (Var *) targetEntry->expr;
if (columnOnTheExtendedNode->varno == targetColumn->varno &&
columnOnTheExtendedNode->varattno == targetColumn->varattno)
{
columnOnTheExtendedNode->varno = 1;
columnOnTheExtendedNode->varattno = targetEntry->resno;
break;
}
}
}
}
/*
* MultiSubqueryPushdownTable creates a MultiTable from the given subquery,
* populates column list and returns the multitable.
*/
static MultiTable *
MultiSubqueryPushdownTable(RangeTblEntry *subqueryRangeTableEntry)
MultiSubqueryPushdownTable(Query *subquery)
{
Query *subquery = subqueryRangeTableEntry->subquery;
MultiTable *subqueryTableNode = NULL;
StringInfo rteName = makeStringInfo();
List *columnNamesList = NIL;
ListCell *targetEntryCell = NULL;
MultiTable *subqueryTableNode = CitusMakeNode(MultiTable);
appendStringInfo(rteName, "worker_subquery");
foreach(targetEntryCell, subquery->targetList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
columnNamesList = lappend(columnNamesList, makeString(targetEntry->resname));
}
subqueryTableNode = CitusMakeNode(MultiTable);
subqueryTableNode->subquery = subquery;
subqueryTableNode->relationId = HEAP_ANALYTICS_SUBQUERY_RELATION_ID;
subqueryTableNode->relationId = SUBQUERY_PUSHDOWN_RELATION_ID;
subqueryTableNode->rangeTableId = SUBQUERY_RANGE_TABLE_ID;
subqueryTableNode->partitionColumn = NULL;
subqueryTableNode->alias = subqueryRangeTableEntry->alias;
subqueryTableNode->referenceNames = subqueryRangeTableEntry->eref;
subqueryTableNode->alias = makeNode(Alias);
subqueryTableNode->alias->aliasname = rteName->data;
subqueryTableNode->referenceNames = makeNode(Alias);
subqueryTableNode->referenceNames->aliasname = rteName->data;
subqueryTableNode->referenceNames->colnames = columnNamesList;
return subqueryTableNode;
}

View File

@ -817,7 +817,7 @@ BaseRangeTableList(MultiNode *multiNode)
*/
MultiTable *multiTable = (MultiTable *) multiNode;
if (multiTable->relationId != SUBQUERY_RELATION_ID &&
multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID)
multiTable->relationId != SUBQUERY_PUSHDOWN_RELATION_ID)
{
RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry);
rangeTableEntry->inFromCl = true;
@ -1397,6 +1397,7 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
List *sortClauseList = NIL;
List *groupClauseList = NIL;
List *whereClauseList = NIL;
Node *havingQual = NULL;
Node *limitCount = NULL;
Node *limitOffset = NULL;
FromExpr *joinTree = NULL;
@ -1436,7 +1437,7 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
targetList = QueryTargetList(multiNode);
}
/* extract limit count/offset and sort clauses */
/* extract limit count/offset, sort and having clauses */
if (extendedOpNodeList != NIL)
{
MultiExtendedOp *extendedOp = (MultiExtendedOp *) linitial(extendedOpNodeList);
@ -1444,6 +1445,7 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
limitCount = extendedOp->limitCount;
limitOffset = extendedOp->limitOffset;
sortClauseList = extendedOp->sortClauseList;
havingQual = extendedOp->havingQual;
}
/* build group clauses */
@ -1473,7 +1475,9 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
jobQuery->groupClause = groupClauseList;
jobQuery->limitOffset = limitOffset;
jobQuery->limitCount = limitCount;
jobQuery->hasAggs = contain_agg_clause((Node *) targetList);
jobQuery->havingQual = havingQual;
jobQuery->hasAggs = contain_agg_clause((Node *) targetList) ||
contain_agg_clause((Node *) havingQual);
return jobQuery;
}

View File

@ -24,7 +24,7 @@
#define SUBQUERY_RANGE_TABLE_ID -1
#define SUBQUERY_RELATION_ID 10000
#define HEAP_ANALYTICS_SUBQUERY_RELATION_ID 10001
#define SUBQUERY_PUSHDOWN_RELATION_ID 10001
/*

File diff suppressed because it is too large Load Diff

View File

@ -208,6 +208,41 @@ FROM
34 | Tue Jan 21 04:15:03.874341 2014
(6 rows)
-- same query with subuqery joins in topmost select
SELECT "some_users_data".user_id, lastseen
FROM
(SELECT user_id,
Max(TIME) AS lastseen
FROM
(SELECT user_id,
TIME
FROM
(SELECT user_id,
TIME
FROM events_table as "events"
WHERE user_id > 10 and user_id < 40) "events_1"
ORDER BY TIME DESC
LIMIT 1000) "recent_events_1"
GROUP BY user_id
ORDER BY max(TIME) DESC) "some_recent_users"
JOIN LATERAL
(SELECT "users".user_id
FROM users_table as "users"
WHERE "users"."user_id" = "some_recent_users"."user_id"
AND users.value_2 > 50 and users.value_2 < 55
LIMIT 1) "some_users_data" ON TRUE
ORDER BY user_id
limit 50;
user_id | lastseen
---------+---------------------------------
19 | Tue Jan 21 05:23:09.26298 2014
22 | Tue Jan 21 05:22:28.223506 2014
25 | Tue Jan 21 01:10:29.315788 2014
31 | Tue Jan 21 02:43:24.591489 2014
33 | Tue Jan 21 04:23:35.623056 2014
34 | Tue Jan 21 04:15:03.874341 2014
(6 rows)
-- not supported since JOIN is not on the partition key
SELECT *
FROM
@ -321,6 +356,50 @@ limit 10;
15 | Tue Jan 21 02:25:36.136461 2014
(10 rows)
--
-- A similar query with topmost select is dropped
-- and replaced by aggregation. Notice the heavy use of limit
--
SELECT "some_users_data".user_id, MAX(lastseen), count(*)
FROM
(SELECT filter_users_1.user_id,
TIME AS lastseen
FROM
(SELECT user_where_1_1.user_id
FROM
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
INNER JOIN
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
JOIN LATERAL
(SELECT user_id,
TIME
FROM events_table as "events"
WHERE user_id > 12 and user_id < 16 and user_id = filter_users_1.user_id
ORDER BY TIME DESC
LIMIT 1) "last_events_1" ON TRUE
ORDER BY TIME DESC
LIMIT 10) "some_recent_users"
JOIN LATERAL
(SELECT "users".user_id
FROM users_table as "users"
WHERE "users"."user_id" = "some_recent_users"."user_id"
AND "users"."value_2" > 70
LIMIT 1) "some_users_data" ON TRUE
GROUP BY 1
ORDER BY 2, 1 DESC
limit 10;
user_id | max | count
---------+---------------------------------+-------
15 | Tue Jan 21 02:25:36.136461 2014 | 10
13 | Tue Jan 21 05:06:48.989766 2014 | 10
14 | Tue Jan 21 05:46:51.286381 2014 | 10
(3 rows)
-- not supported since the inner JOIN is not equi join
SELECT user_id, lastseen
FROM
@ -586,23 +665,29 @@ DETAIL: Each relation should be joined with at least one another relation using
SELECT
"value_3", count(*) AS cnt
FROM
(SELECT "value_3", "user_id", random()
FROM
(SELECT users_in_segment_1.user_id, value_3
FROM
(SELECT user_id, value_3 * 2 as value_3
FROM
(SELECT user_id, value_3
FROM
(SELECT "users"."user_id", value_3
FROM users_table as "users"
WHERE user_id > 10 and user_id < 40 and value_2 > 30) simple_user_where_1) all_buckets_1) users_in_segment_1
JOIN
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 10 and user_id < 40 and value_2 > 60) some_users_data
ON ("users_in_segment_1".user_id = "some_users_data".user_id)) segmentalias_1) "tempQuery"
GROUP BY "value_3" ORDER BY cnt, value_3 DESC LIMIT 10;
(SELECT "value_3", "user_id", random()
FROM
(SELECT users_in_segment_1.user_id, value_3
FROM
(SELECT user_id, value_3 * 2 as value_3
FROM
(SELECT user_id, value_3
FROM
(SELECT "users"."user_id", value_3
FROM users_table as "users"
WHERE user_id > 10 and user_id < 40 and value_2 > 30
) simple_user_where_1
) all_buckets_1
) users_in_segment_1
JOIN
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 10 and user_id < 40 and value_2 > 60
) some_users_data
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
) segmentalias_1) "tempQuery"
GROUP BY "value_3"
ORDER BY cnt, value_3 DESC LIMIT 10;
value_3 | cnt
---------+-----
556 | 75
@ -683,6 +768,45 @@ FROM
21 | 985
(6 rows)
-- nested lateral join at top most level
SELECT "some_users_data".user_id, "some_recent_users".value_3
FROM
(SELECT filter_users_1.user_id, value_3
FROM
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 20 and user_id < 70 and users.value_2 = 200
) filter_users_1
JOIN LATERAL
(SELECT user_id, value_3
FROM events_table as "events"
WHERE user_id > 20 and user_id < 70
AND ("events".user_id = "filter_users_1".user_id)
ORDER BY value_3 DESC
LIMIT 1
) "last_events_1" ON TRUE
ORDER BY value_3 DESC
LIMIT 10
) "some_recent_users"
JOIN LATERAL
(SELECT "users".user_id
FROM users_table as "users"
WHERE "users"."user_id" = "some_recent_users"."user_id"
AND users.value_2 > 200
LIMIT 1
) "some_users_data" ON TRUE
ORDER BY value_3 DESC, user_id ASC
LIMIT 10;
user_id | value_3
---------+---------
44 | 998
65 | 996
66 | 996
37 | 995
57 | 989
21 | 985
(6 rows)
-- longer nested lateral joins
SELECT *
FROM
@ -726,6 +850,45 @@ FROM
21 | 985
(6 rows)
-- longer nested lateral join wth top level join
SELECT "some_users_data".user_id, "some_recent_users".value_3
FROM
(SELECT filter_users_1.user_id, value_3
FROM
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 20 and user_id < 70 and users.value_2 = 200
) filter_users_1
JOIN LATERAL
(SELECT user_id, value_3
FROM events_table as "events"
WHERE user_id > 20 and user_id < 70
AND ("events".user_id = "filter_users_1".user_id)
ORDER BY value_3 DESC
LIMIT 1
) "last_events_1" ON TRUE
ORDER BY value_3 DESC
LIMIT 10
) "some_recent_users"
JOIN LATERAL
(SELECT "users".user_id
FROM users_table as "users"
WHERE "users"."user_id" = "some_recent_users"."user_id"
AND users.value_2 > 200
LIMIT 1
) "some_users_data" ON TRUE
ORDER BY value_3 DESC
LIMIT 10;
user_id | value_3
---------+---------
44 | 998
65 | 996
66 | 996
37 | 995
57 | 989
21 | 985
(6 rows)
-- LEFT JOINs used with INNER JOINs
SELECT
count(*) AS cnt, "generated_group_field"

View File

@ -197,9 +197,7 @@ SELECT count(*) FROM
-- (SELECT l_orderkey FROM lineitem_subquery) UNION
-- (SELECT l_orderkey FROM lineitem_subquery)
--) b;
-- Check that we error out if the outermost query has subquery join.
-- Check that we error out if inner query has limit but outer quers has not.
SELECT
avg(o_totalprice/l_quantity)
FROM

View File

@ -40,7 +40,7 @@ test: multi_insert_select
test: multi_deparse_shard_query
test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery
test: multi_explain
test: multi_subquery multi_subquery_complex_queries
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
test: multi_reference_table
test: multi_outer_join_reference
test: multi_single_relation_subquery

View File

@ -194,7 +194,7 @@ DETAIL: Union All clauses are currently unsupported
-- (SELECT l_orderkey FROM lineitem_subquery) UNION
-- (SELECT l_orderkey FROM lineitem_subquery)
--) b;
-- Check that we error out if the outermost query has subquery join.
-- Check that we error out if inner query has limit but outer quers has not.
SELECT
avg(o_totalprice/l_quantity)
FROM
@ -214,8 +214,8 @@ FROM
orders_subquery
WHERE
lineitem_quantities.l_orderkey = o_orderkey) orders_price ON true;
ERROR: cannot perform distributed planning on this query
DETAIL: Join in subqueries is not supported yet
ERROR: cannot push down this subquery
DETAIL: Limit in subquery without limit in the outer query is unsupported
-- Check that we error out if the outermost query is a distinct clause.
SELECT
count(DISTINCT a)

View File

@ -194,7 +194,7 @@ DETAIL: Union All clauses are currently unsupported
-- (SELECT l_orderkey FROM lineitem_subquery) UNION
-- (SELECT l_orderkey FROM lineitem_subquery)
--) b;
-- Check that we error out if the outermost query has subquery join.
-- Check that we error out if inner query has limit but outer quers has not.
SELECT
avg(o_totalprice/l_quantity)
FROM
@ -214,8 +214,8 @@ FROM
orders_subquery
WHERE
lineitem_quantities.l_orderkey = o_orderkey) orders_price ON true;
ERROR: cannot perform distributed planning on this query
DETAIL: Join in subqueries is not supported yet
ERROR: cannot push down this subquery
DETAIL: Limit in subquery without limit in the outer query is unsupported
-- Check that we error out if the outermost query is a distinct clause.
SELECT
count(DISTINCT a)

File diff suppressed because it is too large Load Diff

View File

@ -206,6 +206,32 @@ FROM
order BY user_id
limit 50;
-- same query with subuqery joins in topmost select
SELECT "some_users_data".user_id, lastseen
FROM
(SELECT user_id,
Max(TIME) AS lastseen
FROM
(SELECT user_id,
TIME
FROM
(SELECT user_id,
TIME
FROM events_table as "events"
WHERE user_id > 10 and user_id < 40) "events_1"
ORDER BY TIME DESC
LIMIT 1000) "recent_events_1"
GROUP BY user_id
ORDER BY max(TIME) DESC) "some_recent_users"
JOIN LATERAL
(SELECT "users".user_id
FROM users_table as "users"
WHERE "users"."user_id" = "some_recent_users"."user_id"
AND users.value_2 > 50 and users.value_2 < 55
LIMIT 1) "some_users_data" ON TRUE
ORDER BY user_id
limit 50;
-- not supported since JOIN is not on the partition key
SELECT *
FROM
@ -304,6 +330,44 @@ FROM
order BY user_id DESC
limit 10;
--
-- A similar query with topmost select is dropped
-- and replaced by aggregation. Notice the heavy use of limit
--
SELECT "some_users_data".user_id, MAX(lastseen), count(*)
FROM
(SELECT filter_users_1.user_id,
TIME AS lastseen
FROM
(SELECT user_where_1_1.user_id
FROM
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
INNER JOIN
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
JOIN LATERAL
(SELECT user_id,
TIME
FROM events_table as "events"
WHERE user_id > 12 and user_id < 16 and user_id = filter_users_1.user_id
ORDER BY TIME DESC
LIMIT 1) "last_events_1" ON TRUE
ORDER BY TIME DESC
LIMIT 10) "some_recent_users"
JOIN LATERAL
(SELECT "users".user_id
FROM users_table as "users"
WHERE "users"."user_id" = "some_recent_users"."user_id"
AND "users"."value_2" > 70
LIMIT 1) "some_users_data" ON TRUE
GROUP BY 1
ORDER BY 2, 1 DESC
limit 10;
-- not supported since the inner JOIN is not equi join
SELECT user_id, lastseen
FROM
@ -545,24 +609,29 @@ SELECT
SELECT
"value_3", count(*) AS cnt
FROM
(SELECT "value_3", "user_id", random()
FROM
(SELECT users_in_segment_1.user_id, value_3
FROM
(SELECT user_id, value_3 * 2 as value_3
FROM
(SELECT user_id, value_3
FROM
(SELECT "users"."user_id", value_3
FROM users_table as "users"
WHERE user_id > 10 and user_id < 40 and value_2 > 30) simple_user_where_1) all_buckets_1) users_in_segment_1
JOIN
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 10 and user_id < 40 and value_2 > 60) some_users_data
ON ("users_in_segment_1".user_id = "some_users_data".user_id)) segmentalias_1) "tempQuery"
GROUP BY "value_3" ORDER BY cnt, value_3 DESC LIMIT 10;
(SELECT "value_3", "user_id", random()
FROM
(SELECT users_in_segment_1.user_id, value_3
FROM
(SELECT user_id, value_3 * 2 as value_3
FROM
(SELECT user_id, value_3
FROM
(SELECT "users"."user_id", value_3
FROM users_table as "users"
WHERE user_id > 10 and user_id < 40 and value_2 > 30
) simple_user_where_1
) all_buckets_1
) users_in_segment_1
JOIN
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 10 and user_id < 40 and value_2 > 60
) some_users_data
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
) segmentalias_1) "tempQuery"
GROUP BY "value_3"
ORDER BY cnt, value_3 DESC LIMIT 10;
-- not supported since there is no partition column equality at all
@ -622,6 +691,36 @@ FROM
value_3 DESC
limit 10;
-- nested lateral join at top most level
SELECT "some_users_data".user_id, "some_recent_users".value_3
FROM
(SELECT filter_users_1.user_id, value_3
FROM
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 20 and user_id < 70 and users.value_2 = 200
) filter_users_1
JOIN LATERAL
(SELECT user_id, value_3
FROM events_table as "events"
WHERE user_id > 20 and user_id < 70
AND ("events".user_id = "filter_users_1".user_id)
ORDER BY value_3 DESC
LIMIT 1
) "last_events_1" ON TRUE
ORDER BY value_3 DESC
LIMIT 10
) "some_recent_users"
JOIN LATERAL
(SELECT "users".user_id
FROM users_table as "users"
WHERE "users"."user_id" = "some_recent_users"."user_id"
AND users.value_2 > 200
LIMIT 1
) "some_users_data" ON TRUE
ORDER BY value_3 DESC, user_id ASC
LIMIT 10;
-- longer nested lateral joins
SELECT *
FROM
@ -656,7 +755,35 @@ FROM
value_3 DESC
limit 10;
-- longer nested lateral join wth top level join
SELECT "some_users_data".user_id, "some_recent_users".value_3
FROM
(SELECT filter_users_1.user_id, value_3
FROM
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE user_id > 20 and user_id < 70 and users.value_2 = 200
) filter_users_1
JOIN LATERAL
(SELECT user_id, value_3
FROM events_table as "events"
WHERE user_id > 20 and user_id < 70
AND ("events".user_id = "filter_users_1".user_id)
ORDER BY value_3 DESC
LIMIT 1
) "last_events_1" ON TRUE
ORDER BY value_3 DESC
LIMIT 10
) "some_recent_users"
JOIN LATERAL
(SELECT "users".user_id
FROM users_table as "users"
WHERE "users"."user_id" = "some_recent_users"."user_id"
AND users.value_2 > 200
LIMIT 1
) "some_users_data" ON TRUE
ORDER BY value_3 DESC
LIMIT 10;
-- LEFT JOINs used with INNER JOINs
SELECT