Add union support within the JOINS

This commit adds support for UNION/UNION ALL subqueries that are
in the following form:

     .... (Q1 UNION Q2 UNION ...) as union_query JOIN (QN) ...

In other words, we currently do NOT support queries of the
following form, where the union query is not JOINed with
other relations/subqueries:

     .... (Q1 UNION Q2 UNION ...) as union_query ....
pull/1333/head
Onder Kalaci 2017-03-22 16:44:47 +02:00
parent 6f3262546f
commit 1a0678bc7f
11 changed files with 2715 additions and 546 deletions

View File

@ -42,6 +42,7 @@
#include "parser/parse_agg.h" #include "parser/parse_agg.h"
#include "parser/parse_coerce.h" #include "parser/parse_coerce.h"
#include "parser/parse_oper.h" #include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/fmgroids.h" #include "utils/fmgroids.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
@ -151,8 +152,9 @@ static void ErrorIfContainsUnsupportedSubquery(MultiNode *logicalPlanNode,
PlannerRestrictionContext * PlannerRestrictionContext *
plannerRestrictionContext); plannerRestrictionContext);
static void ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit); static void ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit);
static void ErrorIfUnsupportedSetOperation(Query *subqueryTree, bool outerQueryHasLimit);
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
static void ErrorIfUnsupportedTableCombination(Query *queryTree); static void ErrorIfUnsupportedTableCombination(Query *queryTree);
static void ErrorIfUnsupportedUnionQuery(Query *unionQuery);
static bool TargetListOnPartitionColumn(Query *query, List *targetEntryList); static bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query); static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
static bool FullCompositeFieldList(List *compositeFieldList); static bool FullCompositeFieldList(List *compositeFieldList);
@ -2945,18 +2947,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
if (subqueryTree->setOperations) if (subqueryTree->setOperations)
{ {
SetOperationStmt *setOperationStatement = ErrorIfUnsupportedSetOperation(subqueryTree, outerQueryHasLimit);
(SetOperationStmt *) subqueryTree->setOperations;
if (setOperationStatement->op == SETOP_UNION)
{
ErrorIfUnsupportedUnionQuery(subqueryTree);
}
else
{
preconditionsSatisfied = false;
errorDetail = "Intersect and Except are currently unsupported";
}
} }
if (subqueryTree->hasRecursive) if (subqueryTree->hasRecursive)
@ -3047,6 +3038,77 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
} }
/*
 * ErrorIfUnsupportedSetOperation validates the set operation tree of the given
 * subquery on behalf of ErrorIfCannotPushdownSubquery().
 *
 * First, every set operation node found in the tree is inspected and an error
 * is raised as soon as one of them is not a UNION (i.e., INTERSECT or EXCEPT).
 * Then, each range table entry whose index is collected from the set operation
 * tree is expected to be a subquery and is checked recursively via
 * ErrorIfCannotPushdownSubquery().
 */
static void
ErrorIfUnsupportedSetOperation(Query *subqueryTree, bool outerQueryHasLimit)
{
	List *rangeTable = subqueryTree->rtable;
	Node *setOperationTree = (Node *) subqueryTree->setOperations;
	List *setOperationList = NIL;
	List *leafIndexList = NIL;
	ListCell *setOperationCell = NULL;
	ListCell *leafIndexCell = NULL;

	/* gather all set operation nodes and reject anything other than UNION */
	ExtractSetOperationStatmentWalker(setOperationTree, &setOperationList);
	foreach(setOperationCell, setOperationList)
	{
		SetOperationStmt *currentSetOperation =
			(SetOperationStmt *) lfirst(setOperationCell);

		if (currentSetOperation->op == SETOP_UNION)
		{
			continue;
		}

		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot push down this subquery"),
						errdetail("Intersect and Except are currently unsupported")));
	}

	/* recurse into each subquery referenced by the set operation tree */
	ExtractRangeTableIndexWalker(setOperationTree, &leafIndexList);
	foreach(leafIndexCell, leafIndexList)
	{
		int leafIndex = lfirst_int(leafIndexCell);
		RangeTblEntry *leafEntry = rt_fetch(leafIndex, rangeTable);

		Assert(leafEntry->rtekind == RTE_SUBQUERY);

		ErrorIfCannotPushdownSubquery(leafEntry->subquery, outerQueryHasLimit);
	}
}
/*
 * ExtractSetOperationStatmentWalker walks over a set operation statement tree
 * and appends every SetOperationStmt node it visits to setOperationList.
 *
 * The return value is whatever expression_tree_walker() returns for the
 * children of the given node; a NULL node yields false.
 */
static bool
ExtractSetOperationStatmentWalker(Node *node, List **setOperationList)
{
	if (node == NULL)
	{
		return false;
	}

	/* remember the node itself before descending into its children */
	if (IsA(node, SetOperationStmt))
	{
		*setOperationList = lappend(*setOperationList, (SetOperationStmt *) node);
	}

	return expression_tree_walker(node, ExtractSetOperationStatmentWalker,
								  setOperationList);
}
/* /*
* ErrorIfUnsupportedTableCombination checks if the given query tree contains any * ErrorIfUnsupportedTableCombination checks if the given query tree contains any
* unsupported range table combinations. For this, the function walks over all * unsupported range table combinations. For this, the function walks over all
@ -3103,103 +3165,6 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
} }
/*
 * ErrorIfUnsupportedUnionQuery checks if the given union query is a supported
 * one; otherwise it errors out. For this purpose it checks three conditions:
 * a. Do the union subqueries have the same count of partition column filters?
 * b. Do the target lists of the union subqueries include the partition column?
 * c. Is it a union clause without the ALL option?
 *
 * Note that we check equality of filters in ErrorIfUnsupportedFilters(). We
 * allow leaf queries not having a filter clause on the partition column. If a
 * leaf query has a filter on the partition column, it must be the same as the
 * filters of the other queries; a leaf query without any filter on the
 * partition column is also fine. Because joins and nested queries are
 * transitive, it is enough for one leaf query to have a filter on the
 * partition column. But unions are not transitive, so here we check if they
 * have the same count of filters on the partition column. If the count is more
 * than 0, we already checked that they are the same, or if the count is 0 then
 * neither has any filter on the partition column.
 *
 * When more than one condition fails, the detail of the last failing check is
 * the one that gets reported.
 */
static void
ErrorIfUnsupportedUnionQuery(Query *unionQuery)
{
bool supportedUnionQuery = true;
bool leftQueryOnPartitionColumn = false;
bool rightQueryOnPartitionColumn = false;
List *rangeTableList = unionQuery->rtable;
SetOperationStmt *unionStatement = (SetOperationStmt *) unionQuery->setOperations;
Query *leftQuery = NULL;
Query *rightQuery = NULL;
List *leftOpExpressionList = NIL;
List *rightOpExpressionList = NIL;
uint32 leftOpExpressionCount = 0;
uint32 rightOpExpressionCount = 0;
char *errorDetail = NULL;
/*
 * Both arguments of the set operation are cast to RangeTblRef without a node
 * tag check.
 * NOTE(review): presumably a nested set operation would put a SetOperationStmt
 * here instead of a RangeTblRef -- confirm that callers never pass such a tree.
 */
RangeTblRef *leftRangeTableReference = (RangeTblRef *) unionStatement->larg;
RangeTblRef *rightRangeTableReference = (RangeTblRef *) unionStatement->rarg;
/* rtindex values are 1-based whereas list_nth() expects a 0-based position */
int leftTableIndex = leftRangeTableReference->rtindex - 1;
int rightTableIndex = rightRangeTableReference->rtindex - 1;
RangeTblEntry *leftRangeTableEntry = (RangeTblEntry *) list_nth(rangeTableList,
leftTableIndex);
RangeTblEntry *rightRangeTableEntry = (RangeTblEntry *) list_nth(rangeTableList,
rightTableIndex);
Assert(leftRangeTableEntry->rtekind == RTE_SUBQUERY);
Assert(rightRangeTableEntry->rtekind == RTE_SUBQUERY);
leftQuery = leftRangeTableEntry->subquery;
rightQuery = rightRangeTableEntry->subquery;
/*
 * Check if the subqueries of the union have the same count of filters on the
 * partition column.
 */
leftOpExpressionList = PartitionColumnOpExpressionList(leftQuery);
rightOpExpressionList = PartitionColumnOpExpressionList(rightQuery);
leftOpExpressionCount = list_length(leftOpExpressionList);
rightOpExpressionCount = list_length(rightOpExpressionList);
if (leftOpExpressionCount != rightOpExpressionCount)
{
supportedUnionQuery = false;
errorDetail = "Union clauses need to have same count of filters on "
"partition column";
}
/* check if both union subqueries have the partition column in their target lists */
leftQueryOnPartitionColumn = TargetListOnPartitionColumn(leftQuery,
leftQuery->targetList);
rightQueryOnPartitionColumn = TargetListOnPartitionColumn(rightQuery,
rightQuery->targetList);
if (!(leftQueryOnPartitionColumn && rightQueryOnPartitionColumn))
{
supportedUnionQuery = false;
errorDetail = "Union clauses need to select partition columns";
}
/* check if it is a union all operation */
if (unionStatement->all)
{
supportedUnionQuery = false;
errorDetail = "Union All clauses are currently unsupported";
}
/* finally, error out with the most recently recorded detail if any check failed */
if (!supportedUnionQuery)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot push down this subquery"),
errdetail("%s", errorDetail)));
}
}
/* /*
* GroupTargetEntryList walks over group clauses in the given list, finds * GroupTargetEntryList walks over group clauses in the given list, finds
* matching target entries and return them in a new list. * matching target entries and return them in a new list.

View File

@ -69,6 +69,29 @@ static AttributeEquivalenceClass * AttributeEquivalenceClassForEquivalenceClass(
static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass ** static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass **
attributeEquivalanceClass, attributeEquivalanceClass,
PlannerInfo *root, Var *varToBeAdded); PlannerInfo *root, Var *varToBeAdded);
static void AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass *
*attributeEquivalanceClass,
RangeTblEntry *
rangeTableEntry,
PlannerInfo *root,
Var *varToBeAdded);
static Query * GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry,
Var *varToBeAdded);
static void AddUnionAllSetOperationsToAttributeEquivalenceClass(
AttributeEquivalenceClass **
attributeEquivalanceClass,
PlannerInfo *root,
Var *varToBeAdded);
static void AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
attributeEquivalenceClass,
PlannerInfo *root,
SetOperationStmt *
setOperation,
Var *varToBeAdded);
static void AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
attrEquivalenceClass,
RangeTblEntry *rangeTableEntry,
Var *varToBeAdded);
static Var * GetVarFromAssignedParam(List *parentPlannerParamList, static Var * GetVarFromAssignedParam(List *parentPlannerParamList,
Param *plannerParam); Param *plannerParam);
static List * GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext static List * GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext
@ -632,106 +655,271 @@ GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext *
* - Generate an AttributeEquivalenceMember and add to the input * - Generate an AttributeEquivalenceMember and add to the input
* AttributeEquivalenceClass * AttributeEquivalenceClass
* - If the RTE that corresponds to a subquery * - If the RTE that corresponds to a subquery
* - Find the corresponding target entry via varno * - If the RTE that corresponds to a UNION ALL subquery
* - if subquery entry is a set operation (i.e., only UNION/UNION ALL allowed) * - Iterate on each of the appendRels (i.e., each of the UNION ALL query)
* - recursively add both left and right sides of the set operation's * - Recursively add all children of the set operation's
* corresponding target entries
* - If the corresponding subquery entry is a UNION set operation
* - Recursively add all children of the set operation's
* corresponding target entries * corresponding target entries
* - if subquery is not a set operation * - If the corresponding subquery is a regular subquery (i.e., No set operations)
* - recursively try to add the corresponding target entry to the * - Recursively try to add the corresponding target entry to the
* equivalence class * equivalence class
*
* Note that this function only adds partition keys to the attributeEquivalanceClass.
* This implies that there wouldn't be any columns for reference tables.
*/ */
static void static void
AddToAttributeEquivalenceClass(AttributeEquivalenceClass **attributeEquivalanceClass, AddToAttributeEquivalenceClass(AttributeEquivalenceClass **attributeEquivalanceClass,
PlannerInfo *root, Var *varToBeAdded) PlannerInfo *root, Var *varToBeAdded)
{ {
RangeTblEntry *rangeTableEntry = root->simple_rte_array[varToBeAdded->varno]; RangeTblEntry *rangeTableEntry = NULL;
/* punt if it's a whole-row var rather than a plain column reference */
if (varToBeAdded->varattno == InvalidAttrNumber)
{
return;
}
/* we also don't want to process ctid, tableoid etc */
if (varToBeAdded->varattno < InvalidAttrNumber)
{
return;
}
rangeTableEntry = root->simple_rte_array[varToBeAdded->varno];
if (rangeTableEntry->rtekind == RTE_RELATION) if (rangeTableEntry->rtekind == RTE_RELATION)
{ {
AttributeEquivalenceClassMember *attributeEqMember = NULL; AddRteRelationToAttributeEquivalenceClass(attributeEquivalanceClass,
Oid relationId = rangeTableEntry->relid; rangeTableEntry,
Var *relationPartitionKey = NULL; varToBeAdded);
if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
{
return;
}
relationPartitionKey = PartitionKey(relationId);
if (relationPartitionKey->varattno != varToBeAdded->varattno)
{
return;
}
attributeEqMember = palloc0(sizeof(AttributeEquivalenceClassMember));
attributeEqMember->varattno = varToBeAdded->varattno;
attributeEqMember->varno = varToBeAdded->varno;
attributeEqMember->rteIdentity = GetRTEIdentity(rangeTableEntry);
attributeEqMember->relationId = rangeTableEntry->relid;
(*attributeEquivalanceClass)->equivalentAttributes =
lappend((*attributeEquivalanceClass)->equivalentAttributes,
attributeEqMember);
} }
else if (rangeTableEntry->rtekind == RTE_SUBQUERY && !rangeTableEntry->inh) else if (rangeTableEntry->rtekind == RTE_SUBQUERY)
{ {
Query *subquery = rangeTableEntry->subquery; AddRteSubqueryToAttributeEquivalenceClass(attributeEquivalanceClass,
RelOptInfo *baseRelOptInfo = NULL; rangeTableEntry, root,
TargetEntry *subqueryTargetEntry = NULL; varToBeAdded);
}
}
/* punt if it's a whole-row var rather than a plain column reference */
if (varToBeAdded->varattno == InvalidAttrNumber)
{
return;
}
/* we also don't want to process ctid, tableoid etc */ /*
if (varToBeAdded->varattno < InvalidAttrNumber) * AddRteSubqueryToAttributeEquivalenceClass adds the given var to the given
{ * attribute equivalence class.
return; *
} * The main algorithm is outlined in AddToAttributeEquivalenceClass().
*/
static void
AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass
										  **attributeEquivalanceClass,
										  RangeTblEntry *rangeTableEntry,
										  PlannerInfo *root, Var *varToBeAdded)
{
	/*
	 * Resolves the given var, which references an output column of the subquery
	 * in rangeTableEntry, to the Var in the subquery's target list and then
	 * dispatches on the subquery's shape (UNION ALL, other set operation, or
	 * regular subquery) to add the underlying column(s) to the equivalence
	 * class. Returns without adding anything when the var cannot be resolved.
	 */
	RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno);
	TargetEntry *subqueryTargetEntry = NULL;
	Query *targetSubquery = GetTargetSubquery(root, rangeTableEntry, varToBeAdded);

	/*
	 * GetTargetSubquery() may return NULL (e.g., when the subquery has not been
	 * planned yet). There is nothing to resolve in that case, so punt instead
	 * of dereferencing a NULL pointer below.
	 */
	if (targetSubquery == NULL)
	{
		return;
	}

	/* get the subquery output expression referenced by the upper var */
	subqueryTargetEntry = get_tle_by_resno(targetSubquery->targetList,
										   varToBeAdded->varattno);

	/* if we fail to find corresponding target entry, do not proceed */
	if (subqueryTargetEntry == NULL || subqueryTargetEntry->resjunk)
	{
		return;
	}

	/* we're only interested in Vars */
	if (!IsA(subqueryTargetEntry->expr, Var))
	{
		return;
	}

	varToBeAdded = (Var *) subqueryTargetEntry->expr;

	/*
	 * "inh" flag is set either when inheritance or "UNION ALL" exists in the
	 * subquery. Here we're only interested in the "UNION ALL" case.
	 *
	 * Else, we check one more thing: Does the subquery contain a "UNION" query.
	 * If so, we recursively traverse all "UNION" tree and add the corresponding
	 * target list elements to the attribute equivalence.
	 *
	 * Finally, if it is a regular subquery (i.e., does not contain UNION or UNION ALL),
	 * we simply recurse to find the corresponding RTE_RELATION to add to the
	 * equivalence class.
	 *
	 * Note that we're treating "UNION" and "UNION ALL" clauses differently given
	 * that postgres planner process/plans them separately.
	 */
	if (rangeTableEntry->inh)
	{
		AddUnionAllSetOperationsToAttributeEquivalenceClass(attributeEquivalanceClass,
															root, varToBeAdded);
	}
	else if (targetSubquery->setOperations)
	{
		AddUnionSetOperationsToAttributeEquivalenceClass(attributeEquivalanceClass,
														 baseRelOptInfo->subroot,
														 (SetOperationStmt *)
														 targetSubquery->setOperations,
														 varToBeAdded);
	}
	else if (varToBeAdded->varlevelsup == 0)
	{
		/* only vars that belong to the subquery's own query level are followed */
		AddToAttributeEquivalenceClass(attributeEquivalanceClass,
									   baseRelOptInfo->subroot, varToBeAdded);
	}
}
/*
* GetTargetSubquery returns the corresponding subquery for the given planner root,
* range table entry and the var.
*
* The aim of this function is to simplify extracting the subquery in case of "UNION ALL"
* queries.
*/
static Query *
GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry, Var *varToBeAdded)
{
Query *targetSubquery = NULL;
/*
* For subqueries other than "UNION ALL", find the corresponding targetSubquery. See
* the details of how we process subqueries in the below comments.
*/
if (!rangeTableEntry->inh)
{
RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno);
/* If the targetSubquery hasn't been planned yet, we have to punt */
if (baseRelOptInfo->subroot == NULL) if (baseRelOptInfo->subroot == NULL)
{ {
return; return NULL;
} }
Assert(IsA(baseRelOptInfo->subroot, PlannerInfo)); Assert(IsA(baseRelOptInfo->subroot, PlannerInfo));
subquery = baseRelOptInfo->subroot->parse; targetSubquery = baseRelOptInfo->subroot->parse;
Assert(IsA(subquery, Query)); Assert(IsA(targetSubquery, Query));
/* Get the subquery output expression referenced by the upper Var */
subqueryTargetEntry = get_tle_by_resno(subquery->targetList,
varToBeAdded->varattno);
if (subqueryTargetEntry == NULL || subqueryTargetEntry->resjunk)
{
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("subquery %s does not have attribute %d",
rangeTableEntry->eref->aliasname,
varToBeAdded->varattno)));
}
if (!IsA(subqueryTargetEntry->expr, Var))
{
return;
}
varToBeAdded = (Var *) subqueryTargetEntry->expr;
if (varToBeAdded && IsA(varToBeAdded, Var) && varToBeAdded->varlevelsup == 0)
{
AddToAttributeEquivalenceClass(attributeEquivalanceClass,
baseRelOptInfo->subroot, varToBeAdded);
}
} }
else
{
targetSubquery = rangeTableEntry->subquery;
}
return targetSubquery;
}
/*
 * AddUnionAllSetOperationsToAttributeEquivalenceClass iterates on all the
 * append rels of the given planner root and, for each UNION ALL child, adds
 * the given var to the given equivalence class with the varno set to that
 * child's range table index.
 *
 * The input var is never modified: callers may pass a Var that lives inside a
 * query's target list, so each child is processed through a local copy.
 */
static void
AddUnionAllSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
													attributeEquivalanceClass,
													PlannerInfo *root,
													Var *varToBeAdded)
{
	List *appendRelList = root->append_rel_list;
	ListCell *appendRelCell = NULL;

	/* iterate on the queries that are part of UNION ALL subselects */
	foreach(appendRelCell, appendRelList)
	{
		AppendRelInfo *appendRelInfo = (AppendRelInfo *) lfirst(appendRelCell);
		Var childVar;

		/*
		 * We're only interested in UNION ALL clauses and parent_reloid is invalid
		 * only for UNION ALL (i.e., equals to a legitimate Oid for inheritance)
		 */
		if (appendRelInfo->parent_reloid != InvalidOid)
		{
			continue;
		}

		/* point a private copy of the var to this specific child */
		childVar = *varToBeAdded;
		childVar.varno = appendRelInfo->child_relid;

		AddToAttributeEquivalenceClass(attributeEquivalanceClass, root,
									   &childVar);
	}
}
/*
 * AddUnionSetOperationsToAttributeEquivalenceClass iterates on all the range
 * table indexes collected from the given setOperation tree and adds the given
 * var to the given equivalence class once per index, with the varno set to
 * that index.
 *
 * Although the function silently accepts INTERSECT and EXCEPT set operations, they are
 * rejected later in the planning. We prefer this behavior to provide better error
 * messages.
 *
 * The input var is never modified: callers may pass a Var that lives inside a
 * query's target list, so each index is processed through a local copy.
 */
static void
AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
												 attributeEquivalenceClass,
												 PlannerInfo *root,
												 SetOperationStmt *setOperation,
												 Var *varToBeAdded)
{
	List *rangeTableIndexList = NIL;
	ListCell *rangeTableIndexCell = NULL;

	ExtractRangeTableIndexWalker((Node *) setOperation, &rangeTableIndexList);

	foreach(rangeTableIndexCell, rangeTableIndexList)
	{
		int rangeTableIndex = lfirst_int(rangeTableIndexCell);
		Var childVar = *varToBeAdded;

		/* point a private copy of the var to this range table entry */
		childVar.varno = rangeTableIndex;

		AddToAttributeEquivalenceClass(attributeEquivalenceClass, root, &childVar);
	}
}
/*
* AddRteRelationToAttributeEquivalenceClass adds the given var to the given equivalence
* class using the rteIdentity provided by the rangeTableEntry. Note that
* rteIdentities are only assigned to RTE_RELATIONs and this function asserts
* the input rte to be an RTE_RELATION.
*
* Note that this function only adds partition keys to the attributeEquivalanceClass.
* This implies that there wouldn't be any columns for reference tables.
*/
static void
AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
attrEquivalenceClass,
RangeTblEntry *rangeTableEntry,
Var *varToBeAdded)
{
AttributeEquivalenceClassMember *attributeEqMember = NULL;
Oid relationId = InvalidOid;
Var *relationPartitionKey = NULL;
Assert(rangeTableEntry->rtekind == RTE_RELATION);
relationId = rangeTableEntry->relid;
if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
{
return;
}
relationPartitionKey = PartitionKey(relationId);
if (relationPartitionKey->varattno != varToBeAdded->varattno)
{
return;
}
attributeEqMember = palloc0(sizeof(AttributeEquivalenceClassMember));
attributeEqMember->varattno = varToBeAdded->varattno;
attributeEqMember->varno = varToBeAdded->varno;
attributeEqMember->rteIdentity = GetRTEIdentity(rangeTableEntry);
attributeEqMember->relationId = rangeTableEntry->relid;
(*attrEquivalenceClass)->equivalentAttributes =
lappend((*attrEquivalenceClass)->equivalentAttributes,
attributeEqMember);
} }
@ -821,8 +1009,10 @@ static bool
AttributeEquivalancesAreEqual(AttributeEquivalenceClass *firstAttributeEquivalance, AttributeEquivalancesAreEqual(AttributeEquivalenceClass *firstAttributeEquivalance,
AttributeEquivalenceClass *secondAttributeEquivalance) AttributeEquivalenceClass *secondAttributeEquivalance)
{ {
List *firstEquivalenceMemberList = firstAttributeEquivalance->equivalentAttributes; List *firstEquivalenceMemberList =
List *secondEquivalenceMemberList = secondAttributeEquivalance->equivalentAttributes; firstAttributeEquivalance->equivalentAttributes;
List *secondEquivalenceMemberList =
secondAttributeEquivalance->equivalentAttributes;
ListCell *firstAttributeEquivalanceCell = NULL; ListCell *firstAttributeEquivalanceCell = NULL;
ListCell *secondAttributeEquivalanceCell = NULL; ListCell *secondAttributeEquivalanceCell = NULL;

View File

@ -17,3 +17,4 @@
/worker_copy.out /worker_copy.out
/multi_complex_count_distinct.out /multi_complex_count_distinct.out
/multi_mx_copy_data.out /multi_mx_copy_data.out
/multi_insert_select_behavioral_analytics_create_table.out

View File

@ -38,7 +38,6 @@ ORDER BY 2 DESC, 1;
------------------------------------ ------------------------------------
-- Funnel grouped by whether or not a user has done an event -- Funnel grouped by whether or not a user has done an event
-- This has multiple subqueries joinin at the top level -- This has multiple subqueries joinin at the top level
-- Query will be supported when we enable unions
------------------------------------ ------------------------------------
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM ( FROM (
@ -77,8 +76,18 @@ FROM (
GROUP BY t1.user_id, hasdone_event GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event ) t GROUP BY user_id, hasdone_event
ORDER BY user_id; ORDER BY user_id;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys user_id | sum | length | hasdone_event
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. ---------+-----+--------+--------------------
10 | 1 | 18 | Has not done event
12 | 1 | 14 | Has done event
13 | 2 | 18 | Has not done event
15 | 1 | 18 | Has not done event
17 | 1 | 18 | Has not done event
19 | 1 | 14 | Has done event
20 | 2 | 18 | Has not done event
23 | 1 | 18 | Has not done event
(8 rows)
-- same query but multiple joins are one level below, returns count of row instead of actual rows -- same query but multiple joins are one level below, returns count of row instead of actual rows
SELECT count(*) SELECT count(*)
FROM ( FROM (
@ -119,8 +128,11 @@ FROM (
GROUP BY t1.user_id, hasdone_event GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event ) t GROUP BY user_id, hasdone_event
ORDER BY user_id) u; ORDER BY user_id) u;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys count
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -------
8
(1 row)
-- Same queries written without unions -- Same queries written without unions
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM ( FROM (
@ -202,7 +214,6 @@ FROM (
------------------------------------ ------------------------------------
-- Funnel, grouped by the number of times a user has done an event -- Funnel, grouped by the number of times a user has done an event
-- These will be supported when we add unions
------------------------------------ ------------------------------------
SELECT SELECT
user_id, user_id,
@ -268,8 +279,18 @@ GROUP BY
count_pay, user_id count_pay, user_id
ORDER BY ORDER BY
event_average DESC, count_pay DESC, user_id DESC; event_average DESC, count_pay DESC, user_id DESC;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys user_id | event_average | count_pay
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. ---------+------------------------+-----------
69 | 1.00000000000000000000 | 0
65 | 1.00000000000000000000 | 0
58 | 1.00000000000000000000 | 0
49 | 1.00000000000000000000 | 0
40 | 1.00000000000000000000 | 0
32 | 1.00000000000000000000 | 0
29 | 1.00000000000000000000 | 0
18 | 1.00000000000000000000 | 0
(8 rows)
SELECT SELECT
user_id, user_id,
avg(array_length(events_table, 1)) AS event_average, avg(array_length(events_table, 1)) AS event_average,
@ -336,8 +357,18 @@ HAVING
avg(array_length(events_table, 1)) > 0 avg(array_length(events_table, 1)) > 0
ORDER BY ORDER BY
event_average DESC, count_pay DESC, user_id DESC; event_average DESC, count_pay DESC, user_id DESC;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys user_id | event_average | count_pay
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. ---------+------------------------+-----------
69 | 1.00000000000000000000 | 0
65 | 1.00000000000000000000 | 0
58 | 1.00000000000000000000 | 0
49 | 1.00000000000000000000 | 0
40 | 1.00000000000000000000 | 0
32 | 1.00000000000000000000 | 0
29 | 1.00000000000000000000 | 0
18 | 1.00000000000000000000 | 0
(8 rows)
-- Same queries rewritten without using unions -- Same queries rewritten without using unions
SELECT SELECT
user_id, user_id,

File diff suppressed because it is too large Load Diff

View File

@ -172,12 +172,12 @@ FROM
l_orderkey) AS unit_prices; l_orderkey) AS unit_prices;
-- Check that we error out if there is union all. -- Check that we error out if there is union all.
-- TEMPORARLY DISABLE UNION ALL
SELECT count(*) FROM -- SELECT count(*) FROM
( -- (
(SELECT l_orderkey FROM lineitem_subquery) UNION ALL -- (SELECT l_orderkey FROM lineitem_subquery) UNION ALL
(SELECT 1::bigint) -- (SELECT 1::bigint)
) b; --) b;
--- ---
-- TEMPORARLY DISABLE UNIONS WITHOUT JOINS -- TEMPORARLY DISABLE UNIONS WITHOUT JOINS

View File

@ -172,13 +172,12 @@ FROM
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- Check that we error out if there is union all. -- Check that we error out if there is union all.
SELECT count(*) FROM -- TEMPORARLY DISABLE UNION ALL
( -- SELECT count(*) FROM
(SELECT l_orderkey FROM lineitem_subquery) UNION ALL -- (
(SELECT 1::bigint) -- (SELECT l_orderkey FROM lineitem_subquery) UNION ALL
) b; -- (SELECT 1::bigint)
ERROR: cannot push down this subquery --) b;
DETAIL: Union All clauses are currently unsupported
--- ---
-- TEMPORARLY DISABLE UNIONS WITHOUT JOINS -- TEMPORARLY DISABLE UNIONS WITHOUT JOINS
--- ---
@ -984,8 +983,12 @@ FROM
hasdone) AS subquery_top hasdone) AS subquery_top
GROUP BY GROUP BY
hasdone; hasdone;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys event_average | hasdone
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. --------------------+---------------------
4.0000000000000000 | Has not done paying
2.5000000000000000 | Has done paying
(2 rows)
-- Union, left join and having subquery pushdown -- Union, left join and having subquery pushdown
SELECT SELECT
avg(array_length(events, 1)) AS event_average, avg(array_length(events, 1)) AS event_average,
@ -1056,8 +1059,11 @@ GROUP BY
count_pay count_pay
ORDER BY ORDER BY
count_pay; count_pay;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys event_average | count_pay
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. --------------------+-----------
3.0000000000000000 | 0
(1 row)
-- Lateral join subquery pushdown -- Lateral join subquery pushdown
SELECT SELECT
@ -1226,8 +1232,51 @@ FROM
hasdone) AS subquery_top hasdone) AS subquery_top
GROUP BY GROUP BY
hasdone; hasdone;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys QUERY PLAN
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.hasdone
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> GroupAggregate (cost=100.80..100.85 rows=2 width=48)
Group Key: subquery_top.hasdone
-> Sort (cost=100.80..100.80 rows=2 width=64)
Sort Key: subquery_top.hasdone
-> Subquery Scan on subquery_top (cost=100.72..100.79 rows=2 width=64)
-> GroupAggregate (cost=100.72..100.77 rows=2 width=112)
Group Key: subquery_1.tenant_id, subquery_1.user_id, ('Has done paying'::text)
-> Sort (cost=100.72..100.72 rows=2 width=88)
Sort Key: subquery_1.tenant_id, subquery_1.user_id, ('Has done paying'::text)
-> Hash Right Join (cost=100.55..100.71 rows=2 width=88)
Hash Cond: (events.composite_id = subquery_1.composite_id)
-> Unique (cost=76.64..76.64 rows=1 width=80)
-> Sort (cost=76.64..76.64 rows=1 width=80)
Sort Key: ((events.composite_id).tenant_id), ((events.composite_id).user_id)
-> Seq Scan on events_270011 events (cost=0.00..76.62 rows=1 width=80)
Filter: (((event_type)::text = 'pay'::text) AND (composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash (cost=23.89..23.89 rows=2 width=88)
-> Subquery Scan on subquery_1 (cost=23.84..23.89 rows=2 width=88)
-> Unique (cost=23.84..23.87 rows=2 width=88)
-> Sort (cost=23.84..23.84 rows=2 width=88)
Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), (NULL::user_composite_type), ('action=>1'::text), events_1.event_time
-> Append (cost=0.00..23.83 rows=2 width=88)
-> Nested Loop (cost=0.00..11.90 rows=1 width=88)
Join Filter: ((NULL::user_composite_type) = events_1.composite_id)
-> Result (cost=0.00..0.01 rows=1 width=40)
One-Time Filter: false
-> Seq Scan on events_270011 events_1 (cost=0.00..11.62 rows=1 width=40)
Filter: ((event_type)::text = 'click'::text)
-> Nested Loop (cost=0.00..11.90 rows=1 width=88)
Join Filter: ((NULL::user_composite_type) = events_2.composite_id)
-> Result (cost=0.00..0.01 rows=1 width=40)
One-Time Filter: false
-> Seq Scan on events_270011 events_2 (cost=0.00..11.62 rows=1 width=40)
Filter: ((event_type)::text = 'submit'::text)
(41 rows)
-- Union, left join and having subquery pushdown -- Union, left join and having subquery pushdown
EXPLAIN SELECT EXPLAIN SELECT
avg(array_length(events, 1)) AS event_average, avg(array_length(events, 1)) AS event_average,
@ -1298,8 +1347,7 @@ GROUP BY
count_pay count_pay
ORDER BY ORDER BY
count_pay; count_pay;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys ERROR: bogus varattno for OUTER_VAR var: 3
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- Lateral join subquery pushdown -- Lateral join subquery pushdown
EXPLAIN SELECT EXPLAIN SELECT
tenant_id, tenant_id,

View File

@ -172,13 +172,12 @@ FROM
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- Check that we error out if there is union all. -- Check that we error out if there is union all.
SELECT count(*) FROM -- TEMPORARLY DISABLE UNION ALL
( -- SELECT count(*) FROM
(SELECT l_orderkey FROM lineitem_subquery) UNION ALL -- (
(SELECT 1::bigint) -- (SELECT l_orderkey FROM lineitem_subquery) UNION ALL
) b; -- (SELECT 1::bigint)
ERROR: cannot push down this subquery --) b;
DETAIL: Union All clauses are currently unsupported
--- ---
-- TEMPORARLY DISABLE UNIONS WITHOUT JOINS -- TEMPORARLY DISABLE UNIONS WITHOUT JOINS
--- ---
@ -984,8 +983,12 @@ FROM
hasdone) AS subquery_top hasdone) AS subquery_top
GROUP BY GROUP BY
hasdone; hasdone;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys event_average | hasdone
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. --------------------+---------------------
4.0000000000000000 | Has not done paying
2.5000000000000000 | Has done paying
(2 rows)
-- Union, left join and having subquery pushdown -- Union, left join and having subquery pushdown
SELECT SELECT
avg(array_length(events, 1)) AS event_average, avg(array_length(events, 1)) AS event_average,
@ -1056,8 +1059,11 @@ GROUP BY
count_pay count_pay
ORDER BY ORDER BY
count_pay; count_pay;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys event_average | count_pay
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. --------------------+-----------
3.0000000000000000 | 0
(1 row)
-- Lateral join subquery pushdown -- Lateral join subquery pushdown
SELECT SELECT
@ -1221,8 +1227,40 @@ FROM
hasdone) AS subquery_top hasdone) AS subquery_top
GROUP BY GROUP BY
hasdone; hasdone;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys QUERY PLAN
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.hasdone
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate (cost=77.02..77.04 rows=2 width=64)
Group Key: COALESCE(('Has done paying'::text), 'Has not done paying'::text)
-> GroupAggregate (cost=76.93..76.98 rows=2 width=88)
Group Key: subquery_1.tenant_id, subquery_1.user_id, ('Has done paying'::text)
-> Sort (cost=76.93..76.93 rows=2 width=88)
Sort Key: subquery_1.tenant_id, subquery_1.user_id, ('Has done paying'::text)
-> Hash Right Join (cost=76.76..76.92 rows=2 width=88)
Hash Cond: (events.composite_id = subquery_1.composite_id)
-> Unique (cost=76.64..76.64 rows=1 width=32)
-> Sort (cost=76.64..76.64 rows=1 width=32)
Sort Key: ((events.composite_id).tenant_id), ((events.composite_id).user_id)
-> Seq Scan on events_270011 events (cost=0.00..76.62 rows=1 width=32)
Filter: (((event_type)::text = 'pay'::text) AND (composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash (cost=0.10..0.10 rows=2 width=88)
-> Subquery Scan on subquery_1 (cost=0.05..0.10 rows=2 width=88)
-> Unique (cost=0.05..0.08 rows=2 width=0)
-> Sort (cost=0.05..0.06 rows=2 width=0)
Sort Key: ((composite_id).tenant_id), ((composite_id).user_id), composite_id, ('action=>1'::text), event_time
-> Append (cost=0.00..0.04 rows=2 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
One-Time Filter: false
-> Result (cost=0.00..0.01 rows=1 width=0)
One-Time Filter: false
(30 rows)
-- Union, left join and having subquery pushdown -- Union, left join and having subquery pushdown
EXPLAIN SELECT EXPLAIN SELECT
avg(array_length(events, 1)) AS event_average, avg(array_length(events, 1)) AS event_average,
@ -1293,8 +1331,7 @@ GROUP BY
count_pay count_pay
ORDER BY ORDER BY
count_pay; count_pay;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys ERROR: bogus varattno for OUTER_VAR var: 3
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- Lateral join subquery pushdown -- Lateral join subquery pushdown
EXPLAIN SELECT EXPLAIN SELECT
tenant_id, tenant_id,

View File

@ -16,3 +16,4 @@
/worker_copy.sql /worker_copy.sql
/multi_complex_count_distinct.sql /multi_complex_count_distinct.sql
/multi_mx_copy_data.sql /multi_mx_copy_data.sql
/multi_insert_select_behavioral_analytics_create_table.sql

View File

@ -33,7 +33,6 @@ ORDER BY 2 DESC, 1;
------------------------------------ ------------------------------------
-- Funnel grouped by whether or not a user has done an event -- Funnel grouped by whether or not a user has done an event
-- This has multiple subqueries joinin at the top level -- This has multiple subqueries joinin at the top level
-- Query will be supported when we enable unions
------------------------------------ ------------------------------------
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM ( FROM (
@ -180,7 +179,6 @@ FROM (
------------------------------------ ------------------------------------
-- Funnel, grouped by the number of times a user has done an event -- Funnel, grouped by the number of times a user has done an event
-- These will be supported when we add unions
------------------------------------ ------------------------------------
SELECT SELECT
user_id, user_id,

File diff suppressed because it is too large Load Diff