Merge pull request #2631 from citusdata/fix_column_alias

Fix column references to aliased joins
pull/2644/head
Murat Tuncer 2019-03-26 13:17:56 +03:00 committed by GitHub
commit e803eb8a02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 279 additions and 42 deletions

View File

@ -80,6 +80,7 @@ static bool IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurTy
static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType);
static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree);
static List * FlattenJoinVars(List *columnList, Query *queryTree);
static Node * FlattenJoinVarsMutator(Node *node, Query *queryTree);
static void UpdateVarMappingsForExtendedOpNode(List *columnList,
List *flattenedColumnList,
List *subqueryTargetEntryList);
@ -1582,55 +1583,74 @@ SubqueryPushdownMultiNodeTree(Query *queryTree)
static List *
FlattenJoinVars(List *columnList, Query *queryTree)
{
ListCell *columnCell = NULL;
List *rteList = queryTree->rtable;
List *flattenedExprList = NIL;
ListCell *columnCell = NULL;
foreach(columnCell, columnList)
{
Var *column = (Var *) lfirst(columnCell);
RangeTblEntry *columnRte = NULL;
PlannerInfo *root = NULL;
Assert(IsA(column, Var));
/*
* if join does not have an alias, it is copied over join rte.
* There is no need to find the JoinExpr to check whether it has
* an alias defined.
*
* We use the planner's flatten_join_alias_vars routine to do
* the flattening; it wants a PlannerInfo root node, which
* fortunately can be mostly dummy.
*/
columnRte = rt_fetch(column->varno, rteList);
if (columnRte->rtekind == RTE_JOIN && columnRte->alias == NULL)
{
Node *normalizedNode = NULL;
if (root == NULL)
{
root = makeNode(PlannerInfo);
root->parse = (queryTree);
root->planner_cxt = CurrentMemoryContext;
root->hasJoinRTEs = true;
}
normalizedNode = strip_implicit_coercions(flatten_join_alias_vars(root,
(Node *)
column));
flattenedExprList = lappend(flattenedExprList, copyObject(normalizedNode));
}
else
{
flattenedExprList = lappend(flattenedExprList, copyObject(column));
}
Node *column = strip_implicit_coercions(
FlattenJoinVarsMutator((Node *) lfirst(columnCell), queryTree));
flattenedExprList = lappend(flattenedExprList, copyObject(column));
}
return flattenedExprList;
}
/*
* FlattenJoinVarsMutator flattens a single column var as outlined in the caller
* function (FlattenJoinVars). It iterates the join tree to find the
* lowest Var it can go. This is usually the relation range table var. However
* if a join operation is given an alias, iteration stops at that level since the
* query can not reference the inner RTE by name if the join is given an alias.
*/
static Node *
FlattenJoinVarsMutator(Node *node, Query *queryTree)
{
if (node == NULL)
{
return NULL;
}
if (IsA(node, Var))
{
Var *column = (Var *) node;
RangeTblEntry *rte = rt_fetch(column->varno, queryTree->rtable);
if (rte->rtekind == RTE_JOIN)
{
Node *newColumn = NULL;
/*
* if join has an alias, it is copied over join RTE. We should
* reference this RTE.
*/
if (rte->alias != NULL)
{
return (Node *) column;
}
/* join RTE does not have and alias defined at this level, deeper look is needed */
Assert(column->varattno > 0);
newColumn = (Node *) list_nth(rte->joinaliasvars, column->varattno - 1);
Assert(newColumn != NULL);
/*
* Ideally we should use expression_tree_mutator here. But it does not call
* mutate function for Vars, thus we make a recursive call to make sure
* not to miss Vars in nested joins.
*/
return FlattenJoinVarsMutator(newColumn, queryTree);
}
else
{
return node;
}
}
return expression_tree_mutator(node, FlattenJoinVarsMutator, (void *) queryTree);
}
/*
* CreateSubqueryTargetEntryList creates a target entry for each unique column
* in the column list and returns the target entry list.
@ -1733,13 +1753,12 @@ UpdateColumnToMatchingTargetEntry(Var *column, Node *flattenedExpr, List *target
else if (IsA(targetEntry->expr, CoalesceExpr))
{
/*
* flatten_join_alias_vars() flattens full oter joins' columns that is
* FlattenJoinVars() flattens full oter joins' columns that is
* in the USING part into COALESCE(left_col, right_col)
*/
CoalesceExpr *targetCoalesceExpr = (CoalesceExpr *) targetEntry->expr;
if (IsA(flattenedExpr, CoalesceExpr) && equal(flattenedExpr,
targetCoalesceExpr))
targetEntry->expr))
{
Oid expressionType = exprType(flattenedExpr);
int32 expressionTypmod = exprTypmod(flattenedExpr);

View File

@ -2749,3 +2749,151 @@ LIMIT 5;
1 | 1 | 2
(5 rows)
SELECT c_custkey
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
ORDER BY 1 DESC
LIMIT 10;
c_custkey
-----------
6
6
6
6
6
6
6
6
6
6
(10 rows)
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
GROUP BY 1
ORDER BY 2, 1
LIMIT 10;
c_custkey | date_trunc
-----------+--------------------------
2 | Thu Nov 23 13:52:00 2017
6 | Thu Nov 23 14:43:00 2017
4 | Thu Nov 23 15:32:00 2017
5 | Thu Nov 23 16:48:00 2017
3 | Thu Nov 23 17:18:00 2017
1 | Thu Nov 23 17:30:00 2017
(6 rows)
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
GROUP BY 1
HAVING extract(minute from max(c_nationkey)) >= 45
ORDER BY 2, 1
LIMIT 10;
c_custkey | date_trunc
-----------+--------------------------
2 | Thu Nov 23 13:52:00 2017
5 | Thu Nov 23 16:48:00 2017
(2 rows)
SELECT user_id
FROM (users_table JOIN events_table USING (user_id)) AS test(user_id, c_nationkey)
FULL JOIN users_table AS u2 USING (user_id)
ORDER BY 1 DESC
LIMIT 10;
user_id
---------
6
6
6
6
6
6
6
6
6
6
(10 rows)
-- nested joins
SELECT bar, value_3_table.value_3
FROM ((users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
GROUP BY 1,2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
bar | value_3
-----+---------
3 | 5
2 | 5
1 | 5
5 | 4
4 | 4
3 | 4
2 | 4
1 | 4
5 | 3
4 | 3
(10 rows)
-- lateral joins
SELECT bar,
value_3_table.value_3
FROM ((users_table
JOIN (events_table
INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep) ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar, foo)
JOIN LATERAL
(SELECT value_3
FROM events_table
WHERE user_id = bar) AS value_3_table ON TRUE
GROUP BY 1, 2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
bar | value_3
-----+---------
3 | 5
2 | 5
1 | 5
5 | 4
4 | 4
3 | 4
2 | 4
1 | 4
5 | 3
4 | 3
(10 rows)
--Joins inside subqueries are the sources of the values in the target list:
SELECT bar, foo.value_3, c_custkey, test_2.time_2 FROM
(
SELECT bar, value_3_table.value_3, random()
FROM ((users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
GROUP BY 1,2
) as foo, (users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join_2(user_id_deep)
ON (users_table.user_id = deeper_join_2.user_id_deep)) AS test_2(c_custkey, time_2) WHERE foo.bar = test_2.c_custkey
ORDER BY 2 DESC, 1 DESC, 3 DESC, 4 DESC
LIMIT 10;
bar | value_3 | c_custkey | time_2
-----+---------+-----------+---------------------------------
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
(10 rows)

View File

@ -2428,3 +2428,73 @@ FROM (users_table u FULL JOIN events_table e USING (user_id)) k(x1, x2, x3, x4,
ORDER BY 1, 2, 3
LIMIT 5;
SELECT c_custkey
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
ORDER BY 1 DESC
LIMIT 10;
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
GROUP BY 1
ORDER BY 2, 1
LIMIT 10;
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
GROUP BY 1
HAVING extract(minute from max(c_nationkey)) >= 45
ORDER BY 2, 1
LIMIT 10;
SELECT user_id
FROM (users_table JOIN events_table USING (user_id)) AS test(user_id, c_nationkey)
FULL JOIN users_table AS u2 USING (user_id)
ORDER BY 1 DESC
LIMIT 10;
-- nested joins
SELECT bar, value_3_table.value_3
FROM ((users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
GROUP BY 1,2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
-- lateral joins
SELECT bar,
value_3_table.value_3
FROM ((users_table
JOIN (events_table
INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep) ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar, foo)
JOIN LATERAL
(SELECT value_3
FROM events_table
WHERE user_id = bar) AS value_3_table ON TRUE
GROUP BY 1, 2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
--Joins inside subqueries are the sources of the values in the target list:
SELECT bar, foo.value_3, c_custkey, test_2.time_2 FROM
(
SELECT bar, value_3_table.value_3, random()
FROM ((users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
GROUP BY 1,2
) as foo, (users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join_2(user_id_deep)
ON (users_table.user_id = deeper_join_2.user_id_deep)) AS test_2(c_custkey, time_2) WHERE foo.bar = test_2.c_custkey
ORDER BY 2 DESC, 1 DESC, 3 DESC, 4 DESC
LIMIT 10;