Support columns referencing an aliased joins

We used to rely on PG function flatten_join_alias_vars
to resolve actual columns referenced in target entry list.

The function goes deep and finds the actual relation. This logic
usually works fine. However, when joins are given an alias, inner
relation names are not visible to target entry entry. Thus relation
resolving should stop when we the target entry column refers an
rte of an aliased join.

We stopped using PG function and provided our own flatten function.
pull/2631/head
Murat Tuncer 2019-03-14 18:29:59 +03:00
parent f218549572
commit 1424f75ec9
3 changed files with 279 additions and 42 deletions

View File

@ -80,6 +80,7 @@ static bool IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurTy
static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType);
static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree);
static List * FlattenJoinVars(List *columnList, Query *queryTree);
static Node * FlattenJoinVarsMutator(Node *node, Query *queryTree);
static void UpdateVarMappingsForExtendedOpNode(List *columnList,
List *flattenedColumnList,
List *subqueryTargetEntryList);
@ -1582,55 +1583,74 @@ SubqueryPushdownMultiNodeTree(Query *queryTree)
static List *
FlattenJoinVars(List *columnList, Query *queryTree)
{
ListCell *columnCell = NULL;
List *rteList = queryTree->rtable;
List *flattenedExprList = NIL;
ListCell *columnCell = NULL;
foreach(columnCell, columnList)
{
Var *column = (Var *) lfirst(columnCell);
RangeTblEntry *columnRte = NULL;
PlannerInfo *root = NULL;
Assert(IsA(column, Var));
/*
* if join does not have an alias, it is copied over join rte.
* There is no need to find the JoinExpr to check whether it has
* an alias defined.
*
* We use the planner's flatten_join_alias_vars routine to do
* the flattening; it wants a PlannerInfo root node, which
* fortunately can be mostly dummy.
*/
columnRte = rt_fetch(column->varno, rteList);
if (columnRte->rtekind == RTE_JOIN && columnRte->alias == NULL)
{
Node *normalizedNode = NULL;
if (root == NULL)
{
root = makeNode(PlannerInfo);
root->parse = (queryTree);
root->planner_cxt = CurrentMemoryContext;
root->hasJoinRTEs = true;
}
normalizedNode = strip_implicit_coercions(flatten_join_alias_vars(root,
(Node *)
column));
flattenedExprList = lappend(flattenedExprList, copyObject(normalizedNode));
}
else
{
flattenedExprList = lappend(flattenedExprList, copyObject(column));
}
Node *column = strip_implicit_coercions(
FlattenJoinVarsMutator((Node *) lfirst(columnCell), queryTree));
flattenedExprList = lappend(flattenedExprList, copyObject(column));
}
return flattenedExprList;
}
/*
* FlattenJoinVarsMutator flattens a single column var as outlined in the caller
* function (FlattenJoinVars). It iterates the join tree to find the
* lowest Var it can go. This is usually the relation range table var. However
* if a join operation is given an alias, iteration stops at that level since the
* query can not reference the inner RTE by name if the join is given an alias.
*/
static Node *
FlattenJoinVarsMutator(Node *node, Query *queryTree)
{
if (node == NULL)
{
return NULL;
}
if (IsA(node, Var))
{
Var *column = (Var *) node;
RangeTblEntry *rte = rt_fetch(column->varno, queryTree->rtable);
if (rte->rtekind == RTE_JOIN)
{
Node *newColumn = NULL;
/*
* if join has an alias, it is copied over join RTE. We should
* reference this RTE.
*/
if (rte->alias != NULL)
{
return (Node *) column;
}
/* join RTE does not have and alias defined at this level, deeper look is needed */
Assert(column->varattno > 0);
newColumn = (Node *) list_nth(rte->joinaliasvars, column->varattno - 1);
Assert(newColumn != NULL);
/*
* Ideally we should use expression_tree_mutator here. But it does not call
* mutate function for Vars, thus we make a recursive call to make sure
* not to miss Vars in nested joins.
*/
return FlattenJoinVarsMutator(newColumn, queryTree);
}
else
{
return node;
}
}
return expression_tree_mutator(node, FlattenJoinVarsMutator, (void *) queryTree);
}
/*
* CreateSubqueryTargetEntryList creates a target entry for each unique column
* in the column list and returns the target entry list.
@ -1733,13 +1753,12 @@ UpdateColumnToMatchingTargetEntry(Var *column, Node *flattenedExpr, List *target
else if (IsA(targetEntry->expr, CoalesceExpr))
{
/*
* flatten_join_alias_vars() flattens full oter joins' columns that is
* FlattenJoinVars() flattens full oter joins' columns that is
* in the USING part into COALESCE(left_col, right_col)
*/
CoalesceExpr *targetCoalesceExpr = (CoalesceExpr *) targetEntry->expr;
if (IsA(flattenedExpr, CoalesceExpr) && equal(flattenedExpr,
targetCoalesceExpr))
targetEntry->expr))
{
Oid expressionType = exprType(flattenedExpr);
int32 expressionTypmod = exprTypmod(flattenedExpr);

View File

@ -2749,3 +2749,151 @@ LIMIT 5;
1 | 1 | 2
(5 rows)
SELECT c_custkey
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
ORDER BY 1 DESC
LIMIT 10;
c_custkey
-----------
6
6
6
6
6
6
6
6
6
6
(10 rows)
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
GROUP BY 1
ORDER BY 2, 1
LIMIT 10;
c_custkey | date_trunc
-----------+--------------------------
2 | Thu Nov 23 13:52:00 2017
6 | Thu Nov 23 14:43:00 2017
4 | Thu Nov 23 15:32:00 2017
5 | Thu Nov 23 16:48:00 2017
3 | Thu Nov 23 17:18:00 2017
1 | Thu Nov 23 17:30:00 2017
(6 rows)
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
GROUP BY 1
HAVING extract(minute from max(c_nationkey)) >= 45
ORDER BY 2, 1
LIMIT 10;
c_custkey | date_trunc
-----------+--------------------------
2 | Thu Nov 23 13:52:00 2017
5 | Thu Nov 23 16:48:00 2017
(2 rows)
SELECT user_id
FROM (users_table JOIN events_table USING (user_id)) AS test(user_id, c_nationkey)
FULL JOIN users_table AS u2 USING (user_id)
ORDER BY 1 DESC
LIMIT 10;
user_id
---------
6
6
6
6
6
6
6
6
6
6
(10 rows)
-- nested joins
SELECT bar, value_3_table.value_3
FROM ((users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
GROUP BY 1,2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
bar | value_3
-----+---------
3 | 5
2 | 5
1 | 5
5 | 4
4 | 4
3 | 4
2 | 4
1 | 4
5 | 3
4 | 3
(10 rows)
-- lateral joins
SELECT bar,
value_3_table.value_3
FROM ((users_table
JOIN (events_table
INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep) ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar, foo)
JOIN LATERAL
(SELECT value_3
FROM events_table
WHERE user_id = bar) AS value_3_table ON TRUE
GROUP BY 1, 2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
bar | value_3
-----+---------
3 | 5
2 | 5
1 | 5
5 | 4
4 | 4
3 | 4
2 | 4
1 | 4
5 | 3
4 | 3
(10 rows)
--Joins inside subqueries are the sources of the values in the target list:
SELECT bar, foo.value_3, c_custkey, test_2.time_2 FROM
(
SELECT bar, value_3_table.value_3, random()
FROM ((users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
GROUP BY 1,2
) as foo, (users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join_2(user_id_deep)
ON (users_table.user_id = deeper_join_2.user_id_deep)) AS test_2(c_custkey, time_2) WHERE foo.bar = test_2.c_custkey
ORDER BY 2 DESC, 1 DESC, 3 DESC, 4 DESC
LIMIT 10;
bar | value_3 | c_custkey | time_2
-----+---------+-----------+---------------------------------
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
(10 rows)

View File

@ -2428,3 +2428,73 @@ FROM (users_table u FULL JOIN events_table e USING (user_id)) k(x1, x2, x3, x4,
ORDER BY 1, 2, 3
LIMIT 5;
SELECT c_custkey
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
ORDER BY 1 DESC
LIMIT 10;
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
GROUP BY 1
ORDER BY 2, 1
LIMIT 10;
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
GROUP BY 1
HAVING extract(minute from max(c_nationkey)) >= 45
ORDER BY 2, 1
LIMIT 10;
SELECT user_id
FROM (users_table JOIN events_table USING (user_id)) AS test(user_id, c_nationkey)
FULL JOIN users_table AS u2 USING (user_id)
ORDER BY 1 DESC
LIMIT 10;
-- nested joins
SELECT bar, value_3_table.value_3
FROM ((users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
GROUP BY 1,2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
-- lateral joins
SELECT bar,
value_3_table.value_3
FROM ((users_table
JOIN (events_table
INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep) ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar, foo)
JOIN LATERAL
(SELECT value_3
FROM events_table
WHERE user_id = bar) AS value_3_table ON TRUE
GROUP BY 1, 2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
--Joins inside subqueries are the sources of the values in the target list:
SELECT bar, foo.value_3, c_custkey, test_2.time_2 FROM
(
SELECT bar, value_3_table.value_3, random()
FROM ((users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
GROUP BY 1,2
) as foo, (users_table
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join_2(user_id_deep)
ON (users_table.user_id = deeper_join_2.user_id_deep)) AS test_2(c_custkey, time_2) WHERE foo.bar = test_2.c_custkey
ORDER BY 2 DESC, 1 DESC, 3 DESC, 4 DESC
LIMIT 10;