Correct planner and add more tests

pull/1532/head
velioglu 2017-08-09 17:57:57 +03:00
parent 7550b8ad52
commit b0efffae1c
16 changed files with 649 additions and 161 deletions

View File

@ -546,7 +546,8 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot pushdown the subquery",
"There exist a reference table in the outer part of the outer join",
"There exist a reference table in the outer part of the "
"outer join",
NULL);
}
@ -1426,18 +1427,19 @@ MultiPlanTree(Query *queryTree)
/*
* HasUnsupportedReferenceTableJoin returns true if there exists a outer join
* exist between reference table and distributed tables which does not obey the
* rules :
* - Reference tables can not be located in the outer part of the semi join (or
* the inner part of the anti join). Otherwise, we may have duplicate results.
* Although getting duplicate results is not possible by checking the equality
* on the column of the reference table and partition column of distributed table,
* we still keep these checks. Because, using the reference table in the outer
* part of the semi join is not very common.
* - Reference tables can not be located in the outer part of the left join and
* inner part of the right join. Otherwise we will definitely have duplicate rows.
* Beside, reference tables can not be used with full outer joins because of the
* same reason.
* between reference table and distributed tables which does not follow
* the rules :
* - Reference tables can not be located in the outer part of the semi join or the
* anti join. Otherwise, we may have duplicate results. Although getting duplicate
* results is not possible by checking the equality on the column of the reference
* table and partition column of distributed table, we still keep these checks.
* Because, using the reference table in the outer part of the semi join or anti
* join is not very common.
* - Reference tables can not be located in the outer part of the left join
* (Note that PostgreSQL converts right joins to left joins. While converting
* join types, innerrel and outerrel are also switched.) Otherwise we will
* definitely have duplicate rows. Beside, reference tables can not be used
* with full outer joins because of the same reason.
*/
static bool
HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionContext)
@ -1455,58 +1457,20 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
RelOptInfo *innerrel = joinRestriction->innerrel;
RelOptInfo *outerrel = joinRestriction->outerrel;
switch (joinType)
if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT)
{
case JOIN_SEMI:
if (RelationInfoHasReferenceTable(plannerInfo, outerrel))
{
if (RelationInfoHasReferenceTable(plannerInfo, outerrel))
{
return true;
}
return true;
}
break;
case JOIN_ANTI:
}
else if (joinType == JOIN_FULL)
{
if (RelationInfoHasReferenceTable(plannerInfo, innerrel) ||
RelationInfoHasReferenceTable(plannerInfo, outerrel))
{
if (RelationInfoHasReferenceTable(plannerInfo, innerrel))
{
return true;
}
return true;
}
break;
case JOIN_LEFT:
{
if (RelationInfoHasReferenceTable(plannerInfo, outerrel))
{
return true;
}
}
break;
case JOIN_RIGHT:
{
if (RelationInfoHasReferenceTable(plannerInfo, innerrel))
{
return true;
}
}
break;
case JOIN_FULL:
{
if (RelationInfoHasReferenceTable(plannerInfo, innerrel) ||
RelationInfoHasReferenceTable(
plannerInfo, outerrel))
{
return true;
}
}
break;
default:
{ }
break;
}
}
@ -1515,7 +1479,7 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
/*
* ReferenceTableExist check whether the relationInfo has reference table.
* RelationInfoHasReferenceTable check whether the relationInfo has reference table.
* Since relation ids of relationInfo indexes to the range table entry list of
* planner info, planner info is also passed.
*/
@ -1549,7 +1513,7 @@ HasReferenceTable(Node *node)
{
List *relationList = NIL;
ListCell *relationCell = NULL;
ExtractRangeTableRelationWalkerInRTE(node, &relationList);
ExtractRangeTableRelationWalkerWithRTEExpand(node, &relationList);
foreach(relationCell, relationList)
{
@ -2933,13 +2897,13 @@ ExtractRangeTableRelationWalker(Node *node, List **rangeTableRelationList)
/*
* ExtractRangeTableRelationWalkerInRTE obtains the list of relations from the
* given node. Note that the difference between this function and
* ExtractRangeTableRelationWalkerWithRTEExpand obtains the list of relations
* from the given node. Note that the difference between this function and
* ExtractRangeTableRelationWalker is that this one recursively
* walk into range table entries if it can.
*/
bool
ExtractRangeTableRelationWalkerInRTE(Node *node, List **rangeTableRelationList)
ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, List **rangeTableRelationList)
{
bool walkIsComplete = false;
@ -2950,8 +2914,7 @@ ExtractRangeTableRelationWalkerInRTE(Node *node, List **rangeTableRelationList)
else if (IsA(node, RangeTblEntry))
{
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node;
List *rangeTableList = NIL;
rangeTableList = lappend(rangeTableList, rangeTableEntry);
List *rangeTableList = list_make1(rangeTableEntry);
if (rangeTableEntry->rtekind == RTE_RELATION)
{
@ -2960,7 +2923,7 @@ ExtractRangeTableRelationWalkerInRTE(Node *node, List **rangeTableRelationList)
else
{
walkIsComplete = range_table_walker(rangeTableList,
ExtractRangeTableRelationWalkerInRTE,
ExtractRangeTableRelationWalkerWithRTEExpand,
rangeTableRelationList, 0);
}
}

View File

@ -2111,7 +2111,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
{
Oid firstTableRelationId = InvalidOid;
List *relationIdList = RelationIdList(query);
List *distributedRelationIdList = NIL;
List *nonReferenceRelations = NIL;
ListCell *relationIdCell = NULL;
uint32 relationIndex = 0;
uint32 rangeDistributedRelationCount = 0;
@ -2121,18 +2121,17 @@ ErrorIfUnsupportedShardDistribution(Query *query)
{
Oid relationId = lfirst_oid(relationIdCell);
char partitionMethod = PartitionMethod(relationId);
if (partitionMethod == DISTRIBUTE_BY_RANGE)
{
rangeDistributedRelationCount++;
distributedRelationIdList = lappend_oid(distributedRelationIdList,
relationId);
nonReferenceRelations = lappend_oid(nonReferenceRelations,
relationId);
}
else if (partitionMethod == DISTRIBUTE_BY_HASH)
{
hashDistributedRelationCount++;
distributedRelationIdList = lappend_oid(distributedRelationIdList,
relationId);
nonReferenceRelations = lappend_oid(nonReferenceRelations,
relationId);
}
else if (partitionMethod == DISTRIBUTE_BY_NONE)
{
@ -2142,7 +2141,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
else
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot pushdown this subquery"),
errmsg("cannot push down this subquery"),
errdetail("Currently append partitioned relations "
"are not supported")));
}
@ -2156,7 +2155,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
"partitioned relations are unsupported")));
}
foreach(relationIdCell, distributedRelationIdList)
foreach(relationIdCell, nonReferenceRelations)
{
Oid relationId = lfirst_oid(relationIdCell);
bool coPartitionedTables = false;

View File

@ -124,7 +124,8 @@ static Index RelationRestrictionPartitionKeyIndex(RelationRestriction *
/*
* SafeToPushdownUnionSubquery returns true if all the relations are returns
* partition keys in the same ordinal position.
* partition keys in the same ordinal position and there is no reference table
* exists.
*
* Note that the function expects (and asserts) the input query to be a top
* level union query defined by TopLevelUnionQuery().

View File

@ -206,7 +206,8 @@ extern List * TableEntryList(List *rangeTableList);
extern List * UsedTableEntryList(Query *query);
extern bool ExtractRangeTableRelationWalker(Node *node, List **rangeTableList);
extern bool ExtractRangeTableEntryWalker(Node *node, List **rangeTableList);
extern bool ExtractRangeTableRelationWalkerInRTE(Node *node, List **rangeTableList);
extern bool ExtractRangeTableRelationWalkerWithRTEExpand(Node *node,
List **rangeTableList);
extern List * pull_var_clause_default(Node *node);
extern bool OperatorImplementsEquality(Oid opno);

View File

@ -446,15 +446,16 @@ Aggregate
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> GroupAggregate
Group Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id)
Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id)
-> Sort
Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id)
-> Nested Loop
Join Filter: ((NULL::user_composite_type) = events.composite_id)
-> Result
One-Time Filter: false
-> Seq Scan on events_1400027 events
Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[]))
Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id)
-> Hash Join
Hash Cond: (users.composite_id = events.composite_id)
-> Seq Scan on users_1400029 users
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash
-> Seq Scan on events_1400025 events
Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[]))
-- Union and left join subquery pushdown
EXPLAIN (COSTS OFF)
SELECT
@ -531,33 +532,34 @@ HashAggregate
Sort Key: subquery_top.hasdone
-> Subquery Scan on subquery_top
-> GroupAggregate
Group Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), subquery_2.hasdone
Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone
-> Sort
Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), subquery_2.hasdone
Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone
-> Hash Left Join
Hash Cond: ((NULL::user_composite_type) = subquery_2.composite_id)
-> Unique
-> Sort
Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), (NULL::user_composite_type), ('action=>1'::text), events.event_time
-> Append
-> Nested Loop
Join Filter: ((NULL::user_composite_type) = events.composite_id)
-> Result
One-Time Filter: false
-> Seq Scan on events_1400027 events
Hash Cond: (users.composite_id = subquery_2.composite_id)
-> HashAggregate
Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), users.composite_id, ('action=>1'::text), events.event_time
-> Append
-> Hash Join
Hash Cond: (users.composite_id = events.composite_id)
-> Seq Scan on users_1400029 users
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash
-> Seq Scan on events_1400025 events
Filter: ((event_type)::text = 'click'::text)
-> Nested Loop
Join Filter: ((NULL::user_composite_type) = events_1.composite_id)
-> Result
One-Time Filter: false
-> Seq Scan on events_1400027 events_1
-> Hash Join
Hash Cond: (users_1.composite_id = events_1.composite_id)
-> Seq Scan on users_1400029 users_1
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash
-> Seq Scan on events_1400025 events_1
Filter: ((event_type)::text = 'submit'::text)
-> Hash
-> Subquery Scan on subquery_2
-> Unique
-> Sort
Sort Key: ((events_2.composite_id).tenant_id), ((events_2.composite_id).user_id)
-> Seq Scan on events_1400027 events_2
-> Seq Scan on events_1400025 events_2
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text))
-- Union, left join and having subquery pushdown
EXPLAIN (COSTS OFF)
@ -703,13 +705,12 @@ Limit
-> Limit
-> Sort
Sort Key: users.lastseen DESC
-> Subquery Scan on users
-> Result
One-Time Filter: false
-> Seq Scan on users_1400029 users
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Limit
-> Sort
Sort Key: events.event_time DESC
-> Seq Scan on events_1400027 events
-> Seq Scan on events_1400025 events
Filter: (composite_id = users.composite_id)
-- Test all tasks output
SET citus.explain_all_tasks TO on;

View File

@ -446,15 +446,16 @@ Aggregate
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> GroupAggregate
Group Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id)
Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id)
-> Sort
Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id)
-> Nested Loop
Join Filter: ((NULL::user_composite_type) = events.composite_id)
-> Result
One-Time Filter: false
-> Seq Scan on events_1400027 events
Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[]))
Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id)
-> Hash Join
Hash Cond: (users.composite_id = events.composite_id)
-> Seq Scan on users_1400029 users
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash
-> Seq Scan on events_1400025 events
Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[]))
-- Union and left join subquery pushdown
EXPLAIN (COSTS OFF)
SELECT
@ -531,33 +532,34 @@ HashAggregate
Sort Key: subquery_top.hasdone
-> Subquery Scan on subquery_top
-> GroupAggregate
Group Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), subquery_2.hasdone
Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone
-> Sort
Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), subquery_2.hasdone
Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone
-> Hash Left Join
Hash Cond: ((NULL::user_composite_type) = subquery_2.composite_id)
-> Unique
-> Sort
Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), (NULL::user_composite_type), ('action=>1'::text), events.event_time
-> Append
-> Nested Loop
Join Filter: ((NULL::user_composite_type) = events.composite_id)
-> Result
One-Time Filter: false
-> Seq Scan on events_1400027 events
Hash Cond: (users.composite_id = subquery_2.composite_id)
-> HashAggregate
Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), users.composite_id, ('action=>1'::text), events.event_time
-> Append
-> Hash Join
Hash Cond: (users.composite_id = events.composite_id)
-> Seq Scan on users_1400029 users
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash
-> Seq Scan on events_1400025 events
Filter: ((event_type)::text = 'click'::text)
-> Nested Loop
Join Filter: ((NULL::user_composite_type) = events_1.composite_id)
-> Result
One-Time Filter: false
-> Seq Scan on events_1400027 events_1
-> Hash Join
Hash Cond: (users_1.composite_id = events_1.composite_id)
-> Seq Scan on users_1400029 users_1
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Hash
-> Seq Scan on events_1400025 events_1
Filter: ((event_type)::text = 'submit'::text)
-> Hash
-> Subquery Scan on subquery_2
-> Unique
-> Sort
Sort Key: ((events_2.composite_id).tenant_id), ((events_2.composite_id).user_id)
-> Seq Scan on events_1400027 events_2
-> Seq Scan on events_1400025 events_2
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text))
-- Union, left join and having subquery pushdown
EXPLAIN (COSTS OFF)
@ -703,13 +705,12 @@ Limit
-> Limit
-> Sort
Sort Key: users.lastseen DESC
-> Subquery Scan on users
-> Result
One-Time Filter: false
-> Seq Scan on users_1400029 users
Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type))
-> Limit
-> Sort
Sort Key: events.event_time DESC
-> Seq Scan on events_1400027 events
-> Seq Scan on events_1400025 events
Filter: (composite_id = users.composite_id)
-- Test all tasks output
SET citus.explain_all_tasks TO on;

View File

@ -1122,8 +1122,7 @@ WHERE
colocated_table_test.value_1 = reference_table_test.value_1;
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- now, insert into the hash partitioned table and use reference
-- tables in the SELECT queries
-- not pushable due to lack of equality between partition column and column of reference table
INSERT INTO
colocated_table_test (value_1, value_2)
SELECT

View File

@ -8,22 +8,112 @@
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
SET citus.enable_router_execution TO FALSE;
CREATE TABLE events_reference_table as SELECT * FROM events_table;
CREATE TABLE users_reference_table as SELECT * FROM users_table;
SELECT create_reference_table('events_reference_table');
NOTICE: Copying data from local table...
CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int);
SELECT create_distributed_table('user_buy_test_table', 'user_id');
create_distributed_table
--------------------------
(1 row)
INSERT INTO user_buy_test_table VALUES(1,2,1);
INSERT INTO user_buy_test_table VALUES(2,3,4);
INSERT INTO user_buy_test_table VALUES(3,4,2);
INSERT INTO user_buy_test_table VALUES(7,5,2);
CREATE TABLE users_return_test_table(user_id int, item_id int, buy_count int);
SELECT create_distributed_table('users_return_test_table', 'user_id');
create_distributed_table
--------------------------
(1 row)
INSERT INTO users_return_test_table VALUES(4,1,1);
INSERT INTO users_return_test_table VALUES(1,3,1);
INSERT INTO users_return_test_table VALUES(3,2,2);
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
SELECT create_reference_table('users_ref_test_table');
create_reference_table
------------------------
(1 row)
SELECT create_reference_table('users_reference_table');
NOTICE: Copying data from local table...
create_reference_table
------------------------
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
-- Simple Join test with reference table
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
count
-------
3
(1 row)
-- Should work, reference table at the inner side is allowed
SELECT count(*) FROM
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
count
-------
1
(1 row)
-- Should not work, no equality between partition column and reference table
SELECT * FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- Should not work, no equality between partition column and reference table
SELECT * FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- Shouldn't work, reference table at the outer side is not allowed
SELECT * FROM
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
ON users_ref_test_table.id = user_buy_test_table.user_id) subquery_1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- Should work, reference table at the inner side is allowed
SELECT count(*) FROM
(SELECT random() FROM users_ref_test_table RIGHT JOIN user_buy_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
count
-------
4
(1 row)
-- Shouldn't work, reference table at the outer side is not allowed
SELECT * FROM
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- Should pass since reference table locates in the inner part of each left join
SELECT count(*) FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
LEFT JOIN
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
count
-------
2
(1 row)
-- Should not pass since reference table locates in the outer part of right join
SELECT * FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
SELECT user_id, lastseen
@ -570,3 +660,6 @@ GROUP BY types
ORDER BY types;
ERROR: cannot push down this subquery
DETAIL: Reference tables are not supported with union operator
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;

View File

@ -28,6 +28,89 @@ LIMIT 5;
63
(4 rows)
-- subqueries in WHERE with IN operator
SELECT
user_id
FROM
users_table
WHERE
value_2 IN
(SELECT
value_2
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id
)
GROUP BY user_id
ORDER BY user_id
LIMIT 3;
user_id
---------
0
1
2
(3 rows)
-- subqueries in WHERE with NOT EXISTS operator, should work since
-- reference table in the inner part of the join
SELECT
user_id
FROM
users_table
WHERE
NOT EXISTS
(SELECT
value_2
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id
)
GROUP BY user_id
ORDER BY user_id
LIMIT 3;
user_id
---------
(0 rows)
-- subqueries in WHERE with NOT EXISTS operator, should not work
-- there is a reference table in the outer part of the join
SELECT
user_id
FROM
users_reference_table
WHERE
NOT EXISTS
(SELECT
value_2
FROM
events_table
WHERE
users_reference_table.user_id = events_table.user_id
)
LIMIT 3;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- subqueries in WHERE with IN operator without equality
SELECT
user_id
FROM
users_table
WHERE
value_2 IN
(SELECT
value_2
FROM
events_reference_table
WHERE
users_table.user_id > events_reference_table.user_id
)
GROUP BY user_id
ORDER BY user_id
LIMIT 3;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- have reference table without any equality, should error out
SELECT
user_id
@ -141,5 +224,3 @@ SELECT user_id, value_2 FROM users_table WHERE
ORDER BY 1, 2;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
DROP TABLE events_reference_table;
DROP TABLE users_reference_table;

View File

@ -23,6 +23,17 @@ LIMIT 5;
20 | 9
(5 rows)
-- a very simple union query with reference table
SELECT user_id, counter
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2, 3, 4, 5)
UNION
SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6, 7, 8, 9, 10)
) user_id
ORDER BY 2 DESC,1
LIMIT 5;
ERROR: cannot pushdown this query
DETAIL: Reference tables are not allowed with set operations
-- the same query with union all
SELECT user_id, counter
FROM (
@ -41,6 +52,17 @@ LIMIT 5;
15 | 9
(5 rows)
-- the same query with union all and reference table
SELECT user_id, counter
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2, 3, 4, 5)
UNION ALL
SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6, 7, 8, 9, 10)
) user_id
ORDER BY 2 DESC,1
LIMIT 5;
ERROR: cannot pushdown this query
DETAIL: Reference tables are not allowed with set operations
-- the same query with group by
SELECT user_id, sum(counter)
FROM (
@ -162,6 +184,22 @@ GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
23508
(5 rows)
-- similar query this time more subqueries with reference table and target list contains a resjunk entry
SELECT sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 20 GROUP BY user_id HAVING sum(value_2) > 500
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 40 and value_1 < 60 GROUP BY user_id HAVING sum(value_2) > 500
UNION
SELECT user_id, sum(value_2) AS counter FROM users_reference_table where value_1 < 60 and value_1 < 80 GROUP BY user_id HAVING sum(value_2) > 500
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 80 and value_1 < 100 GROUP BY user_id HAVING sum(value_2) > 500
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 100 and value_1 < 120 GROUP BY user_id HAVING sum(value_2) > 500
) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
ERROR: cannot pushdown this query
DETAIL: Reference tables are not allowed with set operations
-- similar query as above, with UNION ALL
SELECT sum(counter)
FROM (
@ -236,6 +274,50 @@ LIMIT 5;
90 | 115843
(5 rows)
-- unions within unions with reference table
SELECT *
FROM (
( SELECT user_id,
sum(counter)
FROM
(SELECT
user_id, sum(value_2) AS counter
FROM
users_table
GROUP BY
user_id
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
events_reference_table
GROUP BY
user_id) user_id_1
GROUP BY
user_id)
UNION
(SELECT
user_id, sum(counter)
FROM
(SELECT
user_id, sum(value_2) AS counter
FROM
users_table
GROUP BY
user_id
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
events_table
GROUP BY
user_id) user_id_2
GROUP BY
user_id)) AS ftop
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
ERROR: cannot pushdown this query
DETAIL: Reference tables are not allowed with set operations
-- top level unions are wrapped into top level aggregations
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
FROM
@ -493,6 +575,17 @@ FROM
20002
(1 row)
-- some UNION ALL queries that are going to be pulled up with reference table
SELECT
count(*)
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
(SELECT user_id FROM events_reference_table)
) b;
ERROR: cannot pushdown this query
DETAIL: Reference tables are not allowed with set operations
-- similar query without top level agg
SELECT
user_id
@ -899,3 +992,5 @@ ORDER BY types;
ERROR: cannot push down this subquery
DETAIL: Subqueries without relations are unsupported
SET citus.enable_router_execution TO true;
DROP TABLE events_reference_table;
DROP TABLE users_reference_table;

View File

@ -353,3 +353,11 @@ SET citus.shard_max_size TO "1MB";
\copy orders_subquery FROM '@abs_srcdir@/data/orders.1.data' with delimiter '|'
\copy orders_subquery FROM '@abs_srcdir@/data/orders.2.data' with delimiter '|'
CREATE TABLE events_reference_table (like events_table including all);
SELECT create_reference_table('events_reference_table');
INSERT INTO events_reference_table SELECT * FROM events_table;
CREATE TABLE users_reference_table (like users_table including all);
SELECT create_reference_table('users_reference_table');
INSERT INTO users_reference_table SELECT * FROM users_table;

View File

@ -421,3 +421,19 @@ SET citus.shard_max_size TO "1MB";
\copy lineitem_subquery FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|'
\copy orders_subquery FROM '@abs_srcdir@/data/orders.1.data' with delimiter '|'
\copy orders_subquery FROM '@abs_srcdir@/data/orders.2.data' with delimiter '|'
CREATE TABLE events_reference_table (like events_table including all);
SELECT create_reference_table('events_reference_table');
create_reference_table
------------------------
(1 row)
INSERT INTO events_reference_table SELECT * FROM events_table;
CREATE TABLE users_reference_table (like users_table including all);
SELECT create_reference_table('users_reference_table');
create_reference_table
------------------------
(1 row)
INSERT INTO users_reference_table SELECT * FROM users_table;

View File

@ -707,8 +707,7 @@ FROM
WHERE
colocated_table_test.value_1 = reference_table_test.value_1;
-- now, insert into the hash partitioned table and use reference
-- tables in the SELECT queries
-- not pushable due to lack of equality between partition column and column of reference table
INSERT INTO
colocated_table_test (value_1, value_2)
SELECT

View File

@ -10,11 +10,78 @@ ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
SET citus.enable_router_execution TO FALSE;
CREATE TABLE events_reference_table as SELECT * FROM events_table;
CREATE TABLE users_reference_table as SELECT * FROM users_table;
CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int);
SELECT create_distributed_table('user_buy_test_table', 'user_id');
INSERT INTO user_buy_test_table VALUES(1,2,1);
INSERT INTO user_buy_test_table VALUES(2,3,4);
INSERT INTO user_buy_test_table VALUES(3,4,2);
INSERT INTO user_buy_test_table VALUES(7,5,2);
SELECT create_reference_table('events_reference_table');
SELECT create_reference_table('users_reference_table');
CREATE TABLE users_return_test_table(user_id int, item_id int, buy_count int);
SELECT create_distributed_table('users_return_test_table', 'user_id');
INSERT INTO users_return_test_table VALUES(4,1,1);
INSERT INTO users_return_test_table VALUES(1,3,1);
INSERT INTO users_return_test_table VALUES(3,2,2);
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
SELECT create_reference_table('users_ref_test_table');
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
-- Simple Join test with reference table
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
-- Should work, reference table at the inner side is allowed
SELECT count(*) FROM
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
-- Should not work, no equality between partition column and reference table
SELECT * FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
-- Should not work, no equality between partition column and reference table
SELECT * FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
-- Shouldn't work, reference table at the outer side is not allowed
SELECT * FROM
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
ON users_ref_test_table.id = user_buy_test_table.user_id) subquery_1;
-- Should work, reference table at the inner side is allowed
SELECT count(*) FROM
(SELECT random() FROM users_ref_test_table RIGHT JOIN user_buy_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
-- Shouldn't work, reference table at the outer side is not allowed
SELECT * FROM
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
-- Should pass since reference table locates in the inner part of each left join
SELECT count(*) FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
LEFT JOIN
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
-- Should not pass since reference table locates in the outer part of right join
SELECT * FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
@ -500,4 +567,8 @@ INNER JOIN
FROM users_table as "users"
WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query
GROUP BY types
ORDER BY types;
ORDER BY types;
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;

View File

@ -22,6 +22,78 @@ HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;
-- subqueries in WHERE with IN operator
SELECT
user_id
FROM
users_table
WHERE
value_2 IN
(SELECT
value_2
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id
)
GROUP BY user_id
ORDER BY user_id
LIMIT 3;
-- subqueries in WHERE with NOT EXISTS operator, should work since
-- reference table in the inner part of the join
SELECT
user_id
FROM
users_table
WHERE
NOT EXISTS
(SELECT
value_2
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id
)
GROUP BY user_id
ORDER BY user_id
LIMIT 3;
-- subqueries in WHERE with NOT EXISTS operator, should not work
-- there is a reference table in the outer part of the join
SELECT
user_id
FROM
users_reference_table
WHERE
NOT EXISTS
(SELECT
value_2
FROM
events_table
WHERE
users_reference_table.user_id = events_table.user_id
)
LIMIT 3;
-- subqueries in WHERE with IN operator without equality
SELECT
user_id
FROM
users_table
WHERE
value_2 IN
(SELECT
value_2
FROM
events_reference_table
WHERE
users_table.user_id > events_reference_table.user_id
)
GROUP BY user_id
ORDER BY user_id
LIMIT 3;
-- have reference table without any equality, should error out
SELECT
user_id
@ -125,6 +197,3 @@ SELECT user_id, value_2 FROM users_table WHERE
HAVING sum(submit_card_info) > 0
)
ORDER BY 1, 2;
DROP TABLE events_reference_table;
DROP TABLE users_reference_table;

View File

@ -17,6 +17,16 @@ FROM (
ORDER BY 2 DESC,1
LIMIT 5;
-- a very simple union query with reference table
SELECT user_id, counter
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2, 3, 4, 5)
UNION
SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6, 7, 8, 9, 10)
) user_id
ORDER BY 2 DESC,1
LIMIT 5;
-- the same query with union all
SELECT user_id, counter
FROM (
@ -27,6 +37,16 @@ FROM (
ORDER BY 2 DESC,1
LIMIT 5;
-- the same query with union all and reference table
SELECT user_id, counter
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2, 3, 4, 5)
UNION ALL
SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6, 7, 8, 9, 10)
) user_id
ORDER BY 2 DESC,1
LIMIT 5;
-- the same query with group by
SELECT user_id, sum(counter)
FROM (
@ -102,6 +122,21 @@ FROM (
) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
-- similar query this time more subqueries with reference table and target list contains a resjunk entry
SELECT sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 20 GROUP BY user_id HAVING sum(value_2) > 500
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 40 and value_1 < 60 GROUP BY user_id HAVING sum(value_2) > 500
UNION
SELECT user_id, sum(value_2) AS counter FROM users_reference_table where value_1 < 60 and value_1 < 80 GROUP BY user_id HAVING sum(value_2) > 500
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 80 and value_1 < 100 GROUP BY user_id HAVING sum(value_2) > 500
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 100 and value_1 < 120 GROUP BY user_id HAVING sum(value_2) > 500
) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
-- similar query as above, with UNION ALL
SELECT sum(counter)
FROM (
@ -160,6 +195,49 @@ FROM (
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
-- unions within unions with reference table
SELECT *
FROM (
( SELECT user_id,
sum(counter)
FROM
(SELECT
user_id, sum(value_2) AS counter
FROM
users_table
GROUP BY
user_id
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
events_reference_table
GROUP BY
user_id) user_id_1
GROUP BY
user_id)
UNION
(SELECT
user_id, sum(counter)
FROM
(SELECT
user_id, sum(value_2) AS counter
FROM
users_table
GROUP BY
user_id
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
events_table
GROUP BY
user_id) user_id_2
GROUP BY
user_id)) AS ftop
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
-- top level unions are wrapped into top level aggregations
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
FROM
@ -377,6 +455,16 @@ FROM
(SELECT user_id FROM events_table)
) b;
-- some UNION ALL queries that are going to be pulled up with reference table
SELECT
count(*)
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
(SELECT user_id FROM events_reference_table)
) b;
-- similar query without top level agg
SELECT
user_id
@ -724,3 +812,6 @@ GROUP BY types
ORDER BY types;
SET citus.enable_router_execution TO true;
DROP TABLE events_reference_table;
DROP TABLE users_reference_table;