Merge pull request #426 from citusdata/bugfix/#375-prune-with-subquery-pushdown-varchar

Fix Shard Pruning Problem With Subqueries on VARCHAR Partition Columns

cr: @jasonmp85
pull/459/head
Jason Petersen 2016-04-19 22:06:30 -06:00
commit 0ac9bf2bcb
6 changed files with 165 additions and 24 deletions

View File

@ -2907,14 +2907,15 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query)
List *rangetableList = query->rtable;
Index rangeTableEntryIndex = 0;
RangeTblEntry *rangeTableEntry = NULL;
Expr *strippedColumnExpression = strip_implicit_coercions(columnExpression);
if (IsA(columnExpression, Var))
if (IsA(strippedColumnExpression, Var))
{
candidateColumn = (Var *) columnExpression;
candidateColumn = (Var *) strippedColumnExpression;
}
else if (IsA(columnExpression, FieldSelect))
else if (IsA(strippedColumnExpression, FieldSelect))
{
FieldSelect *compositeField = (FieldSelect *) columnExpression;
FieldSelect *compositeField = (FieldSelect *) strippedColumnExpression;
Expr *fieldExpression = compositeField->arg;
if (IsA(fieldExpression, Var))
@ -3710,6 +3711,8 @@ PartitionColumnOpExpressionList(Query *query)
Node *whereNode = (Node *) lfirst(whereClauseCell);
Node *leftArgument = NULL;
Node *rightArgument = NULL;
Node *strippedLeftArgument = NULL;
Node *strippedRightArgument = NULL;
OpExpr *whereClause = NULL;
List *argumentList = NIL;
List *rangetableList = NIL;
@ -3744,14 +3747,16 @@ PartitionColumnOpExpressionList(Query *query)
leftArgument = (Node *) linitial(argumentList);
rightArgument = (Node *) lsecond(argumentList);
strippedLeftArgument = strip_implicit_coercions(leftArgument);
strippedRightArgument = strip_implicit_coercions(rightArgument);
if (IsA(leftArgument, Var) && IsA(rightArgument, Const))
if (IsA(strippedLeftArgument, Var) && IsA(strippedRightArgument, Const))
{
candidatePartitionColumn = (Var *) leftArgument;
candidatePartitionColumn = (Var *) strippedLeftArgument;
}
else if (IsA(leftArgument, Const) && IsA(leftArgument, Var))
else if (IsA(strippedLeftArgument, Const) && IsA(strippedRightArgument, Var))
{
candidatePartitionColumn = (Var *) rightArgument;
candidatePartitionColumn = (Var *) strippedRightArgument;
}
else
{
@ -3798,14 +3803,16 @@ ReplaceColumnsInOpExpressionList(List *opExpressionList, Var *newColumn)
Node *leftArgument = (Node *) linitial(argumentList);
Node *rightArgument = (Node *) lsecond(argumentList);
Node *strippedLeftArgument = strip_implicit_coercions(leftArgument);
Node *strippedRightArgument = strip_implicit_coercions(rightArgument);
if (IsA(leftArgument, Var))
if (IsA(strippedLeftArgument, Var))
{
newArgumentList = list_make2(newColumn, rightArgument);
newArgumentList = list_make2(newColumn, strippedRightArgument);
}
else if (IsA(leftArgument, Var))
else if (IsA(strippedRightArgument, Var))
{
newArgumentList = list_make2(leftArgument, newColumn);
newArgumentList = list_make2(strippedLeftArgument, newColumn);
}
copyOpExpression->args = newArgumentList;

View File

@ -3500,9 +3500,11 @@ CheckJoinBetweenColumns(OpExpr *joinClause)
List *argumentList = joinClause->args;
Node *leftArgument = (Node *) linitial(argumentList);
Node *rightArgument = (Node *) lsecond(argumentList);
Node *strippedLeftArgument = strip_implicit_coercions(leftArgument);
Node *strippedRightArgument = strip_implicit_coercions(rightArgument);
NodeTag leftArgumentType = nodeTag(leftArgument);
NodeTag rightArgumentType = nodeTag(rightArgument);
NodeTag leftArgumentType = nodeTag(strippedLeftArgument);
NodeTag rightArgumentType = nodeTag(strippedRightArgument);
if (leftArgumentType != T_Var || rightArgumentType != T_Var)
{

View File

@ -89,13 +89,15 @@ DEBUG: join prunable for intervals [(a,3,b),(b,4,c)] and [(c,5,d),(d,6,e)]
explain statements for distributed queries are currently unsupported
(1 row)
-- Large table joins between varchar columns do not work because of a bug we
-- have. Currently, we require joins to be only on columns. Postgres adds a
-- relabel to typecast varchars to text due to which our check fails and we
-- error out.
-- Test that large table joins on partition varchar columns work
EXPLAIN SELECT count(*)
FROM varchar_partitioned_table table1, varchar_partitioned_table table2
WHERE table1.varchar_column = table2.varchar_column;
ERROR: cannot perform local joins that involve expressions
DETAIL: local joins can be performed between columns only
DEBUG: join prunable for intervals [BA1000U2AMO4ZGX,BZZXSP27F21T6] and [AA1000U2AMO4ZGX,AZZXSP27F21T6]
DEBUG: join prunable for intervals [AA1000U2AMO4ZGX,AZZXSP27F21T6] and [BA1000U2AMO4ZGX,BZZXSP27F21T6]
QUERY PLAN
----------------------------------------------------------------------
explain statements for distributed queries are currently unsupported
(1 row)
SET client_min_messages TO NOTICE;

View File

@ -294,3 +294,61 @@ FROM
l_orderkey = o_orderkey
GROUP BY
l_orderkey) AS unit_prices;
-- Check that we can prune shards in subqueries with VARCHAR partition columns
CREATE TABLE subquery_pruning_varchar_test_table
(
a varchar,
b int
);
SELECT master_create_distributed_table('subquery_pruning_varchar_test_table', 'a', 'hash');
SELECT master_create_worker_shards('subquery_pruning_varchar_test_table', 4, 1);
SET citus.subquery_pushdown TO TRUE;
SET client_min_messages TO DEBUG2;
SELECT * FROM
(SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE a = 'onder' GROUP BY a)
AS foo;
SELECT * FROM
(SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE 'eren' = a GROUP BY a)
AS foo;
SET client_min_messages TO NOTICE;
-- test subquery join on VARCHAR partition column
SELECT * FROM
(SELECT
a_inner AS a
FROM
(SELECT
subquery_pruning_varchar_test_table.a AS a_inner
FROM
subquery_pruning_varchar_test_table
GROUP BY
subquery_pruning_varchar_test_table.a
HAVING
count(subquery_pruning_varchar_test_table.a) < 3)
AS f1,
(SELECT
subquery_pruning_varchar_test_table.a
FROM
subquery_pruning_varchar_test_table
GROUP BY
subquery_pruning_varchar_test_table.a
HAVING
sum(coalesce(subquery_pruning_varchar_test_table.b,0)) > 20.0)
AS f2
WHERE
f1.a_inner = f2.a
GROUP BY
a_inner)
AS foo;
DROP TABLE subquery_pruning_varchar_test_table;
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 102026;

View File

@ -326,3 +326,78 @@ FROM
l_orderkey) AS unit_prices;
ERROR: cannot push down this subquery
DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning
-- Check that we can prune shards in subqueries with VARCHAR partition columns
CREATE TABLE subquery_pruning_varchar_test_table
(
a varchar,
b int
);
SELECT master_create_distributed_table('subquery_pruning_varchar_test_table', 'a', 'hash');
master_create_distributed_table
---------------------------------
(1 row)
SELECT master_create_worker_shards('subquery_pruning_varchar_test_table', 4, 1);
master_create_worker_shards
-----------------------------
(1 row)
SET citus.subquery_pushdown TO TRUE;
SET client_min_messages TO DEBUG2;
SELECT * FROM
(SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE a = 'onder' GROUP BY a)
AS foo;
DEBUG: predicate pruning for shardId 102029
DEBUG: predicate pruning for shardId 102027
DEBUG: predicate pruning for shardId 102026
count
-------
(0 rows)
SELECT * FROM
(SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE 'eren' = a GROUP BY a)
AS foo;
DEBUG: predicate pruning for shardId 102029
DEBUG: predicate pruning for shardId 102028
DEBUG: predicate pruning for shardId 102026
count
-------
(0 rows)
SET client_min_messages TO NOTICE;
-- test subquery join on VARCHAR partition column
SELECT * FROM
(SELECT
a_inner AS a
FROM
(SELECT
subquery_pruning_varchar_test_table.a AS a_inner
FROM
subquery_pruning_varchar_test_table
GROUP BY
subquery_pruning_varchar_test_table.a
HAVING
count(subquery_pruning_varchar_test_table.a) < 3)
AS f1,
(SELECT
subquery_pruning_varchar_test_table.a
FROM
subquery_pruning_varchar_test_table
GROUP BY
subquery_pruning_varchar_test_table.a
HAVING
sum(coalesce(subquery_pruning_varchar_test_table.b,0)) > 20.0)
AS f2
WHERE
f1.a_inner = f2.a
GROUP BY
a_inner)
AS foo;
a
---
(0 rows)
DROP TABLE subquery_pruning_varchar_test_table;
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 102026;

View File

@ -44,10 +44,7 @@ EXPLAIN SELECT count(*)
FROM composite_partitioned_table table1, composite_partitioned_table table2
WHERE table1.composite_column = table2.composite_column;
-- Large table joins between varchar columns do not work because of a bug we
-- have. Currently, we require joins to be only on columns. Postgres adds a
-- relabel to typecast varchars to text due to which our check fails and we
-- error out.
-- Test that large table joins on partition varchar columns work
EXPLAIN SELECT count(*)
FROM varchar_partitioned_table table1, varchar_partitioned_table table2