From f53057c7dd5a244ec2e802ddd6f72a48fdb7894d Mon Sep 17 00:00:00 2001 From: eren Date: Fri, 1 Apr 2016 15:42:26 +0300 Subject: [PATCH 1/3] Fix Shard Pruning Problem With Subqueries on VARCHAR Partition Columns Fixes #375 Prior to this change, shard pruning couldn't be done if: - Table is hash-distributed - Partition column of is VARCHAR - Query to be pruned is a subquery There were two problems: - A bug in left-side/right-side checks for the partition column - We were not considering relabeled types (VARCHAR was relabeled as TEXT) --- .../planner/multi_logical_optimizer.c | 22 ++++++---- src/test/regress/input/multi_subquery.source | 28 ++++++++++++ src/test/regress/output/multi_subquery.source | 43 +++++++++++++++++++ 3 files changed, 85 insertions(+), 8 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 9db945cba..59c5c4ee0 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -3710,6 +3710,8 @@ PartitionColumnOpExpressionList(Query *query) Node *whereNode = (Node *) lfirst(whereClauseCell); Node *leftArgument = NULL; Node *rightArgument = NULL; + Node *strippedLeftArgument = NULL; + Node *strippedRightArgument = NULL; OpExpr *whereClause = NULL; List *argumentList = NIL; List *rangetableList = NIL; @@ -3744,14 +3746,16 @@ PartitionColumnOpExpressionList(Query *query) leftArgument = (Node *) linitial(argumentList); rightArgument = (Node *) lsecond(argumentList); + strippedLeftArgument = strip_implicit_coercions(leftArgument); + strippedRightArgument = strip_implicit_coercions(rightArgument); - if (IsA(leftArgument, Var) && IsA(rightArgument, Const)) + if (IsA(strippedLeftArgument, Var) && IsA(strippedRightArgument, Const)) { - candidatePartitionColumn = (Var *) leftArgument; + candidatePartitionColumn = (Var *) strippedLeftArgument; } - else if (IsA(leftArgument, Const) && IsA(leftArgument, Var)) + else if (IsA(strippedLeftArgument, Const) && IsA(strippedRightArgument, Var)) { - candidatePartitionColumn = (Var *) rightArgument; + candidatePartitionColumn = (Var *) strippedRightArgument; } else { @@ -3798,14 +3802,16 @@ ReplaceColumnsInOpExpressionList(List *opExpressionList, Var *newColumn) Node *leftArgument = (Node *) linitial(argumentList); Node *rightArgument = (Node *) lsecond(argumentList); + Node *strippedLeftArgument = strip_implicit_coercions(leftArgument); + Node *strippedRightArgument = strip_implicit_coercions(rightArgument); - if (IsA(leftArgument, Var)) + if (IsA(strippedLeftArgument, Var)) { - newArgumentList = list_make2(newColumn, rightArgument); + newArgumentList = list_make2(newColumn, strippedRightArgument); } - else if (IsA(leftArgument, Var)) + else if (IsA(strippedRightArgument, Var)) { - newArgumentList = list_make2(leftArgument, newColumn); + newArgumentList = list_make2(strippedLeftArgument, newColumn); } copyOpExpression->args = newArgumentList; diff --git a/src/test/regress/input/multi_subquery.source b/src/test/regress/input/multi_subquery.source index 0accf9135..cee05ac3d 100644 --- a/src/test/regress/input/multi_subquery.source +++ b/src/test/regress/input/multi_subquery.source @@ -294,3 +294,31 @@ FROM l_orderkey = o_orderkey GROUP BY l_orderkey) AS unit_prices; + +-- Check that we can prune shards in subqueries with VARCHAR partition columns + +CREATE TABLE subquery_pruning_varchar_test_table +( + a varchar, + b int +); + +SELECT master_create_distributed_table('subquery_pruning_varchar_test_table', 'a', 'hash'); +SELECT master_create_worker_shards('subquery_pruning_varchar_test_table', 4, 1); + +SET citus.subquery_pushdown TO TRUE; +SET client_min_messages TO DEBUG2; + +SELECT * FROM + (SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE a = 'onder' GROUP BY a) +AS foo; + +SELECT * FROM + (SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE 'eren' = a GROUP BY a) +AS foo; + +SET client_min_messages TO NOTICE; + +DROP TABLE subquery_pruning_varchar_test_table; + +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 102026; diff --git a/src/test/regress/output/multi_subquery.source b/src/test/regress/output/multi_subquery.source index f0e6f0250..8f5f30c22 100644 --- a/src/test/regress/output/multi_subquery.source +++ b/src/test/regress/output/multi_subquery.source @@ -326,3 +326,46 @@ FROM l_orderkey) AS unit_prices; ERROR: cannot push down this subquery DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning +-- Check that we can prune shards in subqueries with VARCHAR partition columns +CREATE TABLE subquery_pruning_varchar_test_table +( + a varchar, + b int +); +SELECT master_create_distributed_table('subquery_pruning_varchar_test_table', 'a', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('subquery_pruning_varchar_test_table', 4, 1); + master_create_worker_shards +----------------------------- + +(1 row) + +SET citus.subquery_pushdown TO TRUE; +SET client_min_messages TO DEBUG2; +SELECT * FROM + (SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE a = 'onder' GROUP BY a) +AS foo; +DEBUG: predicate pruning for shardId 102029 +DEBUG: predicate pruning for shardId 102027 +DEBUG: predicate pruning for shardId 102026 + count +------- +(0 rows) + +SELECT * FROM + (SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE 'eren' = a GROUP BY a) +AS foo; +DEBUG: predicate pruning for shardId 102029 +DEBUG: predicate pruning for shardId 102028 +DEBUG: predicate pruning for shardId 102026 + count +------- +(0 rows) + +SET client_min_messages TO NOTICE; +DROP TABLE subquery_pruning_varchar_test_table; +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 102026; From e786cbed0f5d834e6c6a9527b453a189b01fed1a Mon Sep 17 00:00:00 2001 From: eren Date: Thu, 7 Apr 2016 15:16:04 +0300 Subject: [PATCH 2/3] Fix Join Problem With VARCHAR Partition Columns This change fixes the problem with joins with VARCHAR columns. Prior to this change, when we tried to do large table joins on varchar columns, we got an error of the form: ERROR: cannot perform local joins that involve expressions DETAIL: local joins can be performed between columns only. This is because we have a check in CheckJoinBetweenColumns() which requires the join clause to have only 'Var' nodes (i.e. columns). Postgres adds a relabel t ype cast to cast the varchar to text; hence the type of the node is not T_Var and the join fails. The fix involves calling strip_implicit_coercions() to the left and right arguments so that RELABELTYPE is stripped to VAR. Fixes #76. --- .../distributed/planner/multi_physical_planner.c | 6 ++++-- src/test/regress/expected/multi_join_pruning.out | 14 ++++++++------ src/test/regress/sql/multi_join_pruning.sql | 5 +---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 1488d1e94..9323a163a 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -3500,9 +3500,11 @@ CheckJoinBetweenColumns(OpExpr *joinClause) List *argumentList = joinClause->args; Node *leftArgument = (Node *) linitial(argumentList); Node *rightArgument = (Node *) lsecond(argumentList); + Node *strippedLeftArgument = strip_implicit_coercions(leftArgument); + Node *strippedRightArgument = strip_implicit_coercions(rightArgument); - NodeTag leftArgumentType = nodeTag(leftArgument); - NodeTag rightArgumentType = nodeTag(rightArgument); + NodeTag leftArgumentType = nodeTag(strippedLeftArgument); + NodeTag rightArgumentType = nodeTag(strippedRightArgument); if (leftArgumentType != T_Var || rightArgumentType != T_Var) { diff --git a/src/test/regress/expected/multi_join_pruning.out b/src/test/regress/expected/multi_join_pruning.out index 578964ac5..9627df180 100644 --- a/src/test/regress/expected/multi_join_pruning.out +++ b/src/test/regress/expected/multi_join_pruning.out @@ -89,13 +89,15 @@ DEBUG: join prunable for intervals [(a,3,b),(b,4,c)] and [(c,5,d),(d,6,e)] explain statements for distributed queries are currently unsupported (1 row) --- Large table joins between varchar columns do not work because of a bug we --- have. Currently, we require joins to be only on columns. Postgres adds a --- relabel to typecast varchars to text due to which our check fails and we --- error out. +-- Test that large table joins on partition varchar columns work EXPLAIN SELECT count(*) FROM varchar_partitioned_table table1, varchar_partitioned_table table2 WHERE table1.varchar_column = table2.varchar_column; -ERROR: cannot perform local joins that involve expressions -DETAIL: local joins can be performed between columns only +DEBUG: join prunable for intervals [BA1000U2AMO4ZGX,BZZXSP27F21T6] and [AA1000U2AMO4ZGX,AZZXSP27F21T6] +DEBUG: join prunable for intervals [AA1000U2AMO4ZGX,AZZXSP27F21T6] and [BA1000U2AMO4ZGX,BZZXSP27F21T6] + QUERY PLAN +---------------------------------------------------------------------- + explain statements for distributed queries are currently unsupported +(1 row) + SET client_min_messages TO NOTICE; diff --git a/src/test/regress/sql/multi_join_pruning.sql b/src/test/regress/sql/multi_join_pruning.sql index 293b05f4c..f2794d8d9 100644 --- a/src/test/regress/sql/multi_join_pruning.sql +++ b/src/test/regress/sql/multi_join_pruning.sql @@ -44,10 +44,7 @@ EXPLAIN SELECT count(*) FROM composite_partitioned_table table1, composite_partitioned_table table2 WHERE table1.composite_column = table2.composite_column; --- Large table joins between varchar columns do not work because of a bug we --- have. Currently, we require joins to be only on columns. Postgres adds a --- relabel to typecast varchars to text due to which our check fails and we --- error out. +-- Test that large table joins on partition varchar columns work EXPLAIN SELECT count(*) FROM varchar_partitioned_table table1, varchar_partitioned_table table2 From f77cff3fb6ed9b222f9f8c61bbe9b5d0dcac0cfd Mon Sep 17 00:00:00 2001 From: eren Date: Fri, 8 Apr 2016 11:51:37 +0300 Subject: [PATCH 3/3] Fix JOINs on varchar columns with subquery pushdown Fixes #379 Varchar VAR struct is wrapped in RELABELTYPE struct inside PostgreSQL code and IsPartitionColumnRecursive function considers only VAR types so returning false for varchar. This change adds strip_implicit_coercions() call to the columnExpression in IsPartitionColumnRecursive function so that we get rid of implicit coercions like RELABELTYPE are stripped to VAR. --- .../planner/multi_logical_optimizer.c | 9 +++--- src/test/regress/input/multi_subquery.source | 30 +++++++++++++++++ src/test/regress/output/multi_subquery.source | 32 +++++++++++++++++++ 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 59c5c4ee0..75a869b60 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -2907,14 +2907,15 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query) List *rangetableList = query->rtable; Index rangeTableEntryIndex = 0; RangeTblEntry *rangeTableEntry = NULL; + Expr *strippedColumnExpression = strip_implicit_coercions(columnExpression); - if (IsA(columnExpression, Var)) + if (IsA(strippedColumnExpression, Var)) { - candidateColumn = (Var *) columnExpression; + candidateColumn = (Var *) strippedColumnExpression; } - else if (IsA(columnExpression, FieldSelect)) + else if (IsA(strippedColumnExpression, FieldSelect)) { - FieldSelect *compositeField = (FieldSelect *) columnExpression; + FieldSelect *compositeField = (FieldSelect *) strippedColumnExpression; Expr *fieldExpression = compositeField->arg; if (IsA(fieldExpression, Var)) diff --git a/src/test/regress/input/multi_subquery.source b/src/test/regress/input/multi_subquery.source index cee05ac3d..032b044cf 100644 --- a/src/test/regress/input/multi_subquery.source +++ b/src/test/regress/input/multi_subquery.source @@ -319,6 +319,36 @@ AS foo; SET client_min_messages TO NOTICE; +-- test subquery join on VARCHAR partition column +SELECT * FROM + (SELECT + a_inner AS a + FROM + (SELECT + subquery_pruning_varchar_test_table.a AS a_inner + FROM + subquery_pruning_varchar_test_table + GROUP BY + subquery_pruning_varchar_test_table.a + HAVING + count(subquery_pruning_varchar_test_table.a) < 3) + AS f1, + + (SELECT + subquery_pruning_varchar_test_table.a + FROM + subquery_pruning_varchar_test_table + GROUP BY + subquery_pruning_varchar_test_table.a + HAVING + sum(coalesce(subquery_pruning_varchar_test_table.b,0)) > 20.0) + AS f2 + WHERE + f1.a_inner = f2.a + GROUP BY + a_inner) +AS foo; + DROP TABLE subquery_pruning_varchar_test_table; ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 102026; diff --git a/src/test/regress/output/multi_subquery.source b/src/test/regress/output/multi_subquery.source index 8f5f30c22..0d8492e9b 100644 --- a/src/test/regress/output/multi_subquery.source +++ b/src/test/regress/output/multi_subquery.source @@ -367,5 +367,37 @@ DEBUG: predicate pruning for shardId 102026 (0 rows) SET client_min_messages TO NOTICE; +-- test subquery join on VARCHAR partition column +SELECT * FROM + (SELECT + a_inner AS a + FROM + (SELECT + subquery_pruning_varchar_test_table.a AS a_inner + FROM + subquery_pruning_varchar_test_table + GROUP BY + subquery_pruning_varchar_test_table.a + HAVING + count(subquery_pruning_varchar_test_table.a) < 3) + AS f1, + (SELECT + subquery_pruning_varchar_test_table.a + FROM + subquery_pruning_varchar_test_table + GROUP BY + subquery_pruning_varchar_test_table.a + HAVING + sum(coalesce(subquery_pruning_varchar_test_table.b,0)) > 20.0) + AS f2 + WHERE + f1.a_inner = f2.a + GROUP BY + a_inner) +AS foo; + a +--- +(0 rows) + DROP TABLE subquery_pruning_varchar_test_table; ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 102026;