From 41ed433b0e0cc4c1c58e13c6d668bb6ae68714ea Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Wed, 6 Jul 2016 11:41:37 -0600 Subject: [PATCH] Remove hash-pruning logic for NULL values It turns out some tests exercised this behavior, but removing it should have no ill effects. Besides, both copy and INSERT disallow NULLs in a table's partition column. Fixes a bug where anti-joins on hash-partitioned distributed tables would incorrectly prune shards early, result in incorrect results (test included). --- .../planner/multi_physical_planner.c | 36 ---------------- .../regress/expected/multi_hash_pruning.out | 12 ++---- .../expected/multi_prune_shard_list.out | 8 ++-- .../regress/input/multi_outer_join.source | 33 ++++++++++++++ .../regress/output/multi_outer_join.source | 43 +++++++++++++++++++ .../regress/sql/multi_prune_shard_list.sql | 2 +- 6 files changed, 84 insertions(+), 50 deletions(-) diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 832e95d54..bb63a0e1c 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -134,7 +134,6 @@ static Node * HashableClauseMutator(Node *originalNode, Var *partitionColumn); static Var * MakeInt4Column(void); static Const * MakeInt4Constant(Datum constantValue); static OpExpr * MakeHashedOperatorExpression(OpExpr *operatorExpression); -static OpExpr * MakeOpExpressionWithZeroConst(void); static List * BuildRestrictInfoList(List *qualList); static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery, List *dependedJobList); @@ -2838,24 +2837,6 @@ HashableClauseMutator(Node *originalNode, Var *partitionColumn) newNode = (Node *) hashedOperatorExpression; } } - else if (IsA(originalNode, NullTest)) - { - NullTest *nullTest = (NullTest *) originalNode; - Var *column = NULL; - - Expr *nullTestOperand = nullTest->arg; - if (IsA(nullTestOperand, Var)) - { - column = (Var *) nullTestOperand; - } - - if ((column != NULL) && equal(column, partitionColumn) && - (nullTest->nulltesttype == IS_NULL)) - { - OpExpr *opExpressionWithZeroConst = MakeOpExpressionWithZeroConst(); - newNode = (Node *) opExpressionWithZeroConst; - } - } else if (IsA(originalNode, ScalarArrayOpExpr)) { ScalarArrayOpExpr *arrayOperatorExpression = (ScalarArrayOpExpr *) originalNode; @@ -3034,23 +3015,6 @@ MakeInt4Constant(Datum constantValue) } -/* - * MakeOpExpressionWithZeroConst creates a new operator expression with equality - * check to zero and returns it. - */ -static OpExpr * -MakeOpExpressionWithZeroConst() -{ - Var *int4Column = MakeInt4Column(); - OpExpr *operatorExpression = MakeOpExpression(int4Column, BTEqualStrategyNumber); - Const *constant = (Const *) get_rightop((Expr *) operatorExpression); - constant->constvalue = Int32GetDatum(0); - constant->constisnull = false; - - return operatorExpression; -} - - /* * BuildRestrictInfoList builds restrict info list using the selection criteria, * and then return this list. Note that this function assumes there is only one diff --git a/src/test/regress/expected/multi_hash_pruning.out b/src/test/regress/expected/multi_hash_pruning.out index c7dc9e30b..0eb4da5ae 100644 --- a/src/test/regress/expected/multi_hash_pruning.out +++ b/src/test/regress/expected/multi_hash_pruning.out @@ -178,9 +178,6 @@ DEBUG: predicate pruning for shardId 630003 SET citus.task_executor_type TO :actual_task_executor; SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey is NULL; -DEBUG: predicate pruning for shardId 630000 -DEBUG: predicate pruning for shardId 630001 -DEBUG: predicate pruning for shardId 630003 count ------- 0 @@ -225,8 +222,6 @@ DEBUG: predicate pruning for shardId 630003 SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 OR o_orderkey is NULL; -DEBUG: predicate pruning for shardId 630001 -DEBUG: predicate pruning for shardId 630003 count ------- 0 @@ -324,12 +319,11 @@ SELECT count(*) DEBUG: predicate pruning for shardId 630001 DEBUG: predicate pruning for shardId 630002 DEBUG: predicate pruning for shardId 630003 -DEBUG: predicate pruning for shardId 630000 -DEBUG: predicate pruning for shardId 630001 -DEBUG: predicate pruning for shardId 630003 +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] count ------- - + 0 (1 row) diff --git a/src/test/regress/expected/multi_prune_shard_list.out b/src/test/regress/expected/multi_prune_shard_list.out index ea8594c24..a440478e5 100644 --- a/src/test/regress/expected/multi_prune_shard_list.out +++ b/src/test/regress/expected/multi_prune_shard_list.out @@ -59,11 +59,11 @@ SELECT prune_using_single_value('pruning', 'tomato'); {800002} (1 row) --- the above is true even if that value is null +-- null values should result in no pruning SELECT prune_using_single_value('pruning', NULL); - prune_using_single_value --------------------------- - {800002} + prune_using_single_value +------------------------------- + {800000,800001,800002,800003} (1 row) -- build an OR clause and expect more than one sahrd diff --git a/src/test/regress/input/multi_outer_join.source b/src/test/regress/input/multi_outer_join.source index 6aeca757f..bf3ee98b0 100644 --- a/src/test/regress/input/multi_outer_join.source +++ b/src/test/regress/input/multi_outer_join.source @@ -415,3 +415,36 @@ FROM LEFT OUTER JOIN multi_outer_join_left l1 ON (l1.l_custkey = r1.r_custkey)) AS test(c_custkey, c_nationkey) INNER JOIN multi_outer_join_third t1 ON (test.c_custkey = t1.t_custkey); + +-- simple test to ensure anti-joins work with hash-partitioned tables +CREATE TABLE left_values(val int); + +SELECT master_create_distributed_table('left_values', 'val', 'hash'); +SELECT master_create_worker_shards('left_values', 16, 1); + +\copy left_values from stdin +1 +2 +3 +4 +5 +\. + +CREATE TABLE right_values(val int); + +SELECT master_create_distributed_table('right_values', 'val', 'hash'); +SELECT master_create_worker_shards('right_values', 16, 1); + +\copy right_values from stdin +2 +3 +4 +\. + +SELECT + * +FROM + left_values AS l + LEFT JOIN right_values AS r ON l.val = r.val +WHERE + r.val IS NULL; diff --git a/src/test/regress/output/multi_outer_join.source b/src/test/regress/output/multi_outer_join.source index fedd7a657..2f48305a2 100644 --- a/src/test/regress/output/multi_outer_join.source +++ b/src/test/regress/output/multi_outer_join.source @@ -762,3 +762,46 @@ FROM INNER JOIN multi_outer_join_third t1 ON (test.c_custkey = t1.t_custkey); ERROR: cannot perform distributed planning on this query DETAIL: Shards of relations in outer join queries must have 1-to-1 shard partitioning +-- simple test to ensure anti-joins work with hash-partitioned tables +CREATE TABLE left_values(val int); +SELECT master_create_distributed_table('left_values', 'val', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('left_values', 16, 1); + master_create_worker_shards +----------------------------- + +(1 row) + +\copy left_values from stdin +CREATE TABLE right_values(val int); +SELECT master_create_distributed_table('right_values', 'val', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('right_values', 16, 1); + master_create_worker_shards +----------------------------- + +(1 row) + +\copy right_values from stdin +SELECT + * +FROM + left_values AS l + LEFT JOIN right_values AS r ON l.val = r.val +WHERE + r.val IS NULL; +LOG: join order: [ "left_values" ][ local partition join "right_values" ] + val | val +-----+----- + 1 | + 5 | +(2 rows) + diff --git a/src/test/regress/sql/multi_prune_shard_list.sql b/src/test/regress/sql/multi_prune_shard_list.sql index 9dc60213e..62e0432c6 100644 --- a/src/test/regress/sql/multi_prune_shard_list.sql +++ b/src/test/regress/sql/multi_prune_shard_list.sql @@ -54,7 +54,7 @@ SELECT prune_using_no_values('pruning'); -- with a single value, expect a single shard SELECT prune_using_single_value('pruning', 'tomato'); --- the above is true even if that value is null +-- null values should result in no pruning SELECT prune_using_single_value('pruning', NULL); -- build an OR clause and expect more than one sahrd