Remove hash-pruning logic for NULL values

It turns out some tests exercised this behavior, but removing it should
have no ill effects. Besides, both copy and INSERT disallow NULLs in a
table's partition column.

Fixes a bug where anti-joins on hash-partitioned distributed tables
would incorrectly prune shards early, result in incorrect results (test
included).
pull/639/head
Jason Petersen 2016-07-06 11:41:37 -06:00
parent 18236b18b5
commit 41ed433b0e
No known key found for this signature in database
GPG Key ID: 9F1D3510D110ABA9
6 changed files with 84 additions and 50 deletions

View File

@ -134,7 +134,6 @@ static Node * HashableClauseMutator(Node *originalNode, Var *partitionColumn);
static Var * MakeInt4Column(void);
static Const * MakeInt4Constant(Datum constantValue);
static OpExpr * MakeHashedOperatorExpression(OpExpr *operatorExpression);
static OpExpr * MakeOpExpressionWithZeroConst(void);
static List * BuildRestrictInfoList(List *qualList);
static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery,
List *dependedJobList);
@ -2838,24 +2837,6 @@ HashableClauseMutator(Node *originalNode, Var *partitionColumn)
newNode = (Node *) hashedOperatorExpression;
}
}
else if (IsA(originalNode, NullTest))
{
NullTest *nullTest = (NullTest *) originalNode;
Var *column = NULL;
Expr *nullTestOperand = nullTest->arg;
if (IsA(nullTestOperand, Var))
{
column = (Var *) nullTestOperand;
}
if ((column != NULL) && equal(column, partitionColumn) &&
(nullTest->nulltesttype == IS_NULL))
{
OpExpr *opExpressionWithZeroConst = MakeOpExpressionWithZeroConst();
newNode = (Node *) opExpressionWithZeroConst;
}
}
else if (IsA(originalNode, ScalarArrayOpExpr))
{
ScalarArrayOpExpr *arrayOperatorExpression = (ScalarArrayOpExpr *) originalNode;
@ -3034,23 +3015,6 @@ MakeInt4Constant(Datum constantValue)
}
/*
* MakeOpExpressionWithZeroConst creates a new operator expression with equality
* check to zero and returns it.
*/
static OpExpr *
MakeOpExpressionWithZeroConst()
{
Var *int4Column = MakeInt4Column();
OpExpr *operatorExpression = MakeOpExpression(int4Column, BTEqualStrategyNumber);
Const *constant = (Const *) get_rightop((Expr *) operatorExpression);
constant->constvalue = Int32GetDatum(0);
constant->constisnull = false;
return operatorExpression;
}
/*
* BuildRestrictInfoList builds restrict info list using the selection criteria,
* and then return this list. Note that this function assumes there is only one

View File

@ -178,9 +178,6 @@ DEBUG: predicate pruning for shardId 630003
SET citus.task_executor_type TO :actual_task_executor;
SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey is NULL;
DEBUG: predicate pruning for shardId 630000
DEBUG: predicate pruning for shardId 630001
DEBUG: predicate pruning for shardId 630003
count
-------
0
@ -225,8 +222,6 @@ DEBUG: predicate pruning for shardId 630003
SELECT count(*) FROM orders_hash_partitioned
WHERE o_orderkey = 1 OR o_orderkey is NULL;
DEBUG: predicate pruning for shardId 630001
DEBUG: predicate pruning for shardId 630003
count
-------
0
@ -324,12 +319,11 @@ SELECT count(*)
DEBUG: predicate pruning for shardId 630001
DEBUG: predicate pruning for shardId 630002
DEBUG: predicate pruning for shardId 630003
DEBUG: predicate pruning for shardId 630000
DEBUG: predicate pruning for shardId 630001
DEBUG: predicate pruning for shardId 630003
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
count
-------
0
(1 row)

View File

@ -59,11 +59,11 @@ SELECT prune_using_single_value('pruning', 'tomato');
{800002}
(1 row)
-- the above is true even if that value is null
-- null values should result in no pruning
SELECT prune_using_single_value('pruning', NULL);
prune_using_single_value
--------------------------
{800002}
prune_using_single_value
-------------------------------
{800000,800001,800002,800003}
(1 row)
-- build an OR clause and expect more than one sahrd

View File

@ -415,3 +415,36 @@ FROM
LEFT OUTER JOIN multi_outer_join_left l1 ON (l1.l_custkey = r1.r_custkey)) AS
test(c_custkey, c_nationkey)
INNER JOIN multi_outer_join_third t1 ON (test.c_custkey = t1.t_custkey);
-- simple test to ensure anti-joins work with hash-partitioned tables
CREATE TABLE left_values(val int);
SELECT master_create_distributed_table('left_values', 'val', 'hash');
SELECT master_create_worker_shards('left_values', 16, 1);
\copy left_values from stdin
1
2
3
4
5
\.
CREATE TABLE right_values(val int);
SELECT master_create_distributed_table('right_values', 'val', 'hash');
SELECT master_create_worker_shards('right_values', 16, 1);
\copy right_values from stdin
2
3
4
\.
SELECT
*
FROM
left_values AS l
LEFT JOIN right_values AS r ON l.val = r.val
WHERE
r.val IS NULL;

View File

@ -762,3 +762,46 @@ FROM
INNER JOIN multi_outer_join_third t1 ON (test.c_custkey = t1.t_custkey);
ERROR: cannot perform distributed planning on this query
DETAIL: Shards of relations in outer join queries must have 1-to-1 shard partitioning
-- simple test to ensure anti-joins work with hash-partitioned tables
CREATE TABLE left_values(val int);
SELECT master_create_distributed_table('left_values', 'val', 'hash');
master_create_distributed_table
---------------------------------
(1 row)
SELECT master_create_worker_shards('left_values', 16, 1);
master_create_worker_shards
-----------------------------
(1 row)
\copy left_values from stdin
CREATE TABLE right_values(val int);
SELECT master_create_distributed_table('right_values', 'val', 'hash');
master_create_distributed_table
---------------------------------
(1 row)
SELECT master_create_worker_shards('right_values', 16, 1);
master_create_worker_shards
-----------------------------
(1 row)
\copy right_values from stdin
SELECT
*
FROM
left_values AS l
LEFT JOIN right_values AS r ON l.val = r.val
WHERE
r.val IS NULL;
LOG: join order: [ "left_values" ][ local partition join "right_values" ]
val | val
-----+-----
1 |
5 |
(2 rows)

View File

@ -54,7 +54,7 @@ SELECT prune_using_no_values('pruning');
-- with a single value, expect a single shard
SELECT prune_using_single_value('pruning', 'tomato');
-- the above is true even if that value is null
-- null values should result in no pruning
SELECT prune_using_single_value('pruning', NULL);
-- build an OR clause and expect more than one sahrd