citus/src/test/regress/expected/multi_null_minmax_value_pru...

307 lines
13 KiB
Plaintext

--
-- MULTI_NULL_MINMAX_VALUE_PRUNING
--
-- This test checks that we can handle null min/max values in shard statistics
-- and that we don't partition or join prune shards that have null values.
SET citus.next_shard_id TO 760000;
-- print whether we're using version > 9 to make version-specific tests clear
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int > 9 AS version_above_nine;
version_above_nine
--------------------
t
(1 row)
SET client_min_messages TO DEBUG2;
SET citus.explain_all_tasks TO on;
-- to avoid differing explain output - executor doesn't matter,
-- because were testing pruning here.
SET citus.task_executor_type TO 'real-time';
-- Change configuration to treat lineitem and orders tables as large
SET citus.large_table_shard_count TO 2;
SET citus.log_multi_join_order to true;
SET citus.enable_repartition_joins to ON;
SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290000;
shardminvalue | shardmaxvalue
---------------+---------------
1 | 5986
(1 row)
SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290001;
shardminvalue | shardmaxvalue
---------------+---------------
8997 | 14947
(1 row)
-- Check that partition and join pruning works when min/max values exist
-- Adding l_orderkey = 1 to make the query not router executable
EXPLAIN (COSTS FALSE)
SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030 or l_orderkey = 1;
LOG: join order: [ "lineitem" ]
QUERY PLAN
-----------------------------------------------------------------------
Custom Scan (Citus Real-Time)
Task Count: 2
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Bitmap Heap Scan on lineitem_290001 lineitem
Recheck Cond: ((l_orderkey = 9030) OR (l_orderkey = 1))
-> BitmapOr
-> Bitmap Index Scan on lineitem_pkey_290001
Index Cond: (l_orderkey = 9030)
-> Bitmap Index Scan on lineitem_pkey_290001
Index Cond: (l_orderkey = 1)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Bitmap Heap Scan on lineitem_290000 lineitem
Recheck Cond: ((l_orderkey = 9030) OR (l_orderkey = 1))
-> BitmapOr
-> Bitmap Index Scan on lineitem_pkey_290000
Index Cond: (l_orderkey = 9030)
-> Bitmap Index Scan on lineitem_pkey_290000
Index Cond: (l_orderkey = 1)
(21 rows)
EXPLAIN (COSTS FALSE)
SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
WHERE l_orderkey = o_orderkey;
LOG: join order: [ "lineitem" ][ local partition join "orders" ]
DEBUG: join prunable for intervals [1,5986] and [8997,14947]
DEBUG: join prunable for intervals [8997,14947] and [1,5986]
QUERY PLAN
--------------------------------------------------------------------------------
Aggregate
-> Custom Scan (Citus Real-Time)
Task Count: 2
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Hash Join
Hash Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Seq Scan on lineitem_290001 lineitem
-> Hash
-> Seq Scan on orders_290003 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Hash Join
Hash Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Seq Scan on lineitem_290000 lineitem
-> Hash
-> Seq Scan on orders_290002 orders
(20 rows)
-- Now set the minimum value for a shard to null. Then check that we don't apply
-- partition or join pruning for the shard with null min value. Since it is not
-- supported with single-repartition join, dual-repartition has been used.
UPDATE pg_dist_shard SET shardminvalue = NULL WHERE shardid = 290000;
EXPLAIN (COSTS FALSE)
SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030;
LOG: join order: [ "lineitem" ]
QUERY PLAN
-------------------------------------------------------------------------------
Custom Scan (Citus Real-Time)
Task Count: 2
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
Index Cond: (l_orderkey = 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
Index Cond: (l_orderkey = 9030)
(11 rows)
EXPLAIN (COSTS FALSE)
SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
WHERE l_partkey = o_custkey;
LOG: join order: [ "lineitem" ][ dual partition join "orders" ]
DEBUG: join prunable for task partitionId 0 and 1
DEBUG: join prunable for task partitionId 0 and 2
DEBUG: join prunable for task partitionId 0 and 3
DEBUG: join prunable for task partitionId 1 and 0
DEBUG: join prunable for task partitionId 1 and 2
DEBUG: join prunable for task partitionId 1 and 3
DEBUG: join prunable for task partitionId 2 and 0
DEBUG: join prunable for task partitionId 2 and 1
DEBUG: join prunable for task partitionId 2 and 3
DEBUG: join prunable for task partitionId 3 and 0
DEBUG: join prunable for task partitionId 3 and 1
DEBUG: join prunable for task partitionId 3 and 2
DEBUG: pruning merge fetch taskId 1
DETAIL: Creating dependency on merge taskId 3
DEBUG: pruning merge fetch taskId 2
DETAIL: Creating dependency on merge taskId 3
DEBUG: pruning merge fetch taskId 4
DETAIL: Creating dependency on merge taskId 6
DEBUG: pruning merge fetch taskId 5
DETAIL: Creating dependency on merge taskId 6
DEBUG: pruning merge fetch taskId 7
DETAIL: Creating dependency on merge taskId 9
DEBUG: pruning merge fetch taskId 8
DETAIL: Creating dependency on merge taskId 9
DEBUG: pruning merge fetch taskId 10
DETAIL: Creating dependency on merge taskId 12
DEBUG: pruning merge fetch taskId 11
DETAIL: Creating dependency on merge taskId 12
DEBUG: cannot use real time executor with repartition jobs
HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker.
QUERY PLAN
-------------------------------------------------------------------
Aggregate
-> Custom Scan (Citus Task-Tracker)
Task Count: 4
Tasks Shown: None, not supported for re-partition queries
-> MapMergeJob
Map Task Count: 2
Merge Task Count: 4
-> MapMergeJob
Map Task Count: 2
Merge Task Count: 4
(10 rows)
-- Next, set the maximum value for another shard to null. Then check that we
-- don't apply partition or join pruning for this other shard either. Since it
-- is not supported with single-repartition join, dual-repartition has been used.
UPDATE pg_dist_shard SET shardmaxvalue = NULL WHERE shardid = 290001;
EXPLAIN (COSTS FALSE)
SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030;
LOG: join order: [ "lineitem" ]
QUERY PLAN
-------------------------------------------------------------------------------
Custom Scan (Citus Real-Time)
Task Count: 2
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
Index Cond: (l_orderkey = 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
Index Cond: (l_orderkey = 9030)
(11 rows)
EXPLAIN (COSTS FALSE)
SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
WHERE l_partkey = o_custkey;
LOG: join order: [ "lineitem" ][ dual partition join "orders" ]
DEBUG: join prunable for task partitionId 0 and 1
DEBUG: join prunable for task partitionId 0 and 2
DEBUG: join prunable for task partitionId 0 and 3
DEBUG: join prunable for task partitionId 1 and 0
DEBUG: join prunable for task partitionId 1 and 2
DEBUG: join prunable for task partitionId 1 and 3
DEBUG: join prunable for task partitionId 2 and 0
DEBUG: join prunable for task partitionId 2 and 1
DEBUG: join prunable for task partitionId 2 and 3
DEBUG: join prunable for task partitionId 3 and 0
DEBUG: join prunable for task partitionId 3 and 1
DEBUG: join prunable for task partitionId 3 and 2
DEBUG: pruning merge fetch taskId 1
DETAIL: Creating dependency on merge taskId 3
DEBUG: pruning merge fetch taskId 2
DETAIL: Creating dependency on merge taskId 3
DEBUG: pruning merge fetch taskId 4
DETAIL: Creating dependency on merge taskId 6
DEBUG: pruning merge fetch taskId 5
DETAIL: Creating dependency on merge taskId 6
DEBUG: pruning merge fetch taskId 7
DETAIL: Creating dependency on merge taskId 9
DEBUG: pruning merge fetch taskId 8
DETAIL: Creating dependency on merge taskId 9
DEBUG: pruning merge fetch taskId 10
DETAIL: Creating dependency on merge taskId 12
DEBUG: pruning merge fetch taskId 11
DETAIL: Creating dependency on merge taskId 12
DEBUG: cannot use real time executor with repartition jobs
HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker.
QUERY PLAN
-------------------------------------------------------------------
Aggregate
-> Custom Scan (Citus Task-Tracker)
Task Count: 4
Tasks Shown: None, not supported for re-partition queries
-> MapMergeJob
Map Task Count: 2
Merge Task Count: 4
-> MapMergeJob
Map Task Count: 2
Merge Task Count: 4
(10 rows)
-- Last, set the minimum value to 0 and check that we don't treat it as null. We
-- should apply partition and join pruning for this shard now. Since it is not
-- supported with single-repartition join, dual-repartition has been used.
UPDATE pg_dist_shard SET shardminvalue = '0' WHERE shardid = 290000;
EXPLAIN (COSTS FALSE)
SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030;
LOG: join order: [ "lineitem" ]
DEBUG: Plan is router executable
QUERY PLAN
-------------------------------------------------------------------------------
Custom Scan (Citus Router)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
Index Cond: (l_orderkey = 9030)
(7 rows)
EXPLAIN (COSTS FALSE)
SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
WHERE l_partkey = o_custkey;
LOG: join order: [ "lineitem" ][ dual partition join "orders" ]
DEBUG: join prunable for task partitionId 0 and 1
DEBUG: join prunable for task partitionId 0 and 2
DEBUG: join prunable for task partitionId 0 and 3
DEBUG: join prunable for task partitionId 1 and 0
DEBUG: join prunable for task partitionId 1 and 2
DEBUG: join prunable for task partitionId 1 and 3
DEBUG: join prunable for task partitionId 2 and 0
DEBUG: join prunable for task partitionId 2 and 1
DEBUG: join prunable for task partitionId 2 and 3
DEBUG: join prunable for task partitionId 3 and 0
DEBUG: join prunable for task partitionId 3 and 1
DEBUG: join prunable for task partitionId 3 and 2
DEBUG: pruning merge fetch taskId 1
DETAIL: Creating dependency on merge taskId 3
DEBUG: pruning merge fetch taskId 2
DETAIL: Creating dependency on merge taskId 3
DEBUG: pruning merge fetch taskId 4
DETAIL: Creating dependency on merge taskId 6
DEBUG: pruning merge fetch taskId 5
DETAIL: Creating dependency on merge taskId 6
DEBUG: pruning merge fetch taskId 7
DETAIL: Creating dependency on merge taskId 9
DEBUG: pruning merge fetch taskId 8
DETAIL: Creating dependency on merge taskId 9
DEBUG: pruning merge fetch taskId 10
DETAIL: Creating dependency on merge taskId 12
DEBUG: pruning merge fetch taskId 11
DETAIL: Creating dependency on merge taskId 12
DEBUG: cannot use real time executor with repartition jobs
HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker.
QUERY PLAN
-------------------------------------------------------------------
Aggregate
-> Custom Scan (Citus Task-Tracker)
Task Count: 4
Tasks Shown: None, not supported for re-partition queries
-> MapMergeJob
Map Task Count: 2
Merge Task Count: 4
-> MapMergeJob
Map Task Count: 2
Merge Task Count: 4
(10 rows)
-- Set minimum and maximum values for two shards back to their original values
UPDATE pg_dist_shard SET shardminvalue = '1' WHERE shardid = 290000;
UPDATE pg_dist_shard SET shardmaxvalue = '14947' WHERE shardid = 290001;
SET client_min_messages TO NOTICE;