citus/src/test/regress/expected/multi_null_minmax_value_pru...

455 lines
24 KiB
Plaintext

--
-- MULTI_NULL_MINMAX_VALUE_PRUNING
--
-- This test checks that we can handle null min/max values in shard statistics
-- and that we don't partition or join prune shards that have null values.
SET citus.next_shard_id TO 760000;
-- print whether we're using version > 9 to make version-specific tests clear
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int > 9 AS version_above_nine;
version_above_nine
--------------------
t
(1 row)
SET client_min_messages TO DEBUG2;
SET citus.explain_all_tasks TO on;
-- to avoid differing explain output - executor doesn't matter,
-- because were testing pruning here.
SET citus.task_executor_type TO 'real-time';
-- Change configuration to treat lineitem and orders tables as large
SET citus.large_table_shard_count TO 2;
SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290000;
shardminvalue | shardmaxvalue
---------------+---------------
1 | 1509
(1 row)
SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290001;
shardminvalue | shardmaxvalue
---------------+---------------
1509 | 2951
(1 row)
-- Check that partition and join pruning works when min/max values exist
-- Adding l_orderkey = 1 to make the query not router executable
EXPLAIN (COSTS FALSE)
SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030 or l_orderkey = 1;
QUERY PLAN
-----------------------------------------------------------------------
Custom Scan (Citus Real-Time)
Task Count: 2
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Bitmap Heap Scan on lineitem_290000 lineitem
Recheck Cond: ((l_orderkey = 9030) OR (l_orderkey = 1))
-> BitmapOr
-> Bitmap Index Scan on lineitem_pkey_290000
Index Cond: (l_orderkey = 9030)
-> Bitmap Index Scan on lineitem_pkey_290000
Index Cond: (l_orderkey = 1)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Bitmap Heap Scan on lineitem_290004 lineitem
Recheck Cond: ((l_orderkey = 9030) OR (l_orderkey = 1))
-> BitmapOr
-> Bitmap Index Scan on lineitem_pkey_290004
Index Cond: (l_orderkey = 9030)
-> Bitmap Index Scan on lineitem_pkey_290004
Index Cond: (l_orderkey = 1)
(21 rows)
EXPLAIN (COSTS FALSE)
SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
WHERE l_orderkey = o_orderkey;
DEBUG: join prunable for intervals [1,1509] and [8997,14946]
DEBUG: join prunable for intervals [1509,2951] and [8997,14946]
DEBUG: join prunable for intervals [2951,4455] and [8997,14946]
DEBUG: join prunable for intervals [4480,5986] and [8997,14946]
DEBUG: join prunable for intervals [8997,10560] and [1,5986]
DEBUG: join prunable for intervals [10560,12036] and [1,5986]
DEBUG: join prunable for intervals [12036,13473] and [1,5986]
DEBUG: join prunable for intervals [13473,14947] and [1,5986]
QUERY PLAN
------------------------------------------------------------------------------------------------------
Aggregate
-> Custom Scan (Citus Real-Time)
Task Count: 8
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290003 on lineitem_290003 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290002 on lineitem_290002 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290005 on lineitem_290005 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290004 on lineitem_290004 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290007 on lineitem_290007 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290006 on lineitem_290006 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
(60 rows)
-- Now set the minimum value for a shard to null. Then check that we don't apply
-- partition or join pruning for the shard with null min value.
UPDATE pg_dist_shard SET shardminvalue = NULL WHERE shardid = 290000;
EXPLAIN (COSTS FALSE)
SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030;
QUERY PLAN
-------------------------------------------------------------------------------
Custom Scan (Citus Real-Time)
Task Count: 2
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
Index Cond: (l_orderkey = 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Index Scan using lineitem_pkey_290004 on lineitem_290004 lineitem
Index Cond: (l_orderkey = 9030)
(11 rows)
EXPLAIN (COSTS FALSE)
SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
WHERE l_orderkey = o_orderkey;
DEBUG: join prunable for intervals [1509,2951] and [8997,14946]
DEBUG: join prunable for intervals [2951,4455] and [8997,14946]
DEBUG: join prunable for intervals [4480,5986] and [8997,14946]
DEBUG: join prunable for intervals [8997,10560] and [1,5986]
DEBUG: join prunable for intervals [10560,12036] and [1,5986]
DEBUG: join prunable for intervals [12036,13473] and [1,5986]
DEBUG: join prunable for intervals [13473,14947] and [1,5986]
QUERY PLAN
------------------------------------------------------------------------------------------------------
Aggregate
-> Custom Scan (Citus Real-Time)
Task Count: 9
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290003 on lineitem_290003 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290005 on lineitem_290005 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290002 on lineitem_290002 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290007 on lineitem_290007 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290004 on lineitem_290004 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290006 on lineitem_290006 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
(67 rows)
-- Next, set the maximum value for another shard to null. Then check that we
-- don't apply partition or join pruning for this other shard either.
UPDATE pg_dist_shard SET shardmaxvalue = NULL WHERE shardid = 290001;
EXPLAIN (COSTS FALSE)
SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030;
QUERY PLAN
-------------------------------------------------------------------------------
Custom Scan (Citus Real-Time)
Task Count: 3
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
Index Cond: (l_orderkey = 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
Index Cond: (l_orderkey = 9030)
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290004 on lineitem_290004 lineitem
Index Cond: (l_orderkey = 9030)
(15 rows)
EXPLAIN (COSTS FALSE)
SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
WHERE l_orderkey = o_orderkey;
DEBUG: join prunable for intervals [2951,4455] and [8997,14946]
DEBUG: join prunable for intervals [4480,5986] and [8997,14946]
DEBUG: join prunable for intervals [8997,10560] and [1,5986]
DEBUG: join prunable for intervals [10560,12036] and [1,5986]
DEBUG: join prunable for intervals [12036,13473] and [1,5986]
DEBUG: join prunable for intervals [13473,14947] and [1,5986]
QUERY PLAN
------------------------------------------------------------------------------------------------------
Aggregate
-> Custom Scan (Citus Real-Time)
Task Count: 10
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290003 on lineitem_290003 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290002 on lineitem_290002 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290005 on lineitem_290005 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290004 on lineitem_290004 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290007 on lineitem_290007 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290006 on lineitem_290006 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
(74 rows)
-- Last, set the minimum value to 0 and check that we don't treat it as null. We
-- should apply partition and join pruning for this shard now.
UPDATE pg_dist_shard SET shardminvalue = '0' WHERE shardid = 290000;
EXPLAIN (COSTS FALSE)
SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030;
QUERY PLAN
-------------------------------------------------------------------------------
Custom Scan (Citus Real-Time)
Task Count: 2
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
Index Cond: (l_orderkey = 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Index Scan using lineitem_pkey_290004 on lineitem_290004 lineitem
Index Cond: (l_orderkey = 9030)
(11 rows)
EXPLAIN (COSTS FALSE)
SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
WHERE l_orderkey = o_orderkey;
DEBUG: join prunable for intervals [0,1509] and [8997,14946]
DEBUG: join prunable for intervals [2951,4455] and [8997,14946]
DEBUG: join prunable for intervals [4480,5986] and [8997,14946]
DEBUG: join prunable for intervals [8997,10560] and [1,5986]
DEBUG: join prunable for intervals [10560,12036] and [1,5986]
DEBUG: join prunable for intervals [12036,13473] and [1,5986]
DEBUG: join prunable for intervals [13473,14947] and [1,5986]
QUERY PLAN
------------------------------------------------------------------------------------------------------
Aggregate
-> Custom Scan (Citus Real-Time)
Task Count: 9
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290002 on lineitem_290002 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290003 on lineitem_290003 lineitem
-> Index Only Scan using orders_pkey_290008 on orders_290008 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290004 on lineitem_290004 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290005 on lineitem_290005 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290006 on lineitem_290006 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Merge Join
Merge Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Index Only Scan using lineitem_pkey_290007 on lineitem_290007 lineitem
-> Index Only Scan using orders_pkey_290009 on orders_290009 orders
(67 rows)
-- Set minimum and maximum values for two shards back to their original values
UPDATE pg_dist_shard SET shardminvalue = '1' WHERE shardid = 290000;
UPDATE pg_dist_shard SET shardmaxvalue = '4964' WHERE shardid = 290001;
SET client_min_messages TO NOTICE;