diff --git a/src/test/regress/expected/multi_large_table_pruning.out b/src/test/regress/expected/multi_large_table_pruning.out index 67a986bdf..6bd3149e4 100644 --- a/src/test/regress/expected/multi_large_table_pruning.out +++ b/src/test/regress/expected/multi_large_table_pruning.out @@ -9,6 +9,36 @@ SET citus.large_table_shard_count TO 2; SET client_min_messages TO DEBUG2; SET citus.task_executor_type TO 'task-tracker'; -- Single range-repartition join to test join-pruning behaviour. +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + orders, customer +WHERE + o_custkey = c_custkey; +DEBUG: join prunable for intervals [1,1000] and [1001,2000] +DEBUG: join prunable for intervals [1,1000] and [6001,7000] +DEBUG: join prunable for intervals [1001,2000] and [1,1000] +DEBUG: join prunable for intervals [1001,2000] and [6001,7000] +DEBUG: join prunable for intervals [6001,7000] and [1,1000] +DEBUG: join prunable for intervals [6001,7000] and [1001,2000] +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 5 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 8 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 11 + QUERY PLAN +------------------------------------------------------------------- + Aggregate + -> Custom Scan (Citus Task-Tracker) + Task Count: 3 + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 2 + Merge Task Count: 3 +(7 rows) + SELECT count(*) FROM @@ -34,6 +64,25 @@ DETAIL: Creating dependency on merge taskId 11 -- Single range-repartition join with a selection clause on the partitioned -- table to test the case when all map tasks are pruned away. +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + orders, customer +WHERE + o_custkey = c_custkey AND + o_orderkey < 0; + QUERY PLAN +------------------------------------------------------------------- + Aggregate + -> Custom Scan (Citus Task-Tracker) + Task Count: 0 + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 0 + Merge Task Count: 0 +(7 rows) + SELECT count(*) FROM @@ -48,6 +97,25 @@ WHERE -- Single range-repartition join with a selection clause on the base table to -- test the case when all sql tasks are pruned away. +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + orders, customer +WHERE + o_custkey = c_custkey AND + c_custkey < 0; + QUERY PLAN +------------------------------------------------------------------- + Aggregate + -> Custom Scan (Citus Task-Tracker) + Task Count: 0 + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 2 + Merge Task Count: 3 +(7 rows) + SELECT count(*) FROM @@ -63,6 +131,55 @@ WHERE -- Dual hash-repartition join test case. Note that this query doesn't produce -- meaningful results and is only to test hash-partitioning of two large tables -- on non-partition columns. +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + lineitem, customer +WHERE + l_partkey = c_nationkey; +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 17 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 7 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 26 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 11 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 35 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 15 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 44 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 19 + QUERY PLAN +------------------------------------------------------------------- + Aggregate + -> Custom Scan (Citus Task-Tracker) + Task Count: 4 + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 8 + Merge Task Count: 4 + -> MapMergeJob + Map Task Count: 3 + Merge Task Count: 4 +(10 rows) + SELECT count(*) FROM @@ -104,6 +221,28 @@ DETAIL: Creating dependency on merge taskId 19 -- Dual hash-repartition join with a selection clause on one of the tables to -- test the case when all map tasks are pruned away. +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + lineitem, customer +WHERE + l_partkey = c_nationkey AND + l_orderkey < 0; + QUERY PLAN +------------------------------------------------------------------- + Aggregate + -> Custom Scan (Citus Task-Tracker) + Task Count: 0 + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 0 + Merge Task Count: 0 + -> MapMergeJob + Map Task Count: 3 + Merge Task Count: 4 +(10 rows) + SELECT count(*) FROM @@ -117,6 +256,24 @@ WHERE (1 row) -- Test cases with false in the WHERE clause +EXPLAIN (COSTS OFF) +SELECT + o_orderkey +FROM + orders INNER JOIN customer ON (o_custkey = c_custkey) +WHERE + false; + QUERY PLAN +------------------------------------------------------------- + Custom Scan (Citus Task-Tracker) + Task Count: 0 + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 0 + Merge Task Count: 0 +(6 rows) + +-- execute once, to verify that's handled SELECT o_orderkey FROM @@ -127,31 +284,46 @@ WHERE ------------ (0 rows) +EXPLAIN (COSTS OFF) SELECT o_orderkey FROM orders INNER JOIN customer ON (o_custkey = c_custkey) WHERE 1=0 AND c_custkey < 0; - o_orderkey ------------- -(0 rows) + QUERY PLAN +------------------------------------------------------------- + Custom Scan (Citus Task-Tracker) + Task Count: 0 + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 0 + Merge Task Count: 0 +(6 rows) +EXPLAIN (COSTS OFF) SELECT o_orderkey FROM orders INNER JOIN customer ON (o_custkey = c_custkey AND false); - o_orderkey ------------- -(0 rows) + QUERY PLAN +---------------------------------- + Custom Scan (Citus Task-Tracker) + Task Count: 0 + Tasks Shown: All +(3 rows) +EXPLAIN (COSTS OFF) SELECT o_orderkey FROM orders, customer WHERE o_custkey = c_custkey AND false; - o_orderkey ------------- -(0 rows) + QUERY PLAN +---------------------------------- + Custom Scan (Citus Task-Tracker) + Task Count: 0 + Tasks Shown: All +(3 rows) diff --git a/src/test/regress/expected/multi_null_minmax_value_pruning.out b/src/test/regress/expected/multi_null_minmax_value_pruning.out index c3130e086..fd5491287 100644 --- a/src/test/regress/expected/multi_null_minmax_value_pruning.out +++ b/src/test/regress/expected/multi_null_minmax_value_pruning.out @@ -5,6 +5,10 @@ -- and that we don't partition or join prune shards that have null values. ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 760000; SET client_min_messages TO DEBUG2; +SET citus.explain_all_tasks TO on; +-- to avoid differing explain output - executor doesn't matter, +-- because were testing pruning here. +SET citus.task_executor_type TO 'real-time'; -- Change configuration to treat lineitem and orders tables as large SET citus.large_table_shard_count TO 2; SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290000; @@ -21,23 +25,34 @@ SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290001; -- Check that partition and join pruning works when min/max values exist -- Adding l_orderkey = 1 to make the query not router executable +EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030 or l_orderkey = 1; - l_orderkey | l_linenumber | l_shipdate -------------+--------------+------------ - 1 | 1 | 03-13-1996 - 1 | 2 | 04-12-1996 - 1 | 3 | 01-29-1996 - 1 | 4 | 04-21-1996 - 1 | 5 | 03-30-1996 - 1 | 6 | 01-30-1996 - 9030 | 1 | 09-02-1998 - 9030 | 2 | 08-19-1998 - 9030 | 3 | 08-27-1998 - 9030 | 4 | 07-20-1998 - 9030 | 5 | 09-29-1998 - 9030 | 6 | 09-03-1998 -(12 rows) + QUERY PLAN +----------------------------------------------------------------------- + Custom Scan (Citus Real-Time) + Task Count: 2 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> Bitmap Heap Scan on lineitem_290000 lineitem + Recheck Cond: ((l_orderkey = 9030) OR (l_orderkey = 1)) + -> BitmapOr + -> Bitmap Index Scan on lineitem_pkey_290000 + Index Cond: (l_orderkey = 9030) + -> Bitmap Index Scan on lineitem_pkey_290000 + Index Cond: (l_orderkey = 1) + -> Task + Node: host=localhost port=57638 dbname=regression + -> Bitmap Heap Scan on lineitem_290004 lineitem + Recheck Cond: ((l_orderkey = 9030) OR (l_orderkey = 1)) + -> BitmapOr + -> Bitmap Index Scan on lineitem_pkey_290004 + Index Cond: (l_orderkey = 9030) + -> Bitmap Index Scan on lineitem_pkey_290004 + Index Cond: (l_orderkey = 1) +(21 rows) +EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; DEBUG: join prunable for intervals [1,1509] and [8997,14946] @@ -48,25 +63,91 @@ DEBUG: join prunable for intervals [8997,10560] and [1,5986] DEBUG: join prunable for intervals [10560,12036] and [1,5986] DEBUG: join prunable for intervals [12036,13473] and [1,5986] DEBUG: join prunable for intervals [13473,14947] and [1,5986] - sum | avg --------+-------------------- - 36086 | 3.0076679446574429 -(1 row) + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Aggregate + -> Custom Scan (Citus Real-Time) + Task Count: 8 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (lineitem.l_orderkey = orders.o_orderkey) + -> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290003 on lineitem_290003 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290002 on lineitem_290002 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290005 on lineitem_290005 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290004 on lineitem_290004 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290007 on lineitem_290007 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290006 on lineitem_290006 lineitem +(60 rows) -- Now set the minimum value for a shard to null. Then check that we don't apply -- partition or join pruning for the shard with null min value. UPDATE pg_dist_shard SET shardminvalue = NULL WHERE shardid = 290000; +EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; - l_orderkey | l_linenumber | l_shipdate -------------+--------------+------------ - 9030 | 1 | 09-02-1998 - 9030 | 2 | 08-19-1998 - 9030 | 3 | 08-27-1998 - 9030 | 4 | 07-20-1998 - 9030 | 5 | 09-29-1998 - 9030 | 6 | 09-03-1998 -(6 rows) + QUERY PLAN +------------------------------------------------------------------------------- + Custom Scan (Citus Real-Time) + Task Count: 2 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + Index Cond: (l_orderkey = 9030) + -> Task + Node: host=localhost port=57638 dbname=regression + -> Index Scan using lineitem_pkey_290004 on lineitem_290004 lineitem + Index Cond: (l_orderkey = 9030) +(11 rows) +EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; DEBUG: join prunable for intervals [1509,2951] and [8997,14946] @@ -76,25 +157,102 @@ DEBUG: join prunable for intervals [8997,10560] and [1,5986] DEBUG: join prunable for intervals [10560,12036] and [1,5986] DEBUG: join prunable for intervals [12036,13473] and [1,5986] DEBUG: join prunable for intervals [13473,14947] and [1,5986] - sum | avg --------+-------------------- - 36086 | 3.0076679446574429 -(1 row) + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Aggregate + -> Custom Scan (Citus Real-Time) + Task Count: 9 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (lineitem.l_orderkey = orders.o_orderkey) + -> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290003 on lineitem_290003 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290005 on lineitem_290005 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290002 on lineitem_290002 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290007 on lineitem_290007 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290004 on lineitem_290004 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290006 on lineitem_290006 lineitem +(67 rows) -- Next, set the maximum value for another shard to null. Then check that we -- don't apply partition or join pruning for this other shard either. UPDATE pg_dist_shard SET shardmaxvalue = NULL WHERE shardid = 290001; +EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; - l_orderkey | l_linenumber | l_shipdate -------------+--------------+------------ - 9030 | 1 | 09-02-1998 - 9030 | 2 | 08-19-1998 - 9030 | 3 | 08-27-1998 - 9030 | 4 | 07-20-1998 - 9030 | 5 | 09-29-1998 - 9030 | 6 | 09-03-1998 -(6 rows) + QUERY PLAN +------------------------------------------------------------------------------- + Custom Scan (Citus Real-Time) + Task Count: 3 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem + Index Cond: (l_orderkey = 9030) + -> Task + Node: host=localhost port=57638 dbname=regression + -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + Index Cond: (l_orderkey = 9030) + -> Task + Node: host=localhost port=57637 dbname=regression + -> Index Scan using lineitem_pkey_290004 on lineitem_290004 lineitem + Index Cond: (l_orderkey = 9030) +(15 rows) +EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; DEBUG: join prunable for intervals [2951,4455] and [8997,14946] @@ -103,25 +261,105 @@ DEBUG: join prunable for intervals [8997,10560] and [1,5986] DEBUG: join prunable for intervals [10560,12036] and [1,5986] DEBUG: join prunable for intervals [12036,13473] and [1,5986] DEBUG: join prunable for intervals [13473,14947] and [1,5986] - sum | avg --------+-------------------- - 36086 | 3.0076679446574429 -(1 row) + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Aggregate + -> Custom Scan (Citus Real-Time) + Task Count: 10 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (lineitem.l_orderkey = orders.o_orderkey) + -> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290003 on lineitem_290003 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290002 on lineitem_290002 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290005 on lineitem_290005 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290004 on lineitem_290004 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290007 on lineitem_290007 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290006 on lineitem_290006 lineitem +(74 rows) -- Last, set the minimum value to 0 and check that we don't treat it as null. We -- should apply partition and join pruning for this shard now. UPDATE pg_dist_shard SET shardminvalue = '0' WHERE shardid = 290000; +EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; - l_orderkey | l_linenumber | l_shipdate -------------+--------------+------------ - 9030 | 1 | 09-02-1998 - 9030 | 2 | 08-19-1998 - 9030 | 3 | 08-27-1998 - 9030 | 4 | 07-20-1998 - 9030 | 5 | 09-29-1998 - 9030 | 6 | 09-03-1998 -(6 rows) + QUERY PLAN +------------------------------------------------------------------------------- + Custom Scan (Citus Real-Time) + Task Count: 2 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem + Index Cond: (l_orderkey = 9030) + -> Task + Node: host=localhost port=57638 dbname=regression + -> Index Scan using lineitem_pkey_290004 on lineitem_290004 lineitem + Index Cond: (l_orderkey = 9030) +(11 rows) +EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; DEBUG: join prunable for intervals [0,1509] and [8997,14946] @@ -131,10 +369,76 @@ DEBUG: join prunable for intervals [8997,10560] and [1,5986] DEBUG: join prunable for intervals [10560,12036] and [1,5986] DEBUG: join prunable for intervals [12036,13473] and [1,5986] DEBUG: join prunable for intervals [13473,14947] and [1,5986] - sum | avg --------+-------------------- - 36086 | 3.0076679446574429 -(1 row) + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Aggregate + -> Custom Scan (Citus Real-Time) + Task Count: 9 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (lineitem.l_orderkey = orders.o_orderkey) + -> Index Only Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290001 on lineitem_290001 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290002 on lineitem_290002 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290008 on orders_290008 orders + -> Index Only Scan using lineitem_pkey_290003 on lineitem_290003 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290004 on lineitem_290004 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290005 on lineitem_290005 lineitem + -> Task + Node: host=localhost port=57638 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290006 on lineitem_290006 lineitem + -> Task + Node: host=localhost port=57637 dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) + -> Index Only Scan using orders_pkey_290009 on orders_290009 orders + -> Index Only Scan using lineitem_pkey_290007 on lineitem_290007 lineitem +(67 rows) -- Set minimum and maximum values for two shards back to their original values UPDATE pg_dist_shard SET shardminvalue = '1' WHERE shardid = 290000; diff --git a/src/test/regress/expected/multi_partition_pruning.out b/src/test/regress/expected/multi_partition_pruning.out index 58152336f..5f6c0471a 100644 --- a/src/test/regress/expected/multi_partition_pruning.out +++ b/src/test/regress/expected/multi_partition_pruning.out @@ -4,8 +4,6 @@ -- Tests to verify that we correctly prune unreferenced shards. For this, we -- need to increase the logging verbosity of messages displayed on the client. ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 770000; -SET citus.explain_distributed_queries TO off; -SET client_min_messages TO DEBUG2; -- Adding additional l_orderkey = 1 to make this query not router executable SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030 or l_orderkey = 1; l_orderkey | l_linenumber | l_shipdate @@ -146,32 +144,46 @@ INSERT INTO pg_dist_shard_placement (shardid, shardstate, shardlength, nodename, GROUP BY nodename, nodeport ORDER BY nodename, nodeport ASC LIMIT 1; --- Verify that shard pruning works. Note that these queries should all prune --- one shard. -EXPLAIN SELECT count(*) FROM varchar_partitioned_table WHERE varchar_column = 'BA2'; - QUERY PLAN ------------------------------------------------------------------------ - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) +-- Verify that shard pruning works. Note that these queries should all +-- prune one shard (see task count). As these tables don't exist +-- remotely, temporarily disable WARNING messages. +SET client_min_messages TO ERROR; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM varchar_partitioned_table WHERE varchar_column = 'BA2'; + QUERY PLAN +------------------------------------------------- + Aggregate + -> Custom Scan (Citus Real-Time) + Task Count: 1 + Tasks Shown: All + -> Task + Error: Could not get remote plan. +(6 rows) -EXPLAIN SELECT count(*) FROM array_partitioned_table +EXPLAIN (COSTS OFF) +SELECT count(*) FROM array_partitioned_table WHERE array_column > '{BA1000U2AMO4ZGX, BZZXSP27F21T6}'; - QUERY PLAN ------------------------------------------------------------------------ - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) + QUERY PLAN +------------------------------------------------- + Aggregate + -> Custom Scan (Citus Real-Time) + Task Count: 1 + Tasks Shown: All + -> Task + Error: Could not get remote plan. +(6 rows) -EXPLAIN SELECT count(*) FROM composite_partitioned_table +EXPLAIN (COSTS OFF) +SELECT count(*) FROM composite_partitioned_table WHERE composite_column < '(b,5,c)'::composite_type; - QUERY PLAN ------------------------------------------------------------------------ - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) + QUERY PLAN +------------------------------------------------- + Aggregate + -> Custom Scan (Citus Real-Time) + Task Count: 1 + Tasks Shown: All + -> Task + Error: Could not get remote plan. +(6 rows) SET client_min_messages TO NOTICE; diff --git a/src/test/regress/multi_task_tracker_extra_schedule b/src/test/regress/multi_task_tracker_extra_schedule index 99f53d6d1..8051e63d1 100644 --- a/src/test/regress/multi_task_tracker_extra_schedule +++ b/src/test/regress/multi_task_tracker_extra_schedule @@ -34,7 +34,6 @@ test: multi_average_expression multi_working_columns test: multi_array_agg test: multi_agg_type_conversion multi_count_type_conversion test: multi_hash_pruning -test: multi_null_minmax_value_pruning test: multi_query_directory_cleanup test: multi_utility_statements test: multi_dropped_column_aliases @@ -50,7 +49,6 @@ test: multi_tpch_query7 multi_tpch_query7_nested # Parallel tests to check our join order planning logic. Note that we load data # below; and therefore these tests should come after the execution tests. # ---------- -test: multi_join_order_additional test: multi_load_more_data test: multi_join_order_tpch_large diff --git a/src/test/regress/sql/multi_large_table_pruning.sql b/src/test/regress/sql/multi_large_table_pruning.sql index 133cbc3b6..5e70ed3ab 100644 --- a/src/test/regress/sql/multi_large_table_pruning.sql +++ b/src/test/regress/sql/multi_large_table_pruning.sql @@ -14,7 +14,13 @@ SET client_min_messages TO DEBUG2; SET citus.task_executor_type TO 'task-tracker'; -- Single range-repartition join to test join-pruning behaviour. - +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + orders, customer +WHERE + o_custkey = c_custkey; SELECT count(*) FROM @@ -24,7 +30,14 @@ WHERE -- Single range-repartition join with a selection clause on the partitioned -- table to test the case when all map tasks are pruned away. - +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + orders, customer +WHERE + o_custkey = c_custkey AND + o_orderkey < 0; SELECT count(*) FROM @@ -35,7 +48,14 @@ WHERE -- Single range-repartition join with a selection clause on the base table to -- test the case when all sql tasks are pruned away. - +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + orders, customer +WHERE + o_custkey = c_custkey AND + c_custkey < 0; SELECT count(*) FROM @@ -47,7 +67,13 @@ WHERE -- Dual hash-repartition join test case. Note that this query doesn't produce -- meaningful results and is only to test hash-partitioning of two large tables -- on non-partition columns. - +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + lineitem, customer +WHERE + l_partkey = c_nationkey; SELECT count(*) FROM @@ -57,7 +83,14 @@ WHERE -- Dual hash-repartition join with a selection clause on one of the tables to -- test the case when all map tasks are pruned away. - +EXPLAIN (COSTS OFF) +SELECT + count(*) +FROM + lineitem, customer +WHERE + l_partkey = c_nationkey AND + l_orderkey < 0; SELECT count(*) FROM @@ -67,6 +100,14 @@ WHERE l_orderkey < 0; -- Test cases with false in the WHERE clause +EXPLAIN (COSTS OFF) +SELECT + o_orderkey +FROM + orders INNER JOIN customer ON (o_custkey = c_custkey) +WHERE + false; +-- execute once, to verify that's handled SELECT o_orderkey FROM @@ -74,6 +115,7 @@ FROM WHERE false; +EXPLAIN (COSTS OFF) SELECT o_orderkey FROM @@ -81,11 +123,13 @@ FROM WHERE 1=0 AND c_custkey < 0; +EXPLAIN (COSTS OFF) SELECT o_orderkey FROM orders INNER JOIN customer ON (o_custkey = c_custkey AND false); +EXPLAIN (COSTS OFF) SELECT o_orderkey FROM diff --git a/src/test/regress/sql/multi_null_minmax_value_pruning.sql b/src/test/regress/sql/multi_null_minmax_value_pruning.sql index ceeaaec8d..60ebd51cf 100644 --- a/src/test/regress/sql/multi_null_minmax_value_pruning.sql +++ b/src/test/regress/sql/multi_null_minmax_value_pruning.sql @@ -8,8 +8,11 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 760000; - SET client_min_messages TO DEBUG2; +SET citus.explain_all_tasks TO on; +-- to avoid differing explain output - executor doesn't matter, +-- because were testing pruning here. +SET citus.task_executor_type TO 'real-time'; -- Change configuration to treat lineitem and orders tables as large @@ -20,8 +23,10 @@ SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290001; -- Check that partition and join pruning works when min/max values exist -- Adding l_orderkey = 1 to make the query not router executable +EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030 or l_orderkey = 1; +EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; @@ -30,8 +35,10 @@ SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders UPDATE pg_dist_shard SET shardminvalue = NULL WHERE shardid = 290000; +EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; +EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; @@ -40,8 +47,10 @@ SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders UPDATE pg_dist_shard SET shardmaxvalue = NULL WHERE shardid = 290001; +EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; +EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; @@ -50,8 +59,10 @@ SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders UPDATE pg_dist_shard SET shardminvalue = '0' WHERE shardid = 290000; +EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; +EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; diff --git a/src/test/regress/sql/multi_partition_pruning.sql b/src/test/regress/sql/multi_partition_pruning.sql index fd2a4594a..1c4d0c78e 100644 --- a/src/test/regress/sql/multi_partition_pruning.sql +++ b/src/test/regress/sql/multi_partition_pruning.sql @@ -8,10 +8,6 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 770000; - -SET citus.explain_distributed_queries TO off; -SET client_min_messages TO DEBUG2; - -- Adding additional l_orderkey = 1 to make this query not router executable SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030 or l_orderkey = 1; @@ -131,15 +127,21 @@ INSERT INTO pg_dist_shard_placement (shardid, shardstate, shardlength, nodename, ORDER BY nodename, nodeport ASC LIMIT 1; --- Verify that shard pruning works. Note that these queries should all prune --- one shard. +-- Verify that shard pruning works. Note that these queries should all +-- prune one shard (see task count). As these tables don't exist +-- remotely, temporarily disable WARNING messages. +SET client_min_messages TO ERROR; -EXPLAIN SELECT count(*) FROM varchar_partitioned_table WHERE varchar_column = 'BA2'; -EXPLAIN SELECT count(*) FROM array_partitioned_table +EXPLAIN (COSTS OFF) +SELECT count(*) FROM varchar_partitioned_table WHERE varchar_column = 'BA2'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM array_partitioned_table WHERE array_column > '{BA1000U2AMO4ZGX, BZZXSP27F21T6}'; -EXPLAIN SELECT count(*) FROM composite_partitioned_table +EXPLAIN (COSTS OFF) +SELECT count(*) FROM composite_partitioned_table WHERE composite_column < '(b,5,c)'::composite_type; SET client_min_messages TO NOTICE;