-- -- MULTI_NULL_MINMAX_VALUE_PRUNING -- -- This test checks that we can handle null min/max values in shard statistics -- and that we don't partition or join prune shards that have null values. SET citus.next_shard_id TO 760000; -- print whether we're using version > 9 to make version-specific tests clear SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 9 AS version_above_nine; version_above_nine -------------------- t (1 row) SET client_min_messages TO DEBUG2; SET citus.explain_all_tasks TO on; -- to avoid differing explain output - executor doesn't matter, -- because were testing pruning here. SET citus.task_executor_type TO 'real-time'; -- Change configuration to treat lineitem and orders tables as large SET citus.large_table_shard_count TO 2; SET citus.log_multi_join_order to true; SET citus.enable_repartition_joins to ON; SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290000; shardminvalue | shardmaxvalue ---------------+--------------- 1 | 5986 (1 row) SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290001; shardminvalue | shardmaxvalue ---------------+--------------- 8997 | 14947 (1 row) -- Check that partition and join pruning works when min/max values exist -- Adding l_orderkey = 1 to make the query not router executable EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030 or l_orderkey = 1; LOG: join order: [ "lineitem" ] QUERY PLAN ----------------------------------------------------------------------- Custom Scan (Citus Real-Time) Task Count: 2 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Bitmap Heap Scan on lineitem_290001 lineitem Recheck Cond: ((l_orderkey = 9030) OR (l_orderkey = 1)) -> BitmapOr -> Bitmap Index Scan on lineitem_pkey_290001 Index Cond: (l_orderkey = 9030) -> Bitmap Index Scan on lineitem_pkey_290001 Index Cond: (l_orderkey = 1) -> Task Node: host=localhost port=57638 dbname=regression -> Bitmap Heap Scan on lineitem_290000 lineitem Recheck Cond: ((l_orderkey = 9030) OR (l_orderkey = 1)) -> BitmapOr -> Bitmap Index Scan on lineitem_pkey_290000 Index Cond: (l_orderkey = 9030) -> Bitmap Index Scan on lineitem_pkey_290000 Index Cond: (l_orderkey = 1) (21 rows) EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; LOG: join order: [ "lineitem" ][ local partition join "orders" ] DEBUG: join prunable for intervals [1,5986] and [8997,14947] DEBUG: join prunable for intervals [8997,14947] and [1,5986] QUERY PLAN -------------------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Real-Time) Task Count: 2 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Hash Join Hash Cond: (lineitem.l_orderkey = orders.o_orderkey) -> Seq Scan on lineitem_290001 lineitem -> Hash -> Seq Scan on orders_290003 orders -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Hash Join Hash Cond: (lineitem.l_orderkey = orders.o_orderkey) -> Seq Scan on lineitem_290000 lineitem -> Hash -> Seq Scan on orders_290002 orders (20 rows) -- Now set the minimum value for a shard to null. Then check that we don't apply -- partition or join pruning for the shard with null min value. Since it is not -- supported with single-repartition join, dual-repartition has been used. UPDATE pg_dist_shard SET shardminvalue = NULL WHERE shardid = 290000; EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; LOG: join order: [ "lineitem" ] QUERY PLAN ------------------------------------------------------------------------------- Custom Scan (Citus Real-Time) Task Count: 2 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem Index Cond: (l_orderkey = 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem Index Cond: (l_orderkey = 9030) (11 rows) EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_partkey = o_custkey; LOG: join order: [ "lineitem" ][ dual partition join "orders" ] DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 DEBUG: join prunable for task partitionId 1 and 0 DEBUG: join prunable for task partitionId 1 and 2 DEBUG: join prunable for task partitionId 1 and 3 DEBUG: join prunable for task partitionId 2 and 0 DEBUG: join prunable for task partitionId 2 and 1 DEBUG: join prunable for task partitionId 2 and 3 DEBUG: join prunable for task partitionId 3 and 0 DEBUG: join prunable for task partitionId 3 and 1 DEBUG: join prunable for task partitionId 3 and 2 DEBUG: pruning merge fetch taskId 1 DETAIL: Creating dependency on merge taskId 3 DEBUG: pruning merge fetch taskId 2 DETAIL: Creating dependency on merge taskId 3 DEBUG: pruning merge fetch taskId 4 DETAIL: Creating dependency on merge taskId 6 DEBUG: pruning merge fetch taskId 5 DETAIL: Creating dependency on merge taskId 6 DEBUG: pruning merge fetch taskId 7 DETAIL: Creating dependency on merge taskId 9 DEBUG: pruning merge fetch taskId 8 DETAIL: Creating dependency on merge taskId 9 DEBUG: pruning merge fetch taskId 10 DETAIL: Creating dependency on merge taskId 12 DEBUG: pruning merge fetch taskId 11 DETAIL: Creating dependency on merge taskId 12 DEBUG: cannot use real time executor with repartition jobs HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. QUERY PLAN ------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 4 Tasks Shown: None, not supported for re-partition queries -> MapMergeJob Map Task Count: 2 Merge Task Count: 4 -> MapMergeJob Map Task Count: 2 Merge Task Count: 4 (10 rows) -- Next, set the maximum value for another shard to null. Then check that we -- don't apply partition or join pruning for this other shard either. Since it -- is not supported with single-repartition join, dual-repartition has been used. UPDATE pg_dist_shard SET shardmaxvalue = NULL WHERE shardid = 290001; EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; LOG: join order: [ "lineitem" ] QUERY PLAN ------------------------------------------------------------------------------- Custom Scan (Citus Real-Time) Task Count: 2 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem Index Cond: (l_orderkey = 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem Index Cond: (l_orderkey = 9030) (11 rows) EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_partkey = o_custkey; LOG: join order: [ "lineitem" ][ dual partition join "orders" ] DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 DEBUG: join prunable for task partitionId 1 and 0 DEBUG: join prunable for task partitionId 1 and 2 DEBUG: join prunable for task partitionId 1 and 3 DEBUG: join prunable for task partitionId 2 and 0 DEBUG: join prunable for task partitionId 2 and 1 DEBUG: join prunable for task partitionId 2 and 3 DEBUG: join prunable for task partitionId 3 and 0 DEBUG: join prunable for task partitionId 3 and 1 DEBUG: join prunable for task partitionId 3 and 2 DEBUG: pruning merge fetch taskId 1 DETAIL: Creating dependency on merge taskId 3 DEBUG: pruning merge fetch taskId 2 DETAIL: Creating dependency on merge taskId 3 DEBUG: pruning merge fetch taskId 4 DETAIL: Creating dependency on merge taskId 6 DEBUG: pruning merge fetch taskId 5 DETAIL: Creating dependency on merge taskId 6 DEBUG: pruning merge fetch taskId 7 DETAIL: Creating dependency on merge taskId 9 DEBUG: pruning merge fetch taskId 8 DETAIL: Creating dependency on merge taskId 9 DEBUG: pruning merge fetch taskId 10 DETAIL: Creating dependency on merge taskId 12 DEBUG: pruning merge fetch taskId 11 DETAIL: Creating dependency on merge taskId 12 DEBUG: cannot use real time executor with repartition jobs HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. QUERY PLAN ------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 4 Tasks Shown: None, not supported for re-partition queries -> MapMergeJob Map Task Count: 2 Merge Task Count: 4 -> MapMergeJob Map Task Count: 2 Merge Task Count: 4 (10 rows) -- Last, set the minimum value to 0 and check that we don't treat it as null. We -- should apply partition and join pruning for this shard now. Since it is not -- supported with single-repartition join, dual-repartition has been used. UPDATE pg_dist_shard SET shardminvalue = '0' WHERE shardid = 290000; EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; LOG: join order: [ "lineitem" ] DEBUG: Plan is router executable QUERY PLAN ------------------------------------------------------------------------------- Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Index Scan using lineitem_pkey_290001 on lineitem_290001 lineitem Index Cond: (l_orderkey = 9030) (7 rows) EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_partkey = o_custkey; LOG: join order: [ "lineitem" ][ dual partition join "orders" ] DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 DEBUG: join prunable for task partitionId 1 and 0 DEBUG: join prunable for task partitionId 1 and 2 DEBUG: join prunable for task partitionId 1 and 3 DEBUG: join prunable for task partitionId 2 and 0 DEBUG: join prunable for task partitionId 2 and 1 DEBUG: join prunable for task partitionId 2 and 3 DEBUG: join prunable for task partitionId 3 and 0 DEBUG: join prunable for task partitionId 3 and 1 DEBUG: join prunable for task partitionId 3 and 2 DEBUG: pruning merge fetch taskId 1 DETAIL: Creating dependency on merge taskId 3 DEBUG: pruning merge fetch taskId 2 DETAIL: Creating dependency on merge taskId 3 DEBUG: pruning merge fetch taskId 4 DETAIL: Creating dependency on merge taskId 6 DEBUG: pruning merge fetch taskId 5 DETAIL: Creating dependency on merge taskId 6 DEBUG: pruning merge fetch taskId 7 DETAIL: Creating dependency on merge taskId 9 DEBUG: pruning merge fetch taskId 8 DETAIL: Creating dependency on merge taskId 9 DEBUG: pruning merge fetch taskId 10 DETAIL: Creating dependency on merge taskId 12 DEBUG: pruning merge fetch taskId 11 DETAIL: Creating dependency on merge taskId 12 DEBUG: cannot use real time executor with repartition jobs HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. QUERY PLAN ------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 4 Tasks Shown: None, not supported for re-partition queries -> MapMergeJob Map Task Count: 2 Merge Task Count: 4 -> MapMergeJob Map Task Count: 2 Merge Task Count: 4 (10 rows) -- Set minimum and maximum values for two shards back to their original values UPDATE pg_dist_shard SET shardminvalue = '1' WHERE shardid = 290000; UPDATE pg_dist_shard SET shardmaxvalue = '14947' WHERE shardid = 290001; SET client_min_messages TO NOTICE;