From 386d2567d4fe763a6cc93d65a97b3a24ffc641e1 Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Fri, 8 Oct 2021 10:32:30 +0200 Subject: [PATCH] Reduce reliance on append tables in regression tests --- src/test/regress/expected/.gitignore | 1 - src/test/regress/expected/having_subquery.out | 4 +- .../multi_agg_approximate_distinct.out | 26 +-- .../multi_agg_approximate_distinct_0.out | 4 +- src/test/regress/expected/multi_array_agg.out | 22 ++- .../regress/expected/multi_create_table.out | 16 +- src/test/regress/expected/multi_explain.out | 148 ++++++++++-------- .../regress/expected/multi_hash_pruning.out | 46 ++++-- .../expected/multi_having_pushdown.out | 29 ++-- .../expected/multi_index_statements.out | 6 +- .../expected/multi_join_order_additional.out | 8 +- .../expected/multi_join_order_tpch_small.out | 12 +- .../regress/expected/multi_join_pruning.out | 61 ++++---- src/test/regress/expected/multi_json_agg.out | 41 ++--- .../expected/multi_json_object_agg.out | 11 +- src/test/regress/expected/multi_jsonb_agg.out | 36 +++-- .../expected/multi_jsonb_object_agg.out | 45 ++++-- .../regress/expected/multi_large_shardid.out | 63 ++++++++ .../regress/expected/multi_limit_clause.out | 1 + .../multi_limit_clause_approximate.out | 6 +- .../multi_null_minmax_value_pruning.out | 52 +++--- .../multi_repartition_join_planning.out | 12 +- .../multi_repartition_join_pruning.out | 98 ++++++++++-- ...multi_repartition_join_task_assignment.out | 24 +-- .../regress/output/multi_large_shardid.source | 55 ------- src/test/regress/pg_regress_multi.pl | 9 +- src/test/regress/sql/.gitignore | 1 - .../sql/multi_agg_approximate_distinct.sql | 4 +- src/test/regress/sql/multi_array_agg.sql | 9 +- src/test/regress/sql/multi_create_table.sql | 10 +- src/test/regress/sql/multi_hash_pruning.sql | 4 +- .../regress/sql/multi_index_statements.sql | 1 - src/test/regress/sql/multi_join_pruning.sql | 6 - src/test/regress/sql/multi_json_agg.sql | 12 +- .../regress/sql/multi_json_object_agg.sql | 4 +- src/test/regress/sql/multi_jsonb_agg.sql | 13 +- .../regress/sql/multi_jsonb_object_agg.sql | 26 ++- .../multi_large_shardid.sql} | 11 +- .../sql/multi_repartition_join_pruning.sql | 6 +- 39 files changed, 561 insertions(+), 382 deletions(-) create mode 100644 src/test/regress/expected/multi_large_shardid.out delete mode 100644 src/test/regress/output/multi_large_shardid.source rename src/test/regress/{input/multi_large_shardid.source => sql/multi_large_shardid.sql} (83%) diff --git a/src/test/regress/expected/.gitignore b/src/test/regress/expected/.gitignore index 0197c3b7e..594cdd475 100644 --- a/src/test/regress/expected/.gitignore +++ b/src/test/regress/expected/.gitignore @@ -11,7 +11,6 @@ /multi_complex_count_distinct.out /multi_copy.out /multi_create_schema.out -/multi_large_shardid.out /multi_load_data.out /multi_load_data_superuser.out /multi_load_large_records.out diff --git a/src/test/regress/expected/having_subquery.out b/src/test/regress/expected/having_subquery.out index 5b660e12a..a67d441b3 100644 --- a/src/test/regress/expected/having_subquery.out +++ b/src/test/regress/expected/having_subquery.out @@ -48,13 +48,13 @@ HAVING ( -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate - -> Seq Scan on customer_360001 customer + -> Seq Scan on customer_360005 customer Task Count: 2 Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: orders.o_orderstatus - -> Seq Scan on orders_290002 orders + -> Seq Scan on orders_360002 orders (23 rows) diff --git a/src/test/regress/expected/multi_agg_approximate_distinct.out b/src/test/regress/expected/multi_agg_approximate_distinct.out index 945a5da38..fecbd12d0 100644 --- a/src/test/regress/expected/multi_agg_approximate_distinct.out +++ b/src/test/regress/expected/multi_agg_approximate_distinct.out @@ -185,26 +185,26 @@ SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as t (3 rows) SELECT - l_orderkey, + l_partkey, count(l_partkey) FILTER (WHERE l_shipmode = 'AIR'), count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR'), count(DISTINCT CASE WHEN l_shipmode = 'AIR' THEN l_partkey ELSE NULL END) FROM lineitem - GROUP BY l_orderkey + GROUP BY l_partkey ORDER BY 2 DESC, 1 DESC LIMIT 10; - l_orderkey | count | count | count + l_partkey | count | count | count --------------------------------------------------------------------- - 12005 | 4 | 4 | 4 - 5409 | 4 | 4 | 4 - 4964 | 4 | 4 | 4 - 14848 | 3 | 3 | 3 - 14496 | 3 | 3 | 3 - 13473 | 3 | 3 | 3 - 13122 | 3 | 3 | 3 - 12929 | 3 | 3 | 3 - 12645 | 3 | 3 | 3 - 12417 | 3 | 3 | 3 + 147722 | 2 | 1 | 1 + 87191 | 2 | 1 | 1 + 78600 | 2 | 1 | 1 + 1927 | 2 | 1 | 1 + 199943 | 1 | 1 | 1 + 199929 | 1 | 1 | 1 + 199810 | 1 | 1 | 1 + 199792 | 1 | 1 | 1 + 199716 | 1 | 1 | 1 + 199699 | 1 | 1 | 1 (10 rows) -- Check that we can revert config and disable count(distinct) approximations diff --git a/src/test/regress/expected/multi_agg_approximate_distinct_0.out b/src/test/regress/expected/multi_agg_approximate_distinct_0.out index 4b771461d..0ab24b540 100644 --- a/src/test/regress/expected/multi_agg_approximate_distinct_0.out +++ b/src/test/regress/expected/multi_agg_approximate_distinct_0.out @@ -143,12 +143,12 @@ SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as t ERROR: cannot compute count (distinct) approximation HINT: You need to have the hll extension loaded. SELECT - l_orderkey, + l_partkey, count(l_partkey) FILTER (WHERE l_shipmode = 'AIR'), count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR'), count(DISTINCT CASE WHEN l_shipmode = 'AIR' THEN l_partkey ELSE NULL END) FROM lineitem - GROUP BY l_orderkey + GROUP BY l_partkey ORDER BY 2 DESC, 1 DESC LIMIT 10; ERROR: cannot compute count (distinct) approximation diff --git a/src/test/regress/expected/multi_array_agg.out b/src/test/regress/expected/multi_array_agg.out index d073ce6fe..35a445bd9 100644 --- a/src/test/regress/expected/multi_array_agg.out +++ b/src/test/regress/expected/multi_array_agg.out @@ -3,11 +3,18 @@ -- SET citus.next_shard_id TO 520000; SET citus.coordinator_aggregation_strategy TO 'disabled'; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION array_sort (ANYARRAY) RETURNS ANYARRAY LANGUAGE SQL AS $$ SELECT ARRAY(SELECT unnest($1) ORDER BY 1) $$; +$r$); + run_command_on_master_and_workers +--------------------------------------------------------------------- + +(1 row) + -- Check multi_cat_agg() aggregate which is used to implement array_agg() SELECT array_cat_agg(i) FROM (VALUES (ARRAY[1,2]), (NULL), (ARRAY[3,4])) AS t(i); array_cat_agg @@ -132,17 +139,18 @@ SELECT l_quantity, array_sort(array_agg(l_orderkey * 2 + 1)) FROM lineitem WHERE (4 rows) -- Check that we can execute array_agg() with an expression containing NULL values -SELECT array_agg(case when l_quantity > 20 then l_quantity else NULL end) - FROM lineitem WHERE l_orderkey < 10; - array_agg +SELECT array_sort(array_agg(case when l_quantity > 20 then l_quantity else NULL end)) + FROM lineitem WHERE l_orderkey < 10; + array_sort --------------------------------------------------------------------- - {NULL,36.00,NULL,28.00,24.00,32.00,38.00,45.00,49.00,27.00,NULL,28.00,26.00,30.00,NULL,26.00,50.00,37.00,NULL,NULL,46.00,28.00,38.00,35.00,NULL} + {24.00,26.00,26.00,27.00,28.00,28.00,28.00,30.00,32.00,35.00,36.00,37.00,38.00,38.00,45.00,46.00,49.00,50.00,NULL,NULL,NULL,NULL,NULL,NULL,NULL} (1 row) -- Check that we return NULL in case there are no input rows to array_agg() -SELECT array_agg(l_orderkey) FROM lineitem WHERE l_quantity < 0; - array_agg +SELECT array_sort(array_agg(l_orderkey)) + FROM lineitem WHERE l_orderkey < 0; + array_sort --------------------------------------------------------------------- - + {} (1 row) diff --git a/src/test/regress/expected/multi_create_table.out b/src/test/regress/expected/multi_create_table.out index 2f28b3fda..9d3f55ac9 100644 --- a/src/test/regress/expected/multi_create_table.out +++ b/src/test/regress/expected/multi_create_table.out @@ -4,7 +4,7 @@ -- Create new table definitions for use in testing in distributed planning and -- execution functionality. Also create indexes to boost performance. Since we -- need to cover both reference join and partitioned join, we have created --- reference and append distributed version of orders, customer and part tables. +-- reference and hash-distributed version of orders, customer and part tables. SET citus.next_shard_id TO 360000; -- this function is dropped in Citus10, added here for tests CREATE OR REPLACE FUNCTION pg_catalog.master_create_distributed_table(table_name regclass, @@ -41,10 +41,7 @@ CREATE TABLE lineitem ( l_shipmode char(10) not null, l_comment varchar(44) not null, PRIMARY KEY(l_orderkey, l_linenumber) ); -SELECT create_distributed_table('lineitem', 'l_orderkey', 'append'); -WARNING: table "lineitem" has a UNIQUE or EXCLUDE constraint -DETAIL: UNIQUE constraints, EXCLUDE constraints, and PRIMARY KEYs on append-partitioned tables cannot be enforced. -HINT: Consider using hash partitioning. +SELECT create_distributed_table('lineitem', 'l_orderkey', 'hash', shard_count := 2); create_distributed_table --------------------------------------------------------------------- @@ -62,10 +59,7 @@ CREATE TABLE orders ( o_shippriority integer not null, o_comment varchar(79) not null, PRIMARY KEY(o_orderkey) ); -SELECT create_distributed_table('orders', 'o_orderkey', 'append'); -WARNING: table "orders" has a UNIQUE or EXCLUDE constraint -DETAIL: UNIQUE constraints, EXCLUDE constraints, and PRIMARY KEYs on append-partitioned tables cannot be enforced. -HINT: Consider using hash partitioning. +SELECT create_distributed_table('orders', 'o_orderkey', 'hash', colocate_with := 'lineitem'); create_distributed_table --------------------------------------------------------------------- @@ -189,14 +183,14 @@ CREATE TABLE supplier_single_shard s_acctbal decimal(15,2) not null, s_comment varchar(101) not null ); -SELECT create_distributed_table('supplier_single_shard', 's_suppkey', 'append'); +SELECT create_distributed_table('supplier_single_shard', 's_suppkey', 'hash', shard_count := 1); create_distributed_table --------------------------------------------------------------------- (1 row) CREATE TABLE mx_table_test (col1 int, col2 text); -SET citus.next_shard_id TO 360009; +SET citus.next_shard_id TO 360013; -- Test initial data loading CREATE TABLE data_load_test (col1 int, col2 text, col3 serial); INSERT INTO data_load_test VALUES (132, 'hello'); diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index 140ce1445..832b64a31 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -47,7 +47,7 @@ Sort Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: l_quantity - -> Seq Scan on lineitem_290000 lineitem + -> Seq Scan on lineitem_360000 lineitem -- Test disable hash aggregate SET enable_hashagg TO off; EXPLAIN (COSTS FALSE, FORMAT TEXT) @@ -66,7 +66,7 @@ Sort Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: l_quantity - -> Seq Scan on lineitem_290000 lineitem + -> Seq Scan on lineitem_360000 lineitem SET enable_hashagg TO on; -- Test JSON format EXPLAIN (COSTS FALSE, FORMAT JSON) @@ -110,7 +110,7 @@ EXPLAIN (COSTS FALSE, FORMAT JSON) { "Node Type": "Seq Scan", "Parallel Aware": false, - "Relation Name": "lineitem_290000", + "Relation Name": "lineitem_360000", "Alias": "lineitem" } ] @@ -184,7 +184,7 @@ EXPLAIN (COSTS FALSE, FORMAT XML) Seq Scan false - lineitem_290000 + lineitem_360000 lineitem @@ -246,7 +246,7 @@ EXPLAIN (COSTS FALSE, FORMAT YAML) Plans: - Node Type: "Seq Scan" Parallel Aware: false - Relation Name: "lineitem_290000" + Relation Name: "lineitem_360000" Alias: "lineitem" -- Test Text format @@ -264,7 +264,7 @@ Sort Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: l_quantity - -> Seq Scan on lineitem_290000 lineitem + -> Seq Scan on lineitem_360000 lineitem -- Test analyze (with TIMING FALSE and SUMMARY FALSE for consistent output) EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) SELECT l_quantity, count(*) count_quantity FROM lineitem @@ -283,7 +283,7 @@ Sort (actual rows=50 loops=1) Node: host=localhost port=xxxxx dbname=regression -> HashAggregate (actual rows=50 loops=1) Group Key: l_quantity - -> Seq Scan on lineitem_290000 lineitem (actual rows=6000 loops=1) + -> Seq Scan on lineitem_360000 lineitem (actual rows=5894 loops=1) -- EXPLAIN ANALYZE doesn't show worker tasks for repartition joins yet SET citus.shard_count TO 3; CREATE TABLE t1(a int, b int); @@ -350,13 +350,13 @@ Sort (actual rows=50 loops=1) Tuple data received from nodes: 1800 bytes Tasks Shown: One of 2 -> Task - Query: SELECT l_quantity, count(*) AS count_quantity FROM public.lineitem_290000 lineitem WHERE true GROUP BY l_quantity + Query: SELECT l_quantity, count(*) AS count_quantity FROM public.lineitem_360000 lineitem WHERE true GROUP BY l_quantity Tuple data received from node: 900 bytes Node: host=localhost port=xxxxx dbname=regression -> HashAggregate (actual rows=50 loops=1) Output: l_quantity, count(*) Group Key: lineitem.l_quantity - -> Seq Scan on public.lineitem_290000 lineitem (actual rows=6000 loops=1) + -> Seq Scan on public.lineitem_360000 lineitem (actual rows=5894 loops=1) Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Test query text output, with ANALYZE OFF EXPLAIN (COSTS FALSE, ANALYZE FALSE, TIMING FALSE, SUMMARY FALSE, VERBOSE TRUE) @@ -373,12 +373,12 @@ Sort Task Count: 2 Tasks Shown: One of 2 -> Task - Query: SELECT l_quantity, count(*) AS count_quantity FROM public.lineitem_290000 lineitem WHERE true GROUP BY l_quantity + Query: SELECT l_quantity, count(*) AS count_quantity FROM public.lineitem_360000 lineitem WHERE true GROUP BY l_quantity Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Output: l_quantity, count(*) Group Key: lineitem.l_quantity - -> Seq Scan on public.lineitem_290000 lineitem + -> Seq Scan on public.lineitem_360000 lineitem Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Test verbose EXPLAIN (COSTS FALSE, VERBOSE TRUE) @@ -390,11 +390,11 @@ Aggregate Task Count: 2 Tasks Shown: One of 2 -> Task - Query: SELECT sum(l_quantity), sum(l_quantity), count(l_quantity) FROM public.lineitem_290000 lineitem WHERE true + Query: SELECT sum(l_quantity), sum(l_quantity), count(l_quantity) FROM public.lineitem_360000 lineitem WHERE true Node: host=localhost port=xxxxx dbname=regression -> Aggregate Output: sum(l_quantity), sum(l_quantity), count(l_quantity) - -> Seq Scan on public.lineitem_290000 lineitem + -> Seq Scan on public.lineitem_360000 lineitem Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Test join EXPLAIN (COSTS FALSE) @@ -414,19 +414,19 @@ Limit Sort Key: lineitem.l_quantity -> Hash Join Hash Cond: (lineitem.l_orderkey = orders.o_orderkey) - -> Seq Scan on lineitem_290000 lineitem + -> Seq Scan on lineitem_360000 lineitem Filter: (l_quantity < 5.0) -> Hash - -> Seq Scan on orders_290002 orders + -> Seq Scan on orders_360002 orders -- Test insert EXPLAIN (COSTS FALSE) INSERT INTO lineitem VALUES (1,0), (2, 0), (3, 0), (4, 0); Custom Scan (Citus Adaptive) - Task Count: 1 - Tasks Shown: All + Task Count: 2 + Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression - -> Insert on lineitem_290000 citus_table_alias + -> Insert on lineitem_360000 citus_table_alias -> Values Scan on "*VALUES*" -- Test update EXPLAIN (COSTS FALSE) @@ -438,8 +438,8 @@ Custom Scan (Citus Adaptive) Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression - -> Update on lineitem_290000 lineitem - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Update on lineitem_360000 lineitem + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem Index Cond: (l_orderkey = 1) Filter: (l_partkey = 0) -- Test analyze (with TIMING FALSE and SUMMARY FALSE for consistent output) @@ -453,8 +453,8 @@ Custom Scan (Citus Adaptive) (actual rows=0 loops=1) Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression - -> Update on lineitem_290000 lineitem (actual rows=0 loops=1) - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (actual rows=0 loops=1) + -> Update on lineitem_360000 lineitem (actual rows=0 loops=1) + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem (actual rows=0 loops=1) Index Cond: (l_orderkey = 1) Filter: (l_partkey = 0) Rows Removed by Filter: 6 @@ -468,8 +468,8 @@ Custom Scan (Citus Adaptive) Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression - -> Delete on lineitem_290000 lineitem - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Delete on lineitem_360000 lineitem + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem Index Cond: (l_orderkey = 1) Filter: (l_partkey = 0) -- Test zero-shard update @@ -495,7 +495,7 @@ Custom Scan (Citus Adaptive) Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem Index Cond: (l_orderkey = 5) SELECT true AS valid FROM explain_xml($$ SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$); @@ -512,7 +512,7 @@ Custom Scan (Citus Adaptive) Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression - -> Seq Scan on lineitem_290000 lineitem + -> Seq Scan on lineitem_360000 lineitem -- Test having EXPLAIN (COSTS FALSE, VERBOSE TRUE) SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem @@ -525,11 +525,11 @@ Aggregate Task Count: 2 Tasks Shown: One of 2 -> Task - Query: SELECT sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity) AS worker_column_4 FROM public.lineitem_290000 lineitem WHERE true + Query: SELECT sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity) AS worker_column_4 FROM public.lineitem_360000 lineitem WHERE true Node: host=localhost port=xxxxx dbname=regression -> Aggregate Output: sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity) - -> Seq Scan on public.lineitem_290000 lineitem + -> Seq Scan on public.lineitem_360000 lineitem Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Test having without aggregate EXPLAIN (COSTS FALSE, VERBOSE TRUE) @@ -545,12 +545,12 @@ HashAggregate Task Count: 2 Tasks Shown: One of 2 -> Task - Query: SELECT l_quantity, l_quantity AS worker_column_2 FROM public.lineitem_290000 lineitem WHERE true GROUP BY l_quantity + Query: SELECT l_quantity, l_quantity AS worker_column_2 FROM public.lineitem_360000 lineitem WHERE true GROUP BY l_quantity Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Output: l_quantity, l_quantity Group Key: lineitem.l_quantity - -> Seq Scan on public.lineitem_290000 lineitem + -> Seq Scan on public.lineitem_360000 lineitem Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Subquery pushdown tests with explain EXPLAIN (COSTS OFF) @@ -907,13 +907,18 @@ EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; Aggregate -> Custom Scan (Citus Adaptive) - Task Count: 1 + Task Count: 2 Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate - -> Seq Scan on lineitem_290001 lineitem - Filter: (l_orderkey > 9030) + -> Index Only Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey > 9030) + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Index Only Scan using lineitem_pkey_360001 on lineitem_360001 lineitem + Index Cond: (l_orderkey > 9030) SELECT true AS valid FROM explain_xml($$ SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$); t @@ -1000,13 +1005,13 @@ Sort (actual rows=50 loops=1) Node: host=localhost port=xxxxx dbname=regression -> HashAggregate (actual rows=50 loops=1) Group Key: l_quantity - -> Seq Scan on lineitem_290000 lineitem (actual rows=6000 loops=1) + -> Seq Scan on lineitem_360000 lineitem (actual rows=5894 loops=1) -> Task Tuple data received from node: 900 bytes Node: host=localhost port=xxxxx dbname=regression -> HashAggregate (actual rows=50 loops=1) Group Key: l_quantity - -> Seq Scan on lineitem_290001 lineitem (actual rows=6000 loops=1) + -> Seq Scan on lineitem_360001 lineitem (actual rows=6106 loops=1) SET citus.explain_all_tasks TO off; -- Test update with subquery EXPLAIN (COSTS FALSE) @@ -1046,13 +1051,13 @@ EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; Aggregate -> Custom Scan (Citus Adaptive) - Task Count: 1 - Tasks Shown: All + Task Count: 2 + Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate - -> Seq Scan on lineitem_290001 lineitem - Filter: (l_orderkey > 9030) + -> Index Only Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey > 9030) -- Test re-partition join EXPLAIN (COSTS FALSE) SELECT count(*) @@ -1062,14 +1067,17 @@ EXPLAIN (COSTS FALSE) AND l_suppkey = s_suppkey; Aggregate -> Custom Scan (Citus Adaptive) - Task Count: 1 + Task Count: 4 Tasks Shown: None, not supported for re-partition queries -> MapMergeJob Map Task Count: 1 - Merge Task Count: 1 + Merge Task Count: 4 -> MapMergeJob Map Task Count: 2 Merge Task Count: 1 + -> MapMergeJob + Map Task Count: 1 + Merge Task Count: 4 EXPLAIN (COSTS FALSE, FORMAT JSON) SELECT count(*) FROM lineitem, orders, customer_append, supplier_single_shard @@ -1090,18 +1098,22 @@ EXPLAIN (COSTS FALSE, FORMAT JSON) "Parallel Aware": false, "Distributed Query": { "Job": { - "Task Count": 1, + "Task Count": 4, "Tasks Shown": "None, not supported for re-partition queries", "Dependent Jobs": [ { "Map Task Count": 1, - "Merge Task Count": 1, + "Merge Task Count": 4, "Dependent Jobs": [ { "Map Task Count": 2, "Merge Task Count": 1 } ] + }, + { + "Map Task Count": 1, + "Merge Task Count": 4 } ] } @@ -1138,12 +1150,12 @@ EXPLAIN (COSTS FALSE, FORMAT XML) false - 1 + 4 None, not supported for re-partition queries 1 - 1 + 4 2 @@ -1151,6 +1163,10 @@ EXPLAIN (COSTS FALSE, FORMAT XML) + + 1 + 4 + @@ -1196,11 +1212,13 @@ EXPLAIN (COSTS FALSE, FORMAT YAML) Parallel Aware: false Distributed Query: Job: - Task Count: 1 + Task Count: 4 Tasks Shown: "None, not supported for re-partition queries" Dependent Jobs: - Map Task Count: 2 - Merge Task Count: 1 + Merge Task Count: 4 + - Map Task Count: 1 + Merge Task Count: 4 -- ensure local plans display correctly CREATE TABLE lineitem_clone (LIKE lineitem); EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem_clone; @@ -1215,51 +1233,51 @@ Aggregate -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate - -> Seq Scan on lineitem_290000 lineitem + -> Seq Scan on lineitem_360000 lineitem -- ensure EXPLAIN EXECUTE doesn't crash PREPARE task_tracker_query AS SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; EXPLAIN (COSTS FALSE) EXECUTE task_tracker_query; Aggregate -> Custom Scan (Citus Adaptive) - Task Count: 1 - Tasks Shown: All + Task Count: 2 + Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate - -> Seq Scan on lineitem_290001 lineitem - Filter: (l_orderkey > 9030) + -> Index Only Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey > 9030) PREPARE router_executor_query AS SELECT l_quantity FROM lineitem WHERE l_orderkey = 5; EXPLAIN EXECUTE router_executor_query; -Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=100000 width=18) +Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (cost=0.28..13.60 rows=4 width=5) + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem (cost=0.28..13.60 rows=4 width=5) Index Cond: (l_orderkey = 5) PREPARE real_time_executor_query AS SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; EXPLAIN (COSTS FALSE) EXECUTE real_time_executor_query; Aggregate -> Custom Scan (Citus Adaptive) - Task Count: 1 - Tasks Shown: All + Task Count: 2 + Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate - -> Seq Scan on lineitem_290001 lineitem - Filter: (l_orderkey > 9030) + -> Index Only Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey > 9030) -- EXPLAIN EXECUTE of parametrized prepared statements is broken, but -- at least make sure to fail without crashing PREPARE router_executor_query_param(int) AS SELECT l_quantity FROM lineitem WHERE l_orderkey = $1; EXPLAIN EXECUTE router_executor_query_param(5); -Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=100000 width=18) +Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (cost=0.28..13.60 rows=4 width=5) + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem (cost=0.28..13.60 rows=4 width=5) Index Cond: (l_orderkey = 5) EXPLAIN (ANALYZE ON, COSTS OFF, TIMING OFF, SUMMARY OFF) EXECUTE router_executor_query_param(5); Custom Scan (Citus Adaptive) (actual rows=3 loops=1) @@ -1269,7 +1287,7 @@ Custom Scan (Citus Adaptive) (actual rows=3 loops=1) -> Task Tuple data received from node: 30 bytes Node: host=localhost port=xxxxx dbname=regression - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (actual rows=3 loops=1) + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem (actual rows=3 loops=1) Index Cond: (l_orderkey = 5) \set VERBOSITY TERSE PREPARE multi_shard_query_param(int) AS UPDATE lineitem SET l_quantity = $1; @@ -1280,8 +1298,8 @@ Custom Scan (Citus Adaptive) Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression - -> Update on lineitem_290000 lineitem - -> Seq Scan on lineitem_290000 lineitem + -> Update on lineitem_360000 lineitem + -> Seq Scan on lineitem_360000 lineitem ROLLBACK; BEGIN; EXPLAIN (ANALYZE ON, COSTS OFF, TIMING OFF, SUMMARY OFF) EXECUTE multi_shard_query_param(5); @@ -1290,8 +1308,8 @@ Custom Scan (Citus Adaptive) (actual rows=0 loops=1) Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression - -> Update on lineitem_290000 lineitem (actual rows=0 loops=1) - -> Seq Scan on lineitem_290000 lineitem (actual rows=6000 loops=1) + -> Update on lineitem_360000 lineitem (actual rows=0 loops=1) + -> Seq Scan on lineitem_360000 lineitem (actual rows=5894 loops=1) ROLLBACK; \set VERBOSITY DEFAULT -- test explain in a transaction with alter table to test we use right connections diff --git a/src/test/regress/expected/multi_hash_pruning.out b/src/test/regress/expected/multi_hash_pruning.out index ed7d935f8..6511f1d21 100644 --- a/src/test/regress/expected/multi_hash_pruning.out +++ b/src/test/regress/expected/multi_hash_pruning.out @@ -368,13 +368,15 @@ DEBUG: assigned task to node localhost:xxxxx 12000 (1 row) --- Check whether we support range queries with append distributed table +-- Check whether we support range queries SELECT count(*) FROM lineitem WHERE l_orderkey >= 1 AND l_orderkey <= 3; -DEBUG: Router planner does not support append-partitioned tables. -DEBUG: constraint (lteq) value: '3'::bigint -DEBUG: constraint (gteq) value: '1'::bigint -DEBUG: shard count after pruning for lineitem: 1 +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count --------------------------------------------------------------------- @@ -383,24 +385,31 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM lineitem WHERE (l_orderkey >= 1 AND l_orderkey <= 3) AND (l_quantity > 11 AND l_quantity < 22); -DEBUG: Router planner does not support append-partitioned tables. -DEBUG: constraint (lteq) value: '3'::bigint -DEBUG: constraint (gteq) value: '1'::bigint -DEBUG: shard count after pruning for lineitem: 1 +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count --------------------------------------------------------------------- 1 (1 row) --- Check whether we support IN/ANY in subquery with append and range distributed table +-- Check whether we support IN/ANY in subquery SELECT count(*) FROM lineitem WHERE l_orderkey = ANY ('{1,2,3}'); -DEBUG: Router planner does not support append-partitioned tables. DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count after pruning for lineitem: 1 +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: constraint value: '1'::bigint +DEBUG: constraint value: '2'::bigint +DEBUG: constraint value: '3'::bigint +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count --------------------------------------------------------------------- @@ -409,11 +418,16 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM lineitem WHERE l_orderkey IN (1,2,3); -DEBUG: Router planner does not support append-partitioned tables. DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count after pruning for lineitem: 1 +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: constraint value: '1'::bigint +DEBUG: constraint value: '2'::bigint +DEBUG: constraint value: '3'::bigint +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count --------------------------------------------------------------------- @@ -422,7 +436,9 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM lineitem WHERE l_orderkey = ANY(NULL) OR TRUE; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: Router planner cannot handle multi-shard select queries DEBUG: no sharding pruning constraints on lineitem found DEBUG: shard count after pruning for lineitem: 2 DEBUG: assigned task to node localhost:xxxxx diff --git a/src/test/regress/expected/multi_having_pushdown.out b/src/test/regress/expected/multi_having_pushdown.out index 7c15c3976..d2051a55c 100644 --- a/src/test/regress/expected/multi_having_pushdown.out +++ b/src/test/regress/expected/multi_having_pushdown.out @@ -47,23 +47,24 @@ EXPLAIN (COSTS FALSE) FROM lineitem GROUP BY l_orderkey HAVING sum(l_quantity) > 24 ORDER BY 2 DESC, 1 ASC LIMIT 3; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit -> Sort - Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_orderkey - -> HashAggregate - Group Key: remote_scan.l_orderkey - Filter: (sum(remote_scan.worker_column_3) > '24'::numeric) - -> Custom Scan (Citus Adaptive) - Task Count: 2 - Tasks Shown: One of 2 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_orderkey - -> Seq Scan on lineitem_290000 lineitem -(14 rows) + Sort Key: remote_scan.revenue DESC, remote_scan.l_orderkey + -> Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Sort + Sort Key: (sum((l_extendedprice * l_discount))) DESC, l_orderkey + -> HashAggregate + Group Key: l_orderkey + Filter: (sum(l_quantity) > '24'::numeric) + -> Seq Scan on lineitem_360000 lineitem +(15 rows) -- and don't push down when not grouped by partition column EXPLAIN (COSTS FALSE) diff --git a/src/test/regress/expected/multi_index_statements.out b/src/test/regress/expected/multi_index_statements.out index f4ef97340..3ac1f063f 100644 --- a/src/test/regress/expected/multi_index_statements.out +++ b/src/test/regress/expected/multi_index_statements.out @@ -182,8 +182,6 @@ SELECT count(*) FROM pg_indexes WHERE tablename LIKE 'index_test_append%'; \c - - - :master_port SET search_path TO multi_index_statements, public; -- Verify that we error out on unsupported statement types -CREATE UNIQUE INDEX try_index ON public.lineitem (l_orderkey); -ERROR: creating unique indexes on append-partitioned tables is currently unsupported CREATE INDEX try_index ON lineitem (l_orderkey) TABLESPACE newtablespace; ERROR: specifying tablespaces with CREATE INDEX statements is currently unsupported CREATE UNIQUE INDEX try_unique_range_index ON index_test_range(b); @@ -306,8 +304,8 @@ SELECT * FROM pg_indexes WHERE tablename LIKE 'index_test_%' ORDER BY indexname; SELECT indrelid::regclass, indexrelid::regclass FROM pg_index WHERE indrelid = (SELECT relname FROM pg_class WHERE relname LIKE 'lineitem%' ORDER BY relname LIMIT 1)::regclass AND NOT indisprimary AND indexrelid::regclass::text NOT LIKE 'lineitem_time_index%' ORDER BY 1,2; indrelid | indexrelid --------------------------------------------------------------------- - lineitem_290000 | lineitem_l_orderkey_idx_290000 - lineitem_290000 | lineitem_l_shipdate_idx_290000 + lineitem_360000 | lineitem_l_orderkey_idx_360000 + lineitem_360000 | lineitem_l_shipdate_idx_360000 (2 rows) SELECT * FROM pg_indexes WHERE tablename LIKE 'index_test_%' ORDER BY indexname; diff --git a/src/test/regress/expected/multi_join_order_additional.out b/src/test/regress/expected/multi_join_order_additional.out index 3cb508514..d0eaa1a4d 100644 --- a/src/test/regress/expected/multi_join_order_additional.out +++ b/src/test/regress/expected/multi_join_order_additional.out @@ -73,10 +73,10 @@ SET client_min_messages TO DEBUG2; EXPLAIN (COSTS OFF) SELECT l1.l_quantity FROM lineitem l1, lineitem l2 WHERE l1.l_orderkey = l2.l_orderkey AND l1.l_quantity > 5; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries LOG: join order: [ "lineitem" ][ local partition join "lineitem" ] -DEBUG: join prunable for intervals [1,5986] and [8997,14947] -DEBUG: join prunable for intervals [8997,14947] and [1,5986] +DEBUG: join prunable for intervals [-2147483648,-1] and [0,2147483647] +DEBUG: join prunable for intervals [0,2147483647] and [-2147483648,-1] QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus Adaptive) @@ -106,7 +106,7 @@ ERROR: complex joins are only supported when all distributed tables are joined EXPLAIN (COSTS OFF) SELECT count(*) FROM orders, lineitem_hash WHERE o_orderkey = l_orderkey; -LOG: join order: [ "orders" ][ single range partition join "lineitem_hash" ] +LOG: join order: [ "orders" ][ dual partition join "lineitem_hash" ] QUERY PLAN --------------------------------------------------------------------- Aggregate diff --git a/src/test/regress/expected/multi_join_order_tpch_small.out b/src/test/regress/expected/multi_join_order_tpch_small.out index c2e707867..b0b32bb1d 100644 --- a/src/test/regress/expected/multi_join_order_tpch_small.out +++ b/src/test/regress/expected/multi_join_order_tpch_small.out @@ -49,15 +49,13 @@ ORDER BY revenue DESC, o_orderdate; LOG: join order: [ "orders" ][ reference join "customer" ][ local partition join "lineitem" ] - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Sort - Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.o_orderdate - -> HashAggregate - Group Key: remote_scan.l_orderkey, remote_scan.o_orderdate, remote_scan.o_shippriority - -> Custom Scan (Citus Adaptive) - explain statements for distributed queries are not enabled -(6 rows) + Sort Key: remote_scan.revenue DESC, remote_scan.o_orderdate + -> Custom Scan (Citus Adaptive) + explain statements for distributed queries are not enabled +(4 rows) -- Query #10 from the TPC-H decision support benchmark EXPLAIN (COSTS OFF) diff --git a/src/test/regress/expected/multi_join_pruning.out b/src/test/regress/expected/multi_join_pruning.out index 66aff4a3e..77131da45 100644 --- a/src/test/regress/expected/multi_join_pruning.out +++ b/src/test/regress/expected/multi_join_pruning.out @@ -8,9 +8,9 @@ SET citus.explain_distributed_queries TO off; SET client_min_messages TO DEBUG2; SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; -DEBUG: Router planner does not support append-partitioned tables. -DEBUG: join prunable for intervals [1,5986] and [8997,14947] -DEBUG: join prunable for intervals [8997,14947] and [1,5986] +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1] and [0,2147483647] +DEBUG: join prunable for intervals [0,2147483647] and [-2147483648,-1] sum | avg --------------------------------------------------------------------- 36089 | 3.0074166666666667 @@ -18,8 +18,9 @@ DEBUG: join prunable for intervals [8997,14947] and [1,5986] SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey AND l_orderkey > 9030; -DEBUG: Router planner does not support append-partitioned tables. -DEBUG: join prunable for intervals [8997,14947] and [1,5986] +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1] and [0,2147483647] +DEBUG: join prunable for intervals [0,2147483647] and [-2147483648,-1] sum | avg --------------------------------------------------------------------- 17999 | 3.0189533713518953 @@ -29,7 +30,9 @@ DEBUG: join prunable for intervals [8997,14947] and [1,5986] -- works as expected in this case. SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey AND l_orderkey > 20000; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1] and [0,2147483647] +DEBUG: join prunable for intervals [0,2147483647] and [-2147483648,-1] sum | avg --------------------------------------------------------------------- | @@ -44,36 +47,28 @@ where logicalrelid='lineitem'::regclass or order by shardid; logicalrelid | shardid | shardstorage | shardminvalue | shardmaxvalue --------------------------------------------------------------------- - lineitem | 290000 | t | 1 | 5986 - lineitem | 290001 | t | 8997 | 14947 - orders | 290002 | t | 1 | 5986 - orders | 290003 | t | 8997 | 14947 + lineitem | 360000 | t | -2147483648 | -1 + lineitem | 360001 | t | 0 | 2147483647 + orders | 360002 | t | -2147483648 | -1 + orders | 360003 | t | 0 | 2147483647 (4 rows) -set citus.explain_distributed_queries to on; --- explain the query before actually executing it -EXPLAIN SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders - WHERE l_orderkey = o_orderkey AND l_orderkey > 6000 AND o_orderkey < 6000; -DEBUG: Router planner does not support append-partitioned tables. -DEBUG: join prunable for intervals [8997,14947] and [1,5986] - QUERY PLAN ---------------------------------------------------------------------- - Aggregate (cost=750.01..750.02 rows=1 width=40) - -> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=100000 width=24) - Task Count: 0 - Tasks Shown: All -(4 rows) - -set citus.explain_distributed_queries to off; set client_min_messages to debug3; SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey AND l_orderkey > 6000 AND o_orderkey < 6000; -DEBUG: Router planner does not support append-partitioned tables. -DEBUG: constraint (gt) value: '6000'::bigint -DEBUG: shard count after pruning for lineitem: 1 -DEBUG: constraint (lt) value: '6000'::bigint -DEBUG: shard count after pruning for orders: 1 -DEBUG: join prunable for intervals [8997,14947] and [1,5986] +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: no sharding pruning constraints on orders found +DEBUG: shard count after pruning for orders: 2 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 +DEBUG: no sharding pruning constraints on orders found +DEBUG: shard count after pruning for orders: 2 +DEBUG: join prunable for intervals [-2147483648,-1] and [0,2147483647] +DEBUG: join prunable for intervals [0,2147483647] and [-2147483648,-1] +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx sum | avg --------------------------------------------------------------------- | @@ -83,7 +78,7 @@ set client_min_messages to debug2; -- Make sure that we can handle filters without a column SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey AND false; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Creating router plan sum | avg --------------------------------------------------------------------- | @@ -92,7 +87,7 @@ DEBUG: Router planner does not support append-partitioned tables. SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem INNER JOIN orders ON (l_orderkey = o_orderkey) WHERE false; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Creating router plan sum | avg --------------------------------------------------------------------- | diff --git a/src/test/regress/expected/multi_json_agg.out b/src/test/regress/expected/multi_json_agg.out index c6d8ceb60..1d198c2f7 100644 --- a/src/test/regress/expected/multi_json_agg.out +++ b/src/test/regress/expected/multi_json_agg.out @@ -3,6 +3,7 @@ -- SET citus.next_shard_id TO 520000; SET citus.coordinator_aggregation_strategy TO 'disabled'; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION array_sort (json) RETURNS json LANGUAGE SQL AS $$ @@ -10,6 +11,12 @@ SELECT json_agg(value) FROM ( SELECT value FROM json_array_elements($1) ORDER BY value::jsonb ) t $$; +$r$); + run_command_on_master_and_workers +--------------------------------------------------------------------- + +(1 row) + -- Check multi_cat_agg() aggregate which is used to implement json_agg() SELECT json_cat_agg(i) FROM (VALUES ('[1,{"a":2}]'::json), ('[null]'::json), (NULL), ('["3",5,4]'::json)) AS t(i); @@ -135,46 +142,40 @@ SELECT l_quantity, array_sort(json_agg(l_orderkey * 2 + 1)) FROM lineitem WHERE (4 rows) -- Check that we can execute json_agg() with an expression containing NULL values -SELECT json_agg(case when l_quantity > 20 then l_quantity else NULL end) +SELECT array_sort(json_agg(case when l_quantity > 20 then l_quantity else NULL end)) FROM lineitem WHERE l_orderkey < 5; - json_agg + array_sort --------------------------------------------------------------------- - [null, 36.00, null, 28.00, 24.00, 32.00, 38.00, 45.00, 49.00, 27.00, null, 28.00, 26.00, 30.00] + [null, null, null, 24.00, 26.00, 27.00, 28.00, 28.00, 30.00, 32.00, 36.00, 38.00, 45.00, 49.00] (1 row) -- Check that we can execute json_agg() with an expression containing different types -SELECT json_agg(case when l_quantity > 20 then to_json(l_quantity) else '"f"'::json end) +SELECT array_sort(json_agg(case when l_quantity > 20 then to_json(l_quantity) else '"f"'::json end)) FROM lineitem WHERE l_orderkey < 5; - json_agg + array_sort --------------------------------------------------------------------- - ["f", 36.00, "f", 28.00, 24.00, 32.00, 38.00, 45.00, 49.00, 27.00, "f", 28.00, 26.00, 30.00] + ["f", "f", "f", 24.00, 26.00, 27.00, 28.00, 28.00, 30.00, 32.00, 36.00, 38.00, 45.00, 49.00] (1 row) -- Check that we can execute json_agg() with an expression containing json arrays -SELECT json_agg(json_build_array(l_quantity, l_shipdate)) +SELECT array_sort(json_agg(json_build_array(l_quantity, l_shipdate))) FROM lineitem WHERE l_orderkey < 3; - json_agg + array_sort --------------------------------------------------------------------- - [[17.00, "1996-03-13"], [36.00, "1996-04-12"], [8.00, "1996-01-29"], [28.00, "1996-04-21"], [24.00, "1996-03-30"], [32.00, "1996-01-30"], [38.00, "1997-01-28"]] + [[8.00, "1996-01-29"], [17.00, "1996-03-13"], [24.00, "1996-03-30"], [28.00, "1996-04-21"], [32.00, "1996-01-30"], [36.00, "1996-04-12"], [38.00, "1997-01-28"]] (1 row) -- Check that we can execute json_agg() with an expression containing arrays -SELECT json_agg(ARRAY[l_quantity, l_orderkey]) +SELECT array_sort(json_agg(ARRAY[l_quantity, l_orderkey])) FROM lineitem WHERE l_orderkey < 3; - json_agg + array_sort --------------------------------------------------------------------- - [[17.00,1], + - [36.00,1], + - [8.00,1], + - [28.00,1], + - [24.00,1], + - [32.00,1], + - [38.00,2]] + [[8.00,1], [17.00,1], [24.00,1], [28.00,1], [32.00,1], [36.00,1], [38.00,2]] (1 row) -- Check that we return NULL in case there are no input rows to json_agg() -SELECT json_agg(l_orderkey) FROM lineitem WHERE l_quantity < 0; - json_agg +SELECT array_sort(json_agg(l_orderkey)) FROM lineitem WHERE l_quantity < 0; + array_sort --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/multi_json_object_agg.out b/src/test/regress/expected/multi_json_object_agg.out index ab0bcccc7..c3b80d921 100644 --- a/src/test/regress/expected/multi_json_object_agg.out +++ b/src/test/regress/expected/multi_json_object_agg.out @@ -8,6 +8,7 @@ RETURNS bigint LANGUAGE SQL AS $$ SELECT count(*) FROM (SELECT * FROM json_object_keys($1)) t $$; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION keys_sort (json) RETURNS json LANGUAGE SQL AS $$ @@ -15,6 +16,12 @@ SELECT json_object_agg(key, value) FROM ( SELECT * FROM json_each($1) ORDER BY key ) t $$; +$r$); + run_command_on_master_and_workers +--------------------------------------------------------------------- + +(1 row) + -- Check multi_cat_agg() aggregate which is used to implement json_object_agg() SELECT json_cat_agg(i) FROM (VALUES ('{"c":[], "b":2}'::json), (NULL), ('{"d":null, "a":{"b":3}, "b":2}'::json)) AS t(i); @@ -182,8 +189,8 @@ SELECT keys_sort(json_object_agg(l_orderkey::text || l_linenumber::text, ARRAY[l (1 row) -- Check that we return NULL in case there are no input rows to json_object_agg() -SELECT json_object_agg(l_shipdate, l_orderkey) FROM lineitem WHERE l_quantity < 0; - json_object_agg +SELECT keys_sort(json_object_agg(l_shipdate, l_orderkey)) FROM lineitem WHERE l_quantity < 0; + keys_sort --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/multi_jsonb_agg.out b/src/test/regress/expected/multi_jsonb_agg.out index 48c17214e..33fe33318 100644 --- a/src/test/regress/expected/multi_jsonb_agg.out +++ b/src/test/regress/expected/multi_jsonb_agg.out @@ -3,6 +3,7 @@ -- SET citus.next_shard_id TO 520000; SET citus.coordinator_aggregation_strategy TO 'disabled'; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION array_sort (jsonb) RETURNS jsonb LANGUAGE SQL AS $$ @@ -10,6 +11,12 @@ SELECT jsonb_agg(value) FROM ( SELECT * FROM jsonb_array_elements($1) ORDER BY 1 ) t $$; +$r$); + run_command_on_master_and_workers +--------------------------------------------------------------------- + +(1 row) + -- Check multi_cat_agg() aggregate which is used to implement jsonb_agg() SELECT jsonb_cat_agg(i) FROM (VALUES ('[1,{"a":2}]'::jsonb), ('[null]'::jsonb), (NULL), ('["3",5,4]'::jsonb)) AS t(i); @@ -135,40 +142,41 @@ SELECT l_quantity, array_sort(jsonb_agg(l_orderkey * 2 + 1)) FROM lineitem WHERE (4 rows) -- Check that we can execute jsonb_agg() with an expression containing NULL values -SELECT jsonb_agg(case when l_quantity > 20 then l_quantity else NULL end) +SELECT array_sort(jsonb_agg(case when l_quantity > 20 then l_quantity else NULL end)) FROM lineitem WHERE l_orderkey < 5; - jsonb_agg + array_sort --------------------------------------------------------------------- - [null, 36.00, null, 28.00, 24.00, 32.00, 38.00, 45.00, 49.00, 27.00, null, 28.00, 26.00, 30.00] + [null, null, null, 24.00, 26.00, 27.00, 28.00, 28.00, 30.00, 32.00, 36.00, 38.00, 45.00, 49.00] (1 row) -- Check that we can execute jsonb_agg() with an expression containing different types -SELECT jsonb_agg(case when l_quantity > 20 then to_jsonb(l_quantity) else '"f"'::jsonb end) +SELECT array_sort(jsonb_agg(case when l_quantity > 20 then to_jsonb(l_quantity) else '"f"'::jsonb end)) FROM lineitem WHERE l_orderkey < 5; - jsonb_agg + array_sort --------------------------------------------------------------------- - ["f", 36.00, "f", 28.00, 24.00, 32.00, 38.00, 45.00, 49.00, 27.00, "f", 28.00, 26.00, 30.00] + ["f", "f", "f", 24.00, 26.00, 27.00, 28.00, 28.00, 30.00, 32.00, 36.00, 38.00, 45.00, 49.00] (1 row) -- Check that we can execute jsonb_agg() with an expression containing jsonb arrays -SELECT jsonb_agg(jsonb_build_array(l_quantity, l_shipdate)) +SELECT array_sort(jsonb_agg(jsonb_build_array(l_quantity, l_shipdate))) FROM lineitem WHERE l_orderkey < 3; - jsonb_agg + array_sort --------------------------------------------------------------------- - [[17.00, "1996-03-13"], [36.00, "1996-04-12"], [8.00, "1996-01-29"], [28.00, "1996-04-21"], [24.00, "1996-03-30"], [32.00, "1996-01-30"], [38.00, "1997-01-28"]] + [[8.00, "1996-01-29"], [17.00, "1996-03-13"], [24.00, "1996-03-30"], [28.00, "1996-04-21"], [32.00, "1996-01-30"], [36.00, "1996-04-12"], [38.00, "1997-01-28"]] (1 row) -- Check that we can execute jsonb_agg() with an expression containing arrays -SELECT jsonb_agg(ARRAY[l_quantity, l_orderkey]) +SELECT array_sort(jsonb_agg(ARRAY[l_quantity, l_orderkey])) FROM lineitem WHERE l_orderkey < 3; - jsonb_agg + array_sort --------------------------------------------------------------------- - [[17.00, 1], [36.00, 1], [8.00, 1], [28.00, 1], [24.00, 1], [32.00, 1], [38.00, 2]] + [[8.00, 1], [17.00, 1], [24.00, 1], [28.00, 1], [32.00, 1], [36.00, 1], [38.00, 2]] (1 row) -- Check that we return NULL in case there are no input rows to jsonb_agg() -SELECT jsonb_agg(l_orderkey) FROM lineitem WHERE l_quantity < 0; - jsonb_agg +SELECT array_sort(jsonb_agg(l_orderkey)) + FROM lineitem WHERE l_quantity < 0; + array_sort --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/multi_jsonb_object_agg.out b/src/test/regress/expected/multi_jsonb_object_agg.out index b3e5526da..3db7f159d 100644 --- a/src/test/regress/expected/multi_jsonb_object_agg.out +++ b/src/test/regress/expected/multi_jsonb_object_agg.out @@ -3,11 +3,32 @@ -- SET citus.next_shard_id TO 520000; SET citus.coordinator_aggregation_strategy TO 'disabled'; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION count_keys (jsonb) RETURNS bigint LANGUAGE SQL AS $$ SELECT count(*) FROM (SELECT * FROM jsonb_object_keys($1)) t $$; +$r$); + run_command_on_master_and_workers +--------------------------------------------------------------------- + +(1 row) + +SELECT run_command_on_master_and_workers($r$ +CREATE OR REPLACE FUNCTION keys_sort (jsonb) +RETURNS jsonb LANGUAGE SQL +AS $$ +SELECT jsonb_object_agg(key, value) FROM ( + SELECT * FROM jsonb_each($1) ORDER BY key +) t +$$; +$r$); + run_command_on_master_and_workers +--------------------------------------------------------------------- + +(1 row) + -- Check multi_cat_agg() aggregate which is used to implement jsonb_object_agg() SELECT jsonb_cat_agg(i) FROM (VALUES ('{"c":[], "b":2}'::jsonb), (NULL), ('{"d":null, "a":{"b":3}, "b":2}'::jsonb)) AS t(i); @@ -141,42 +162,42 @@ SELECT l_quantity, jsonb_object_agg(l_orderkey::text || l_linenumber::text, l_or (4 rows) -- Check that we can execute jsonb_object_agg() with an expression containing NULL values -SELECT jsonb_object_agg(l_orderkey::text || l_linenumber::text, - case when l_quantity > 20 then l_quantity else NULL end) +SELECT keys_sort(jsonb_object_agg(l_orderkey::text || l_linenumber::text, + case when l_quantity > 20 then l_quantity else NULL end)) FROM lineitem WHERE l_orderkey < 5; - jsonb_object_agg + keys_sort --------------------------------------------------------------------- {"11": null, "12": 36.00, "13": null, "14": 28.00, "15": 24.00, "16": 32.00, "21": 38.00, "31": 45.00, "32": 49.00, "33": 27.00, "34": null, "35": 28.00, "36": 26.00, "41": 30.00} (1 row) -- Check that we can execute jsonb_object_agg() with an expression containing different types -SELECT jsonb_object_agg(l_orderkey::text || l_linenumber::text, - case when l_quantity > 20 then to_jsonb(l_quantity) else '"f"'::jsonb end) +SELECT keys_sort(jsonb_object_agg(l_orderkey::text || l_linenumber::text, + case when l_quantity > 20 then to_jsonb(l_quantity) else '"f"'::jsonb end)) FROM lineitem WHERE l_orderkey < 5; - jsonb_object_agg + keys_sort --------------------------------------------------------------------- {"11": "f", "12": 36.00, "13": "f", "14": 28.00, "15": 24.00, "16": 32.00, "21": 38.00, "31": 45.00, "32": 49.00, "33": 27.00, "34": "f", "35": 28.00, "36": 26.00, "41": 30.00} (1 row) -- Check that we can execute jsonb_object_agg() with an expression containing jsonb arrays -SELECT jsonb_object_agg(l_orderkey::text || l_linenumber::text, jsonb_build_array(l_quantity, l_shipdate)) +SELECT keys_sort(jsonb_object_agg(l_orderkey::text || l_linenumber::text, jsonb_build_array(l_quantity, l_shipdate))) FROM lineitem WHERE l_orderkey < 3; - jsonb_object_agg + keys_sort --------------------------------------------------------------------- {"11": [17.00, "1996-03-13"], "12": [36.00, "1996-04-12"], "13": [8.00, "1996-01-29"], "14": [28.00, "1996-04-21"], "15": [24.00, "1996-03-30"], "16": [32.00, "1996-01-30"], "21": [38.00, "1997-01-28"]} (1 row) -- Check that we can execute jsonb_object_agg() with an expression containing arrays -SELECT jsonb_object_agg(l_orderkey::text || l_linenumber::text, ARRAY[l_quantity, l_orderkey]) +SELECT keys_sort(jsonb_object_agg(l_orderkey::text || l_linenumber::text, ARRAY[l_quantity, l_orderkey])) FROM lineitem WHERE l_orderkey < 3; - jsonb_object_agg + keys_sort --------------------------------------------------------------------- {"11": [17.00, 1], "12": [36.00, 1], "13": [8.00, 1], "14": [28.00, 1], "15": [24.00, 1], "16": [32.00, 1], "21": [38.00, 2]} (1 row) -- Check that we return NULL in case there are no input rows to jsonb_object_agg() -SELECT jsonb_object_agg(l_shipdate, l_orderkey) FROM lineitem WHERE l_quantity < 0; - jsonb_object_agg +SELECT keys_sort(jsonb_object_agg(l_shipdate, l_orderkey)) FROM lineitem WHERE l_quantity < 0; + keys_sort --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/multi_large_shardid.out b/src/test/regress/expected/multi_large_shardid.out new file mode 100644 index 000000000..218408eaa --- /dev/null +++ b/src/test/regress/expected/multi_large_shardid.out @@ -0,0 +1,63 @@ +-- +-- MULTI_LARGE_SHARDID +-- +-- Load data into distributed tables, and run TPC-H query #1 and #6. This test +-- differs from previous tests in that it modifies the *internal* shardId +-- generator, forcing the distributed database to use 64-bit shard identifiers. +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 100200300400500; +CREATE TABLE lineitem_large_shard_id AS SELECT * FROM lineitem; +SELECT create_distributed_table('lineitem_large_shard_id', 'l_orderkey'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.lineitem_large_shard_id$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Query #1 from the TPC-H decision support benchmark. +SELECT + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +FROM + lineitem_large_shard_id +WHERE + l_shipdate <= date '1998-12-01' - interval '90 days' +GROUP BY + l_returnflag, + l_linestatus +ORDER BY + l_returnflag, + l_linestatus; + l_returnflag | l_linestatus | sum_qty | sum_base_price | sum_disc_price | sum_charge | avg_qty | avg_price | avg_disc | count_order +--------------------------------------------------------------------- + A | F | 75465.00 | 113619873.63 | 107841287.0728 | 112171153.245923 | 25.6334918478260870 | 38593.707075407609 | 0.05055027173913043478 | 2944 + N | F | 2022.00 | 3102551.45 | 2952540.7118 | 3072642.770652 | 26.6052631578947368 | 40823.045394736842 | 0.05263157894736842105 | 76 + N | O | 149778.00 | 224706948.16 | 213634857.6854 | 222134071.929801 | 25.4594594594594595 | 38195.979629440762 | 0.04939486656467788543 | 5883 + R | F | 73156.00 | 108937979.73 | 103516623.6698 | 107743533.784328 | 25.2175112030334367 | 37551.871675284385 | 0.04983798690106859704 | 2901 +(4 rows) + +-- Query #6 from the TPC-H decision support benchmark. +SELECT + sum(l_extendedprice * l_discount) as revenue +FROM + lineitem_large_shard_id +WHERE + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1 year' + and l_discount between 0.06 - 0.01 and 0.06 + 0.01 + and l_quantity < 24; + revenue +--------------------------------------------------------------------- + 243277.7858 +(1 row) + diff --git a/src/test/regress/expected/multi_limit_clause.out b/src/test/regress/expected/multi_limit_clause.out index 21432fca1..65304b777 100644 --- a/src/test/regress/expected/multi_limit_clause.out +++ b/src/test/regress/expected/multi_limit_clause.out @@ -257,6 +257,7 @@ SELECT l_orderkey, max(l_shipdate) FROM lineitem GROUP BY l_orderkey ORDER BY 2 DESC, 1 LIMIT 5; +DEBUG: push down of limit count: 5 l_orderkey | max --------------------------------------------------------------------- 4678 | 11-27-1998 diff --git a/src/test/regress/expected/multi_limit_clause_approximate.out b/src/test/regress/expected/multi_limit_clause_approximate.out index 6928fd831..1d3ba642e 100644 --- a/src/test/regress/expected/multi_limit_clause_approximate.out +++ b/src/test/regress/expected/multi_limit_clause_approximate.out @@ -39,7 +39,7 @@ DEBUG: push down of limit count: 600 153937 | 2761321906 199283 | 2726988572 185925 | 2672114100 - 196629 | 2622637602 + 157064 | 2614644408 149926 | 2606013732 (10 rows) @@ -82,9 +82,9 @@ DEBUG: push down of limit count: 150 685 | Customer#000000685 | 37 472 | Customer#000000472 | 36 643 | Customer#000000643 | 34 - 226 | Customer#000000226 | 33 - 496 | Customer#000000496 | 32 304 | Customer#000000304 | 31 + 556 | Customer#000000556 | 31 + 613 | Customer#000000613 | 31 (10 rows) -- We now test scenarios where applying the limit optimization wouldn't produce diff --git a/src/test/regress/expected/multi_null_minmax_value_pruning.out b/src/test/regress/expected/multi_null_minmax_value_pruning.out index eb2617d28..cbb938ebe 100644 --- a/src/test/regress/expected/multi_null_minmax_value_pruning.out +++ b/src/test/regress/expected/multi_null_minmax_value_pruning.out @@ -13,13 +13,13 @@ SET citus.enable_repartition_joins to ON; SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290000; shardminvalue | shardmaxvalue --------------------------------------------------------------------- - 1 | 5986 + 1 | 1000 (1 row) SELECT shardminvalue, shardmaxvalue from pg_dist_shard WHERE shardid = 290001; shardminvalue | shardmaxvalue --------------------------------------------------------------------- - 8997 | 14947 + 1 | 1000 (1 row) -- Check that partition and join pruning works when min/max values exist @@ -28,23 +28,21 @@ SELECT coordinator_plan($Q$ EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030 or l_orderkey = 1; $Q$); -DEBUG: Router planner does not support append-partitioned tables. -CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement -LOG: join order: [ "lineitem" ] +DEBUG: Creating router plan CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus Adaptive) - Task Count: 2 + Task Count: 1 (2 rows) EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries LOG: join order: [ "lineitem" ][ local partition join "orders" ] -DEBUG: join prunable for intervals [1,5986] and [8997,14947] -DEBUG: join prunable for intervals [8997,14947] and [1,5986] +DEBUG: join prunable for intervals [-2147483648,-1] and [0,2147483647] +DEBUG: join prunable for intervals [0,2147483647] and [-2147483648,-1] QUERY PLAN --------------------------------------------------------------------- Aggregate @@ -56,17 +54,17 @@ DEBUG: join prunable for intervals [8997,14947] and [1,5986] -> Aggregate -> Hash Join Hash Cond: (lineitem.l_orderkey = orders.o_orderkey) - -> Seq Scan on lineitem_290000 lineitem + -> Seq Scan on lineitem_360000 lineitem -> Hash - -> Seq Scan on orders_290002 orders + -> Seq Scan on orders_360002 orders -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Hash Join Hash Cond: (lineitem.l_orderkey = orders.o_orderkey) - -> Seq Scan on lineitem_290001 lineitem + -> Seq Scan on lineitem_360001 lineitem -> Hash - -> Seq Scan on orders_290003 orders + -> Seq Scan on orders_360003 orders (20 rows) -- Now set the minimum value for a shard to null. Then check that we don't apply @@ -77,20 +75,22 @@ SELECT coordinator_plan($Q$ EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; $Q$); -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Distributed planning for a fast-path router query CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement -LOG: join order: [ "lineitem" ] +DEBUG: Creating router plan +CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement +DEBUG: query has a single distribution column value: 9030 CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus Adaptive) - Task Count: 2 + Task Count: 1 (2 rows) EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_partkey = o_custkey; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries LOG: join order: [ "lineitem" ][ dual partition join "orders" ] DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 @@ -142,20 +142,22 @@ SELECT coordinator_plan($Q$ EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; $Q$); -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Distributed planning for a fast-path router query CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement -LOG: join order: [ "lineitem" ] +DEBUG: Creating router plan +CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement +DEBUG: query has a single distribution column value: 9030 CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus Adaptive) - Task Count: 2 + Task Count: 1 (2 rows) EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_partkey = o_custkey; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries LOG: join order: [ "lineitem" ][ dual partition join "orders" ] DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 @@ -207,9 +209,11 @@ SELECT coordinator_plan($Q$ EXPLAIN (COSTS FALSE) SELECT l_orderkey, l_linenumber, l_shipdate FROM lineitem WHERE l_orderkey = 9030; $Q$); -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Distributed planning for a fast-path router query CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement -LOG: join order: [ "lineitem" ] +DEBUG: Creating router plan +CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement +DEBUG: query has a single distribution column value: 9030 CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE statement coordinator_plan --------------------------------------------------------------------- @@ -220,7 +224,7 @@ CONTEXT: PL/pgSQL function coordinator_plan(text) line XX at FOR over EXECUTE s EXPLAIN (COSTS FALSE) SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_partkey = o_custkey; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries LOG: join order: [ "lineitem" ][ dual partition join "orders" ] DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 diff --git a/src/test/regress/expected/multi_repartition_join_planning.out b/src/test/regress/expected/multi_repartition_join_planning.out index fdabff35c..e1bcda671 100644 --- a/src/test/regress/expected/multi_repartition_join_planning.out +++ b/src/test/regress/expected/multi_repartition_join_planning.out @@ -64,8 +64,8 @@ GROUP BY ORDER BY l_partkey, o_orderkey; DEBUG: Router planner does not support append-partitioned tables. -DEBUG: join prunable for intervals [1,5986] and [8997,14947] -DEBUG: join prunable for intervals [8997,14947] and [1,5986] +DEBUG: join prunable for intervals [-2147483648,-1] and [0,2147483647] +DEBUG: join prunable for intervals [0,2147483647] and [-2147483648,-1] DEBUG: join prunable for intervals [1,1000] and [6001,7000] DEBUG: join prunable for intervals [6001,7000] and [1,1000] DEBUG: pruning merge fetch taskId 1 @@ -128,7 +128,7 @@ GROUP BY l_partkey, o_orderkey ORDER BY l_partkey, o_orderkey; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 @@ -172,7 +172,7 @@ GROUP BY o_orderkey ORDER BY o_orderkey; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 @@ -218,7 +218,7 @@ GROUP BY o_orderkey, o_shippriority ORDER BY o_orderkey; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 @@ -262,7 +262,7 @@ GROUP BY o_orderkey, o_shippriority ORDER BY o_orderkey; -DEBUG: Router planner does not support append-partitioned tables. +DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 diff --git a/src/test/regress/expected/multi_repartition_join_pruning.out b/src/test/regress/expected/multi_repartition_join_pruning.out index 5bbbf60bd..733a7205e 100644 --- a/src/test/regress/expected/multi_repartition_join_pruning.out +++ b/src/test/regress/expected/multi_repartition_join_pruning.out @@ -72,15 +72,27 @@ WHERE o_custkey = c_custkey AND o_orderkey < 0; DEBUG: Router planner does not support append-partitioned tables. +DEBUG: join prunable for intervals [1,1000] and [1001,2000] +DEBUG: join prunable for intervals [1,1000] and [6001,7000] +DEBUG: join prunable for intervals [1001,2000] and [1,1000] +DEBUG: join prunable for intervals [1001,2000] and [6001,7000] +DEBUG: join prunable for intervals [6001,7000] and [1,1000] +DEBUG: join prunable for intervals [6001,7000] and [1001,2000] +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 3 +DEBUG: pruning merge fetch taskId 3 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 9 QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) - Task Count: 0 + Task Count: 3 Tasks Shown: None, not supported for re-partition queries -> MapMergeJob - Map Task Count: 0 - Merge Task Count: 0 + Map Task Count: 2 + Merge Task Count: 3 (7 rows) SELECT @@ -89,8 +101,20 @@ FROM orders, customer_append WHERE o_custkey = c_custkey AND - o_orderkey < 0; + o_orderkey < 0 AND o_orderkey > 0; DEBUG: Router planner does not support append-partitioned tables. +DEBUG: join prunable for intervals [1,1000] and [1001,2000] +DEBUG: join prunable for intervals [1,1000] and [6001,7000] +DEBUG: join prunable for intervals [1001,2000] and [1,1000] +DEBUG: join prunable for intervals [1001,2000] and [6001,7000] +DEBUG: join prunable for intervals [6001,7000] and [1,1000] +DEBUG: join prunable for intervals [6001,7000] and [1001,2000] +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 3 +DEBUG: pruning merge fetch taskId 3 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 9 count --------------------------------------------------------------------- 0 @@ -233,17 +257,45 @@ FROM lineitem, customer_append WHERE l_partkey = c_nationkey AND - l_orderkey < 0; + l_orderkey < 0 AND l_orderkey > 0; DEBUG: Router planner does not support append-partitioned tables. +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 3 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 8 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 12 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 12 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 16 QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) - Task Count: 0 + Task Count: 4 Tasks Shown: None, not supported for re-partition queries -> MapMergeJob - Map Task Count: 0 - Merge Task Count: 0 + Map Task Count: 2 + Merge Task Count: 4 -> MapMergeJob Map Task Count: 3 Merge Task Count: 4 @@ -255,8 +307,36 @@ FROM lineitem, customer_append WHERE l_partkey = c_nationkey AND - l_orderkey < 0; + l_orderkey < 0 AND l_orderkey > 0; DEBUG: Router planner does not support append-partitioned tables. +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 3 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 8 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 12 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 12 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 16 count --------------------------------------------------------------------- 0 diff --git a/src/test/regress/expected/multi_repartition_join_task_assignment.out b/src/test/regress/expected/multi_repartition_join_task_assignment.out index 77d0158bc..c94012a27 100644 --- a/src/test/regress/expected/multi_repartition_join_task_assignment.out +++ b/src/test/regress/expected/multi_repartition_join_task_assignment.out @@ -56,19 +56,25 @@ WHERE o_custkey = c_custkey AND o_orderkey = l_orderkey; DEBUG: Router planner does not support append-partitioned tables. -DEBUG: no sharding pruning constraints on customer_append found -DEBUG: shard count after pruning for customer_append: 3 -DEBUG: assigned task to node localhost:xxxxx -DEBUG: assigned task to node localhost:xxxxx -DEBUG: assigned task to node localhost:xxxxx DEBUG: no sharding pruning constraints on lineitem found DEBUG: shard count after pruning for lineitem: 2 -DEBUG: join prunable for intervals [1,5986] and [8997,14947] -DEBUG: join prunable for intervals [8997,14947] and [1,5986] +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx +DEBUG: no sharding pruning constraints on customer_append found +DEBUG: shard count after pruning for customer_append: 3 +DEBUG: join prunable for intervals [1,1000] and [1001,2000] +DEBUG: join prunable for intervals [1,1000] and [6001,7000] +DEBUG: join prunable for intervals [1001,2000] and [1,1000] +DEBUG: join prunable for intervals [1001,2000] and [6001,7000] +DEBUG: join prunable for intervals [6001,7000] and [1,1000] +DEBUG: join prunable for intervals [6001,7000] and [1001,2000] DEBUG: pruning merge fetch taskId 1 -DETAIL: Creating dependency on merge taskId 4 +DETAIL: Creating dependency on merge taskId 3 DEBUG: pruning merge fetch taskId 3 -DETAIL: Creating dependency on merge taskId 8 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count diff --git a/src/test/regress/output/multi_large_shardid.source b/src/test/regress/output/multi_large_shardid.source deleted file mode 100644 index d9ca39128..000000000 --- a/src/test/regress/output/multi_large_shardid.source +++ /dev/null @@ -1,55 +0,0 @@ --- --- MULTI_LARGE_SHARDID --- --- Load data into distributed tables, and run TPC-H query #1 and #6. This test --- differs from previous tests in that it modifies the *internal* shardId --- generator, forcing the distributed database to use 64-bit shard identifiers. -ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 100200300400500; --- Load additional data to start using large shard identifiers. -\copy lineitem FROM '@abs_srcdir@/data/lineitem.1.data' with delimiter '|' -\copy lineitem FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|' --- Query #1 from the TPC-H decision support benchmark. -SELECT - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order -FROM - lineitem -WHERE - l_shipdate <= date '1998-12-01' - interval '90 days' -GROUP BY - l_returnflag, - l_linestatus -ORDER BY - l_returnflag, - l_linestatus; - l_returnflag | l_linestatus | sum_qty | sum_base_price | sum_disc_price | sum_charge | avg_qty | avg_price | avg_disc | count_order ---------------+--------------+-----------+----------------+----------------+------------------+---------------------+--------------------+------------------------+------------- - A | F | 150930.00 | 227239747.26 | 215682574.1456 | 224342306.491846 | 25.6334918478260870 | 38593.707075407609 | 0.05055027173913043478 | 5888 - N | F | 4044.00 | 6205102.90 | 5905081.4236 | 6145285.541304 | 26.6052631578947368 | 40823.045394736842 | 0.05263157894736842105 | 152 - N | O | 299556.00 | 449413896.32 | 427269715.3708 | 444268143.859602 | 25.4594594594594595 | 38195.979629440762 | 0.04939486656467788543 | 11766 - R | F | 146312.00 | 217875959.46 | 207033247.3396 | 215487067.568656 | 25.2175112030334367 | 37551.871675284385 | 0.04983798690106859704 | 5802 -(4 rows) - --- Query #6 from the TPC-H decision support benchmark. -SELECT - sum(l_extendedprice * l_discount) as revenue -FROM - lineitem -WHERE - l_shipdate >= date '1994-01-01' - and l_shipdate < date '1994-01-01' + interval '1 year' - and l_discount between 0.06 - 0.01 and 0.06 + 0.01 - and l_quantity < 24; - revenue -------------- - 486555.5716 -(1 row) - diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index 4c21e7ffe..9d8d0bdd6 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -443,10 +443,15 @@ push(@pgOptions, "wal_receiver_status_interval=1"); # src/backend/replication/logical/launcher.c. push(@pgOptions, "wal_retrieve_retry_interval=1000"); -# disable compute_query_id so that we don't get Query Identifiers -# in explain outputs if ($majorversion >= "14") { + # disable compute_query_id so that we don't get Query Identifiers + # in explain outputs push(@pgOptions, "compute_query_id=off"); + + # reduce test flappiness and different PG14 plans + if (!$vanillatest) { + push(@pgOptions, "enable_incremental_sort=off"); + } } # Citus options set for the tests diff --git a/src/test/regress/sql/.gitignore b/src/test/regress/sql/.gitignore index 5b06080ed..b10b2880c 100644 --- a/src/test/regress/sql/.gitignore +++ b/src/test/regress/sql/.gitignore @@ -11,7 +11,6 @@ /multi_complex_count_distinct.sql /multi_copy.sql /multi_create_schema.sql -/multi_large_shardid.sql /multi_load_data.sql /multi_load_data_superuser.sql /multi_load_large_records.sql diff --git a/src/test/regress/sql/multi_agg_approximate_distinct.sql b/src/test/regress/sql/multi_agg_approximate_distinct.sql index de799f81c..673dcfa7b 100644 --- a/src/test/regress/sql/multi_agg_approximate_distinct.sql +++ b/src/test/regress/sql/multi_agg_approximate_distinct.sql @@ -121,12 +121,12 @@ SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as t LIMIT 10; SELECT - l_orderkey, + l_partkey, count(l_partkey) FILTER (WHERE l_shipmode = 'AIR'), count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR'), count(DISTINCT CASE WHEN l_shipmode = 'AIR' THEN l_partkey ELSE NULL END) FROM lineitem - GROUP BY l_orderkey + GROUP BY l_partkey ORDER BY 2 DESC, 1 DESC LIMIT 10; diff --git a/src/test/regress/sql/multi_array_agg.sql b/src/test/regress/sql/multi_array_agg.sql index bd275629a..53aa4025f 100644 --- a/src/test/regress/sql/multi_array_agg.sql +++ b/src/test/regress/sql/multi_array_agg.sql @@ -6,11 +6,13 @@ SET citus.next_shard_id TO 520000; SET citus.coordinator_aggregation_strategy TO 'disabled'; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION array_sort (ANYARRAY) RETURNS ANYARRAY LANGUAGE SQL AS $$ SELECT ARRAY(SELECT unnest($1) ORDER BY 1) $$; +$r$); -- Check multi_cat_agg() aggregate which is used to implement array_agg() @@ -61,9 +63,10 @@ SELECT l_quantity, array_sort(array_agg(l_orderkey * 2 + 1)) FROM lineitem WHERE -- Check that we can execute array_agg() with an expression containing NULL values -SELECT array_agg(case when l_quantity > 20 then l_quantity else NULL end) - FROM lineitem WHERE l_orderkey < 10; +SELECT array_sort(array_agg(case when l_quantity > 20 then l_quantity else NULL end)) + FROM lineitem WHERE l_orderkey < 10; -- Check that we return NULL in case there are no input rows to array_agg() -SELECT array_agg(l_orderkey) FROM lineitem WHERE l_quantity < 0; +SELECT array_sort(array_agg(l_orderkey)) + FROM lineitem WHERE l_orderkey < 0; diff --git a/src/test/regress/sql/multi_create_table.sql b/src/test/regress/sql/multi_create_table.sql index 48210a187..e25eec397 100644 --- a/src/test/regress/sql/multi_create_table.sql +++ b/src/test/regress/sql/multi_create_table.sql @@ -5,7 +5,7 @@ -- Create new table definitions for use in testing in distributed planning and -- execution functionality. Also create indexes to boost performance. Since we -- need to cover both reference join and partitioned join, we have created --- reference and append distributed version of orders, customer and part tables. +-- reference and hash-distributed version of orders, customer and part tables. SET citus.next_shard_id TO 360000; @@ -46,7 +46,7 @@ CREATE TABLE lineitem ( l_shipmode char(10) not null, l_comment varchar(44) not null, PRIMARY KEY(l_orderkey, l_linenumber) ); -SELECT create_distributed_table('lineitem', 'l_orderkey', 'append'); +SELECT create_distributed_table('lineitem', 'l_orderkey', 'hash', shard_count := 2); CREATE INDEX lineitem_time_index ON lineitem (l_shipdate); @@ -61,7 +61,7 @@ CREATE TABLE orders ( o_shippriority integer not null, o_comment varchar(79) not null, PRIMARY KEY(o_orderkey) ); -SELECT create_distributed_table('orders', 'o_orderkey', 'append'); +SELECT create_distributed_table('orders', 'o_orderkey', 'hash', colocate_with := 'lineitem'); CREATE TABLE orders_reference ( o_orderkey bigint not null, @@ -155,11 +155,11 @@ CREATE TABLE supplier_single_shard s_acctbal decimal(15,2) not null, s_comment varchar(101) not null ); -SELECT create_distributed_table('supplier_single_shard', 's_suppkey', 'append'); +SELECT create_distributed_table('supplier_single_shard', 's_suppkey', 'hash', shard_count := 1); CREATE TABLE mx_table_test (col1 int, col2 text); -SET citus.next_shard_id TO 360009; +SET citus.next_shard_id TO 360013; -- Test initial data loading CREATE TABLE data_load_test (col1 int, col2 text, col3 serial); diff --git a/src/test/regress/sql/multi_hash_pruning.sql b/src/test/regress/sql/multi_hash_pruning.sql index 5f2115c17..df432ca90 100644 --- a/src/test/regress/sql/multi_hash_pruning.sql +++ b/src/test/regress/sql/multi_hash_pruning.sql @@ -126,14 +126,14 @@ SELECT count(*) FROM lineitem_hash_part SELECT count(*) FROM lineitem_hash_part WHERE l_orderkey IN (SELECT l_orderkey FROM lineitem_hash_part); SELECT count(*) FROM lineitem_hash_part WHERE l_orderkey = ANY (SELECT l_orderkey FROM lineitem_hash_part); --- Check whether we support range queries with append distributed table +-- Check whether we support range queries SELECT count(*) FROM lineitem WHERE l_orderkey >= 1 AND l_orderkey <= 3; SELECT count(*) FROM lineitem WHERE (l_orderkey >= 1 AND l_orderkey <= 3) AND (l_quantity > 11 AND l_quantity < 22); --- Check whether we support IN/ANY in subquery with append and range distributed table +-- Check whether we support IN/ANY in subquery SELECT count(*) FROM lineitem WHERE l_orderkey = ANY ('{1,2,3}'); diff --git a/src/test/regress/sql/multi_index_statements.sql b/src/test/regress/sql/multi_index_statements.sql index 76fc84e43..f31329dcc 100644 --- a/src/test/regress/sql/multi_index_statements.sql +++ b/src/test/regress/sql/multi_index_statements.sql @@ -116,7 +116,6 @@ SET search_path TO multi_index_statements, public; -- Verify that we error out on unsupported statement types -CREATE UNIQUE INDEX try_index ON public.lineitem (l_orderkey); CREATE INDEX try_index ON lineitem (l_orderkey) TABLESPACE newtablespace; CREATE UNIQUE INDEX try_unique_range_index ON index_test_range(b); diff --git a/src/test/regress/sql/multi_join_pruning.sql b/src/test/regress/sql/multi_join_pruning.sql index b9feb999c..d655f7c52 100644 --- a/src/test/regress/sql/multi_join_pruning.sql +++ b/src/test/regress/sql/multi_join_pruning.sql @@ -32,12 +32,6 @@ where logicalrelid='lineitem'::regclass or logicalrelid='orders'::regclass order by shardid; -set citus.explain_distributed_queries to on; --- explain the query before actually executing it -EXPLAIN SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders - WHERE l_orderkey = o_orderkey AND l_orderkey > 6000 AND o_orderkey < 6000; -set citus.explain_distributed_queries to off; - set client_min_messages to debug3; SELECT sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders WHERE l_orderkey = o_orderkey AND l_orderkey > 6000 AND o_orderkey < 6000; diff --git a/src/test/regress/sql/multi_json_agg.sql b/src/test/regress/sql/multi_json_agg.sql index 28e61497a..8c3f829c5 100644 --- a/src/test/regress/sql/multi_json_agg.sql +++ b/src/test/regress/sql/multi_json_agg.sql @@ -6,6 +6,7 @@ SET citus.next_shard_id TO 520000; SET citus.coordinator_aggregation_strategy TO 'disabled'; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION array_sort (json) RETURNS json LANGUAGE SQL AS $$ @@ -13,6 +14,7 @@ SELECT json_agg(value) FROM ( SELECT value FROM json_array_elements($1) ORDER BY value::jsonb ) t $$; +$r$); -- Check multi_cat_agg() aggregate which is used to implement json_agg() @@ -64,24 +66,24 @@ SELECT l_quantity, array_sort(json_agg(l_orderkey * 2 + 1)) FROM lineitem WHERE -- Check that we can execute json_agg() with an expression containing NULL values -SELECT json_agg(case when l_quantity > 20 then l_quantity else NULL end) +SELECT array_sort(json_agg(case when l_quantity > 20 then l_quantity else NULL end)) FROM lineitem WHERE l_orderkey < 5; -- Check that we can execute json_agg() with an expression containing different types -SELECT json_agg(case when l_quantity > 20 then to_json(l_quantity) else '"f"'::json end) +SELECT array_sort(json_agg(case when l_quantity > 20 then to_json(l_quantity) else '"f"'::json end)) FROM lineitem WHERE l_orderkey < 5; -- Check that we can execute json_agg() with an expression containing json arrays -SELECT json_agg(json_build_array(l_quantity, l_shipdate)) +SELECT array_sort(json_agg(json_build_array(l_quantity, l_shipdate))) FROM lineitem WHERE l_orderkey < 3; -- Check that we can execute json_agg() with an expression containing arrays -SELECT json_agg(ARRAY[l_quantity, l_orderkey]) +SELECT array_sort(json_agg(ARRAY[l_quantity, l_orderkey])) FROM lineitem WHERE l_orderkey < 3; -- Check that we return NULL in case there are no input rows to json_agg() -SELECT json_agg(l_orderkey) FROM lineitem WHERE l_quantity < 0; +SELECT array_sort(json_agg(l_orderkey)) FROM lineitem WHERE l_quantity < 0; diff --git a/src/test/regress/sql/multi_json_object_agg.sql b/src/test/regress/sql/multi_json_object_agg.sql index 87532c880..d520b9752 100644 --- a/src/test/regress/sql/multi_json_object_agg.sql +++ b/src/test/regress/sql/multi_json_object_agg.sql @@ -12,6 +12,7 @@ AS $$ SELECT count(*) FROM (SELECT * FROM json_object_keys($1)) t $$; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION keys_sort (json) RETURNS json LANGUAGE SQL AS $$ @@ -19,6 +20,7 @@ SELECT json_object_agg(key, value) FROM ( SELECT * FROM json_each($1) ORDER BY key ) t $$; +$r$); -- Check multi_cat_agg() aggregate which is used to implement json_object_agg() @@ -100,4 +102,4 @@ SELECT keys_sort(json_object_agg(l_orderkey::text || l_linenumber::text, ARRAY[l -- Check that we return NULL in case there are no input rows to json_object_agg() -SELECT json_object_agg(l_shipdate, l_orderkey) FROM lineitem WHERE l_quantity < 0; +SELECT keys_sort(json_object_agg(l_shipdate, l_orderkey)) FROM lineitem WHERE l_quantity < 0; diff --git a/src/test/regress/sql/multi_jsonb_agg.sql b/src/test/regress/sql/multi_jsonb_agg.sql index 8fb0e5bef..78fe4d82b 100644 --- a/src/test/regress/sql/multi_jsonb_agg.sql +++ b/src/test/regress/sql/multi_jsonb_agg.sql @@ -6,6 +6,7 @@ SET citus.next_shard_id TO 520000; SET citus.coordinator_aggregation_strategy TO 'disabled'; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION array_sort (jsonb) RETURNS jsonb LANGUAGE SQL AS $$ @@ -13,6 +14,7 @@ SELECT jsonb_agg(value) FROM ( SELECT * FROM jsonb_array_elements($1) ORDER BY 1 ) t $$; +$r$); -- Check multi_cat_agg() aggregate which is used to implement jsonb_agg() @@ -64,24 +66,25 @@ SELECT l_quantity, array_sort(jsonb_agg(l_orderkey * 2 + 1)) FROM lineitem WHERE -- Check that we can execute jsonb_agg() with an expression containing NULL values -SELECT jsonb_agg(case when l_quantity > 20 then l_quantity else NULL end) +SELECT array_sort(jsonb_agg(case when l_quantity > 20 then l_quantity else NULL end)) FROM lineitem WHERE l_orderkey < 5; -- Check that we can execute jsonb_agg() with an expression containing different types -SELECT jsonb_agg(case when l_quantity > 20 then to_jsonb(l_quantity) else '"f"'::jsonb end) +SELECT array_sort(jsonb_agg(case when l_quantity > 20 then to_jsonb(l_quantity) else '"f"'::jsonb end)) FROM lineitem WHERE l_orderkey < 5; -- Check that we can execute jsonb_agg() with an expression containing jsonb arrays -SELECT jsonb_agg(jsonb_build_array(l_quantity, l_shipdate)) +SELECT array_sort(jsonb_agg(jsonb_build_array(l_quantity, l_shipdate))) FROM lineitem WHERE l_orderkey < 3; -- Check that we can execute jsonb_agg() with an expression containing arrays -SELECT jsonb_agg(ARRAY[l_quantity, l_orderkey]) +SELECT array_sort(jsonb_agg(ARRAY[l_quantity, l_orderkey])) FROM lineitem WHERE l_orderkey < 3; -- Check that we return NULL in case there are no input rows to jsonb_agg() -SELECT jsonb_agg(l_orderkey) FROM lineitem WHERE l_quantity < 0; +SELECT array_sort(jsonb_agg(l_orderkey)) + FROM lineitem WHERE l_quantity < 0; diff --git a/src/test/regress/sql/multi_jsonb_object_agg.sql b/src/test/regress/sql/multi_jsonb_object_agg.sql index 4329b5ce4..f9ca75608 100644 --- a/src/test/regress/sql/multi_jsonb_object_agg.sql +++ b/src/test/regress/sql/multi_jsonb_object_agg.sql @@ -6,11 +6,23 @@ SET citus.next_shard_id TO 520000; SET citus.coordinator_aggregation_strategy TO 'disabled'; +SELECT run_command_on_master_and_workers($r$ CREATE OR REPLACE FUNCTION count_keys (jsonb) RETURNS bigint LANGUAGE SQL AS $$ SELECT count(*) FROM (SELECT * FROM jsonb_object_keys($1)) t $$; +$r$); + +SELECT run_command_on_master_and_workers($r$ +CREATE OR REPLACE FUNCTION keys_sort (jsonb) +RETURNS jsonb LANGUAGE SQL +AS $$ +SELECT jsonb_object_agg(key, value) FROM ( + SELECT * FROM jsonb_each($1) ORDER BY key +) t +$$; +$r$); -- Check multi_cat_agg() aggregate which is used to implement jsonb_object_agg() @@ -70,26 +82,26 @@ SELECT l_quantity, jsonb_object_agg(l_orderkey::text || l_linenumber::text, l_or -- Check that we can execute jsonb_object_agg() with an expression containing NULL values -SELECT jsonb_object_agg(l_orderkey::text || l_linenumber::text, - case when l_quantity > 20 then l_quantity else NULL end) +SELECT keys_sort(jsonb_object_agg(l_orderkey::text || l_linenumber::text, + case when l_quantity > 20 then l_quantity else NULL end)) FROM lineitem WHERE l_orderkey < 5; -- Check that we can execute jsonb_object_agg() with an expression containing different types -SELECT jsonb_object_agg(l_orderkey::text || l_linenumber::text, - case when l_quantity > 20 then to_jsonb(l_quantity) else '"f"'::jsonb end) +SELECT keys_sort(jsonb_object_agg(l_orderkey::text || l_linenumber::text, + case when l_quantity > 20 then to_jsonb(l_quantity) else '"f"'::jsonb end)) FROM lineitem WHERE l_orderkey < 5; -- Check that we can execute jsonb_object_agg() with an expression containing jsonb arrays -SELECT jsonb_object_agg(l_orderkey::text || l_linenumber::text, jsonb_build_array(l_quantity, l_shipdate)) +SELECT keys_sort(jsonb_object_agg(l_orderkey::text || l_linenumber::text, jsonb_build_array(l_quantity, l_shipdate))) FROM lineitem WHERE l_orderkey < 3; -- Check that we can execute jsonb_object_agg() with an expression containing arrays -SELECT jsonb_object_agg(l_orderkey::text || l_linenumber::text, ARRAY[l_quantity, l_orderkey]) +SELECT keys_sort(jsonb_object_agg(l_orderkey::text || l_linenumber::text, ARRAY[l_quantity, l_orderkey])) FROM lineitem WHERE l_orderkey < 3; -- Check that we return NULL in case there are no input rows to jsonb_object_agg() -SELECT jsonb_object_agg(l_shipdate, l_orderkey) FROM lineitem WHERE l_quantity < 0; +SELECT keys_sort(jsonb_object_agg(l_shipdate, l_orderkey)) FROM lineitem WHERE l_quantity < 0; diff --git a/src/test/regress/input/multi_large_shardid.source b/src/test/regress/sql/multi_large_shardid.sql similarity index 83% rename from src/test/regress/input/multi_large_shardid.source rename to src/test/regress/sql/multi_large_shardid.sql index 624a132b0..7c68cf022 100644 --- a/src/test/regress/input/multi_large_shardid.source +++ b/src/test/regress/sql/multi_large_shardid.sql @@ -9,11 +9,8 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 100200300400500; - --- Load additional data to start using large shard identifiers. - -\copy lineitem FROM '@abs_srcdir@/data/lineitem.1.data' with delimiter '|' -\copy lineitem FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|' +CREATE TABLE lineitem_large_shard_id AS SELECT * FROM lineitem; +SELECT create_distributed_table('lineitem_large_shard_id', 'l_orderkey'); -- Query #1 from the TPC-H decision support benchmark. @@ -29,7 +26,7 @@ SELECT avg(l_discount) as avg_disc, count(*) as count_order FROM - lineitem + lineitem_large_shard_id WHERE l_shipdate <= date '1998-12-01' - interval '90 days' GROUP BY @@ -44,7 +41,7 @@ ORDER BY SELECT sum(l_extendedprice * l_discount) as revenue FROM - lineitem + lineitem_large_shard_id WHERE l_shipdate >= date '1994-01-01' and l_shipdate < date '1994-01-01' + interval '1 year' diff --git a/src/test/regress/sql/multi_repartition_join_pruning.sql b/src/test/regress/sql/multi_repartition_join_pruning.sql index 7c8f8bb36..c6592f99f 100644 --- a/src/test/regress/sql/multi_repartition_join_pruning.sql +++ b/src/test/regress/sql/multi_repartition_join_pruning.sql @@ -41,7 +41,7 @@ FROM orders, customer_append WHERE o_custkey = c_custkey AND - o_orderkey < 0; + o_orderkey < 0 AND o_orderkey > 0; -- Single range-repartition join with a selection clause on the base table to -- test the case when all sql tasks are pruned away. @@ -87,14 +87,14 @@ FROM lineitem, customer_append WHERE l_partkey = c_nationkey AND - l_orderkey < 0; + l_orderkey < 0 AND l_orderkey > 0; SELECT count(*) FROM lineitem, customer_append WHERE l_partkey = c_nationkey AND - l_orderkey < 0; + l_orderkey < 0 AND l_orderkey > 0; -- Test cases with false in the WHERE clause EXPLAIN (COSTS OFF)