From 2f096cad74431538714c58bf677f5866d394fdff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Y=C3=BCcesoy?= Date: Tue, 17 May 2016 13:59:01 +0300 Subject: [PATCH] Update regression tests where metadata edited manually Fixes #302 Since our previous syntax did not allow creating hash partitioned tables, some of the previous tests manually changed partition method to hash to be able to test it. With this change we remove unnecessary workaround and create hash distributed tables instead. Also in some tests metadata was created manually. With this change we also fixed this issue. --- .../expected/multi_distribution_metadata.out | 109 ++++++------ .../expected/multi_join_order_additional.out | 127 +++++++++---- .../multi_large_table_join_planning.out | 14 +- .../multi_large_table_join_planning_0.out | 14 +- .../expected/multi_large_table_pruning.out | 4 +- .../multi_large_table_task_assignment.out | 18 +- .../regress/expected/multi_modifications.out | 6 +- .../expected/multi_prune_shard_list.out | 168 +++++++++++------- .../regress/expected/multi_simple_queries.out | 20 +-- .../input/multi_append_table_to_shard.source | 19 +- .../output/multi_append_table_to_shard.source | 29 ++- .../regress/output/multi_outer_join.source | 130 +++++++------- .../sql/multi_distribution_metadata.sql | 67 +++---- .../sql/multi_join_order_additional.sql | 96 ++++++---- .../regress/sql/multi_prune_shard_list.sql | 61 ++++--- 15 files changed, 510 insertions(+), 372 deletions(-) diff --git a/src/test/regress/expected/multi_distribution_metadata.out b/src/test/regress/expected/multi_distribution_metadata.out index a096495e7..88d5a3ba1 100644 --- a/src/test/regress/expected/multi_distribution_metadata.out +++ b/src/test/regress/expected/multi_distribution_metadata.out @@ -56,82 +56,89 @@ CREATE FUNCTION column_name_to_column(regclass, text) -- =================================================================== -- test distribution metadata functionality -- =================================================================== --- create table to be distributed +-- create hash distributed table CREATE TABLE events_hash ( id bigint, name text ); --- for this table we'll "distribute" manually but verify using function calls -INSERT INTO pg_dist_shard - (shardid, logicalrelid, shardstorage, shardminvalue, shardmaxvalue) -VALUES - (1, 'events_hash'::regclass, 't', '0', '10'), - (2, 'events_hash'::regclass, 't', '10', '20'), - (3, 'events_hash'::regclass, 't', '20', '30'), - (4, 'events_hash'::regclass, 't', '30', '40'); -INSERT INTO pg_dist_shard_placement - (nodename, nodeport, shardid, shardstate, shardlength) -VALUES - ('cluster-worker-01', 5432, 1, 0, 0), - ('cluster-worker-01', 5432, 2, 0, 0), - ('cluster-worker-02', 5433, 3, 0, 0), - ('cluster-worker-02', 5433, 4, 0, 0), - ('cluster-worker-03', 5434, 1, 1, 0), - ('cluster-worker-03', 5434, 2, 1, 0), - ('cluster-worker-04', 5435, 3, 1, 0), - ('cluster-worker-04', 5435, 4, 1, 0); -INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey) -VALUES - ('events_hash'::regclass, 'h', column_name_to_column('events_hash'::regclass, 'name')); +SELECT master_create_distributed_table('events_hash', 'name', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +-- create worker shards +SELECT master_create_worker_shards('events_hash', 4, 2); + master_create_worker_shards +----------------------------- + +(1 row) + +-- set shardstate of one replication from each shard to 0 (invalid value) +UPDATE pg_dist_shard_placement SET shardstate = 0 
WHERE nodeport = 57638 AND shardid BETWEEN 103025 AND 103028; -- should see above shard identifiers SELECT load_shard_id_array('events_hash'); - load_shard_id_array ---------------------- - {1,2,3,4} + load_shard_id_array +------------------------------- + {103025,103026,103027,103028} (1 row) -- should see array with first shard range -SELECT load_shard_interval_array(1, 0); +SELECT load_shard_interval_array(103025, 0); load_shard_interval_array --------------------------- - {0,10} + {-2147483648,-1073741825} (1 row) -- should even work for range-partitioned shards -BEGIN; - UPDATE pg_dist_shard SET - shardminvalue = 'Aardvark', - shardmaxvalue = 'Zebra' - WHERE shardid = 1; - UPDATE pg_dist_partition SET partmethod = 'r' - WHERE logicalrelid = 'events_hash'::regclass; - SELECT load_shard_interval_array(1, ''::text); +-- create range distributed table +CREATE TABLE events_range ( + id bigint, + name text +); +SELECT master_create_distributed_table('events_range', 'name', 'range'); + master_create_distributed_table +--------------------------------- + +(1 row) + +-- create empty shard +SELECT master_create_empty_shard('events_range'); + master_create_empty_shard +--------------------------- + 103029 +(1 row) + +UPDATE pg_dist_shard SET + shardminvalue = 'Aardvark', + shardmaxvalue = 'Zebra' +WHERE shardid = 103029; +SELECT load_shard_interval_array(103029, ''::text); load_shard_interval_array --------------------------- {Aardvark,Zebra} (1 row) -ROLLBACK; -- should see error for non-existent shard -SELECT load_shard_interval_array(5, 0); -ERROR: could not find valid entry for shard 5 +SELECT load_shard_interval_array(103030, 0); +ERROR: could not find valid entry for shard 103030 -- should see two placements -SELECT load_shard_placement_array(2, false); - load_shard_placement_array -------------------------------------------------- - {cluster-worker-03:5434,cluster-worker-01:5432} +SELECT load_shard_placement_array(103026, false); + load_shard_placement_array +----------------------------------- + {localhost:57637,localhost:57638} (1 row) -- only one of which is finalized -SELECT load_shard_placement_array(2, true); +SELECT load_shard_placement_array(103026, true); load_shard_placement_array ---------------------------- - {cluster-worker-03:5434} + {localhost:57637} (1 row) -- should see error for non-existent shard -SELECT load_shard_placement_array(6, false); -WARNING: could not find any shard placements for shardId 6 +SELECT load_shard_placement_array(103031, false); +WARNING: could not find any shard placements for shardId 103031 load_shard_placement_array ---------------------------- {} @@ -185,9 +192,11 @@ SELECT column_name_to_column_id('events_hash', 'non_existent'); ERROR: column "non_existent" of relation "events_hash" does not exist -- drop shard rows (must drop placements first) DELETE FROM pg_dist_shard_placement - WHERE shardid BETWEEN 1 AND 4; + WHERE shardid BETWEEN 103025 AND 103029; DELETE FROM pg_dist_shard WHERE logicalrelid = 'events_hash'::regclass; +DELETE FROM pg_dist_shard + WHERE logicalrelid = 'events_range'::regclass; -- verify that an eager load shows them missing SELECT load_shard_id_array('events_hash'); load_shard_id_array @@ -266,9 +275,9 @@ WHERE shardid = :new_shard_id AND nodename = 'localhost' and nodeport = 5432; -- deleting or updating a non-existent row should fail SELECT delete_shard_placement_row(:new_shard_id, 'wrong_localhost', 5432); -ERROR: could not find valid entry for shard placement 103024 on node "wrong_localhost:5432" +ERROR: could not 
find valid entry for shard placement 103030 on node "wrong_localhost:5432" SELECT update_shard_placement_row_state(:new_shard_id, 'localhost', 5432, 3); -ERROR: could not find valid entry for shard placement 103024 on node "localhost:5432" +ERROR: could not find valid entry for shard placement 103030 on node "localhost:5432" -- now we'll even test our lock methods... -- use transaction to bound how long we hold the lock BEGIN; diff --git a/src/test/regress/expected/multi_join_order_additional.out b/src/test/regress/expected/multi_join_order_additional.out index 3df390d33..f4380134d 100644 --- a/src/test/regress/expected/multi_join_order_additional.out +++ b/src/test/regress/expected/multi_join_order_additional.out @@ -5,6 +5,90 @@ SET citus.explain_distributed_queries TO off; SET citus.log_multi_join_order TO TRUE; SET client_min_messages TO DEBUG2; +-- Create new table definitions for use in testing in distributed planning and +-- execution functionality. Also create indexes to boost performance. +CREATE TABLE lineitem_hash ( + l_orderkey bigint not null, + l_partkey integer not null, + l_suppkey integer not null, + l_linenumber integer not null, + l_quantity decimal(15, 2) not null, + l_extendedprice decimal(15, 2) not null, + l_discount decimal(15, 2) not null, + l_tax decimal(15, 2) not null, + l_returnflag char(1) not null, + l_linestatus char(1) not null, + l_shipdate date not null, + l_commitdate date not null, + l_receiptdate date not null, + l_shipinstruct char(25) not null, + l_shipmode char(10) not null, + l_comment varchar(44) not null, + PRIMARY KEY(l_orderkey, l_linenumber) ); +DEBUG: CREATE TABLE / PRIMARY KEY will create implicit index "lineitem_hash_pkey" for table "lineitem_hash" +DEBUG: building index "lineitem_hash_pkey" on table "lineitem_hash" +SELECT master_create_distributed_table('lineitem_hash', 'l_orderkey', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('lineitem_hash', 2, 1); + master_create_worker_shards +----------------------------- + +(1 row) + +CREATE INDEX lineitem_hash_time_index ON lineitem_hash (l_shipdate); +DEBUG: applied command on shard 102037 on node localhost:57637 +DEBUG: applied command on shard 102038 on node localhost:57638 +DEBUG: building index "lineitem_hash_time_index" on table "lineitem_hash" +CREATE TABLE orders_hash ( + o_orderkey bigint not null, + o_custkey integer not null, + o_orderstatus char(1) not null, + o_totalprice decimal(15,2) not null, + o_orderdate date not null, + o_orderpriority char(15) not null, + o_clerk char(15) not null, + o_shippriority integer not null, + o_comment varchar(79) not null, + PRIMARY KEY(o_orderkey) ); +DEBUG: CREATE TABLE / PRIMARY KEY will create implicit index "orders_hash_pkey" for table "orders_hash" +DEBUG: building index "orders_hash_pkey" on table "orders_hash" +SELECT master_create_distributed_table('orders_hash', 'o_orderkey', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('orders_hash', 2, 1); + master_create_worker_shards +----------------------------- + +(1 row) + +CREATE TABLE customer_hash ( + c_custkey integer not null, + c_name varchar(25) not null, + c_address varchar(40) not null, + c_nationkey integer not null, + c_phone char(15) not null, + c_acctbal decimal(15,2) not null, + c_mktsegment char(10) not null, + c_comment varchar(117) not null); +SELECT master_create_distributed_table('customer_hash', 'c_custkey', 'hash'); + 
master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('customer_hash', 1, 1); + master_create_worker_shards +----------------------------- + +(1 row) + -- The following query checks that we can correctly handle self-joins EXPLAIN SELECT l1.l_quantity FROM lineitem l1, lineitem l2 WHERE l1.l_orderkey = l2.l_orderkey AND l1.l_quantity > 5; @@ -68,44 +152,32 @@ DETAIL: Cartesian products are currently unsupported BEGIN; -- Validate that we take into account the partition method when building the -- join-order plan. -UPDATE pg_dist_partition SET partmethod = 'h' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'lineitem'); -EXPLAIN SELECT count(*) FROM orders, lineitem +EXPLAIN SELECT count(*) FROM orders, lineitem_hash WHERE o_orderkey = l_orderkey; -LOG: join order: [ "orders" ][ single partition join "lineitem" ] +LOG: join order: [ "orders" ][ single partition join "lineitem_hash" ] QUERY PLAN ------------------------------------------------------------ explain statements for distributed queries are not enabled (1 row) -- Verify we handle local joins between two hash-partitioned tables. -UPDATE pg_dist_partition SET partmethod = 'h' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'orders'); -EXPLAIN SELECT count(*) FROM orders, lineitem +EXPLAIN SELECT count(*) FROM orders_hash, lineitem_hash WHERE o_orderkey = l_orderkey; -LOG: join order: [ "orders" ][ local partition join "lineitem" ] +LOG: join order: [ "orders_hash" ][ local partition join "lineitem_hash" ] QUERY PLAN ------------------------------------------------------------ explain statements for distributed queries are not enabled (1 row) -UPDATE pg_dist_partition SET partmethod = 'a' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'lineitem'); -UPDATE pg_dist_partition SET partmethod = 'a' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'orders'); -- Validate that we can handle broadcast joins with hash-partitioned tables. -UPDATE pg_dist_partition SET partmethod = 'h' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'customer'); -EXPLAIN SELECT count(*) FROM customer, nation +EXPLAIN SELECT count(*) FROM customer_hash, nation WHERE c_nationkey = n_nationkey; -LOG: join order: [ "customer" ][ broadcast join "nation" ] +LOG: join order: [ "customer_hash" ][ broadcast join "nation" ] QUERY PLAN ------------------------------------------------------------ explain statements for distributed queries are not enabled (1 row) -UPDATE pg_dist_partition SET partmethod = 'a' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'customer'); -- Update the large table shard count for all the following tests. 
SET citus.large_table_shard_count TO 1; -- Validate that we don't use a single-partition join method for a hash @@ -120,32 +192,27 @@ LOG: join order: [ "orders" ][ dual partition join "lineitem" ][ dual partition -- Validate that we don't chose a single-partition join method with a -- hash-partitioned base table -UPDATE pg_dist_partition SET partmethod = 'h' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'customer'); -EXPLAIN SELECT count(*) FROM orders, customer +EXPLAIN SELECT count(*) FROM orders, customer_hash WHERE c_custkey = o_custkey; -LOG: join order: [ "orders" ][ dual partition join "customer" ] +LOG: join order: [ "orders" ][ dual partition join "customer_hash" ] QUERY PLAN ------------------------------------------------------------ explain statements for distributed queries are not enabled (1 row) -UPDATE pg_dist_partition SET partmethod = 'a' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'customer'); -- Validate that we can re-partition a hash partitioned table to join with a -- range partitioned one. -UPDATE pg_dist_partition SET partmethod = 'h' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'orders'); -EXPLAIN SELECT count(*) FROM orders, customer +EXPLAIN SELECT count(*) FROM orders_hash, customer WHERE c_custkey = o_custkey; -LOG: join order: [ "orders" ][ single partition join "customer" ] +LOG: join order: [ "orders_hash" ][ single partition join "customer" ] QUERY PLAN ------------------------------------------------------------ explain statements for distributed queries are not enabled (1 row) -UPDATE pg_dist_partition SET partmethod = 'a' WHERE - logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'orders'); COMMIT; -- Reset client logging level to its previous value SET client_min_messages TO NOTICE; +DROP TABLE lineitem_hash; +DROP TABLE orders_hash; +DROP TABLE customer_hash; diff --git a/src/test/regress/expected/multi_large_table_join_planning.out b/src/test/regress/expected/multi_large_table_join_planning.out index 413ac9ca4..ee2491426 100644 --- a/src/test/regress/expected/multi_large_table_join_planning.out +++ b/src/test/regress/expected/multi_large_table_join_planning.out @@ -74,13 +74,13 @@ DEBUG: join prunable for intervals [6001,7000] and [1,1000] DEBUG: generated sql query for job 1251 and task 3 DETAIL: query string: "SELECT "pg_merge_job_1250.task_000019".intermediate_column_1250_0, "pg_merge_job_1250.task_000019".intermediate_column_1250_1, "pg_merge_job_1250.task_000019".intermediate_column_1250_2, "pg_merge_job_1250.task_000019".intermediate_column_1250_3, "pg_merge_job_1250.task_000019".intermediate_column_1250_4 FROM (pg_merge_job_1250.task_000019 "pg_merge_job_1250.task_000019" JOIN part_102019 part ON (("pg_merge_job_1250.task_000019".intermediate_column_1250_0 = part.p_partkey))) WHERE (part.p_size > 8)" DEBUG: generated sql query for job 1251 and task 6 -DETAIL: query string: "SELECT "pg_merge_job_1250.task_000026".intermediate_column_1250_0, "pg_merge_job_1250.task_000026".intermediate_column_1250_1, "pg_merge_job_1250.task_000026".intermediate_column_1250_2, "pg_merge_job_1250.task_000026".intermediate_column_1250_3, "pg_merge_job_1250.task_000026".intermediate_column_1250_4 FROM (pg_merge_job_1250.task_000026 "pg_merge_job_1250.task_000026" JOIN part_102039 part ON (("pg_merge_job_1250.task_000026".intermediate_column_1250_0 = part.p_partkey))) WHERE (part.p_size > 8)" +DETAIL: query string: "SELECT 
"pg_merge_job_1250.task_000026".intermediate_column_1250_0, "pg_merge_job_1250.task_000026".intermediate_column_1250_1, "pg_merge_job_1250.task_000026".intermediate_column_1250_2, "pg_merge_job_1250.task_000026".intermediate_column_1250_3, "pg_merge_job_1250.task_000026".intermediate_column_1250_4 FROM (pg_merge_job_1250.task_000026 "pg_merge_job_1250.task_000026" JOIN part_102044 part ON (("pg_merge_job_1250.task_000026".intermediate_column_1250_0 = part.p_partkey))) WHERE (part.p_size > 8)" DEBUG: pruning merge fetch taskId 1 DETAIL: Creating dependency on merge taskId 19 DEBUG: pruning merge fetch taskId 4 DETAIL: Creating dependency on merge taskId 26 -DEBUG: assigned task 3 to node localhost:57637 -DEBUG: assigned task 6 to node localhost:57638 +DEBUG: assigned task 6 to node localhost:57637 +DEBUG: assigned task 3 to node localhost:57638 DEBUG: join prunable for intervals [1,1000] and [1001,2000] DEBUG: join prunable for intervals [1,1000] and [6001,7000] DEBUG: join prunable for intervals [1001,2000] and [1,1000] @@ -90,18 +90,18 @@ DEBUG: join prunable for intervals [6001,7000] and [1001,2000] DEBUG: generated sql query for job 1252 and task 3 DETAIL: query string: "SELECT "pg_merge_job_1251.task_000007".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000007".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000007 "pg_merge_job_1251.task_000007" JOIN customer_102017 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000007".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000007".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000007".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000007".intermediate_column_1251_0, "pg_merge_job_1251.task_000007".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000007".intermediate_column_1251_0, "pg_merge_job_1251.task_000007".intermediate_column_1251_1, "pg_merge_job_1251.task_000007".intermediate_column_1251_0, "pg_merge_job_1251.task_000007".intermediate_column_1251_1 LIMIT '30'::bigint" DEBUG: generated sql query for job 1252 and task 6 -DETAIL: query string: "SELECT "pg_merge_job_1251.task_000010".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000010 "pg_merge_job_1251.task_000010" JOIN customer_102038 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000010".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000010".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000010".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1, "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 LIMIT '30'::bigint" +DETAIL: query string: "SELECT "pg_merge_job_1251.task_000010".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000010 "pg_merge_job_1251.task_000010" JOIN customer_102043 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000010".intermediate_column_1251_4))) WHERE 
((("pg_merge_job_1251.task_000010".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000010".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1, "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 LIMIT '30'::bigint" DEBUG: generated sql query for job 1252 and task 9 -DETAIL: query string: "SELECT "pg_merge_job_1251.task_000013".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000013 "pg_merge_job_1251.task_000013" JOIN customer_102037 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000013".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000013".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000013".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1, "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 LIMIT '30'::bigint" +DETAIL: query string: "SELECT "pg_merge_job_1251.task_000013".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000013 "pg_merge_job_1251.task_000013" JOIN customer_102042 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000013".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000013".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000013".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1, "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 LIMIT '30'::bigint" DEBUG: pruning merge fetch taskId 1 DETAIL: Creating dependency on merge taskId 7 DEBUG: pruning merge fetch taskId 4 DETAIL: Creating dependency on merge taskId 10 DEBUG: pruning merge fetch taskId 7 DETAIL: Creating dependency on merge taskId 13 -DEBUG: assigned task 6 to node localhost:57637 -DEBUG: assigned task 3 to node localhost:57638 DEBUG: assigned task 9 to node localhost:57637 +DEBUG: assigned task 3 to node localhost:57638 +DEBUG: assigned task 6 to node localhost:57637 DEBUG: completed cleanup query for job 1252 on node "localhost:57638" DEBUG: completed cleanup query for job 1252 on node "localhost:57637" DEBUG: completed cleanup query for job 1251 on node "localhost:57638" diff --git a/src/test/regress/expected/multi_large_table_join_planning_0.out b/src/test/regress/expected/multi_large_table_join_planning_0.out index f254c40ca..7ea71fb38 100644 --- a/src/test/regress/expected/multi_large_table_join_planning_0.out +++ 
b/src/test/regress/expected/multi_large_table_join_planning_0.out @@ -74,13 +74,13 @@ DEBUG: join prunable for intervals [6001,7000] and [1,1000] DEBUG: generated sql query for job 1251 and task 3 DETAIL: query string: "SELECT "pg_merge_job_1250.task_000019".intermediate_column_1250_0, "pg_merge_job_1250.task_000019".intermediate_column_1250_1, "pg_merge_job_1250.task_000019".intermediate_column_1250_2, "pg_merge_job_1250.task_000019".intermediate_column_1250_3, "pg_merge_job_1250.task_000019".intermediate_column_1250_4 FROM (pg_merge_job_1250.task_000019 "pg_merge_job_1250.task_000019" JOIN part_102019 part ON (("pg_merge_job_1250.task_000019".intermediate_column_1250_0 = part.p_partkey))) WHERE (part.p_size > 8)" DEBUG: generated sql query for job 1251 and task 6 -DETAIL: query string: "SELECT "pg_merge_job_1250.task_000026".intermediate_column_1250_0, "pg_merge_job_1250.task_000026".intermediate_column_1250_1, "pg_merge_job_1250.task_000026".intermediate_column_1250_2, "pg_merge_job_1250.task_000026".intermediate_column_1250_3, "pg_merge_job_1250.task_000026".intermediate_column_1250_4 FROM (pg_merge_job_1250.task_000026 "pg_merge_job_1250.task_000026" JOIN part_102039 part ON (("pg_merge_job_1250.task_000026".intermediate_column_1250_0 = part.p_partkey))) WHERE (part.p_size > 8)" +DETAIL: query string: "SELECT "pg_merge_job_1250.task_000026".intermediate_column_1250_0, "pg_merge_job_1250.task_000026".intermediate_column_1250_1, "pg_merge_job_1250.task_000026".intermediate_column_1250_2, "pg_merge_job_1250.task_000026".intermediate_column_1250_3, "pg_merge_job_1250.task_000026".intermediate_column_1250_4 FROM (pg_merge_job_1250.task_000026 "pg_merge_job_1250.task_000026" JOIN part_102044 part ON (("pg_merge_job_1250.task_000026".intermediate_column_1250_0 = part.p_partkey))) WHERE (part.p_size > 8)" DEBUG: pruning merge fetch taskId 1 DETAIL: Creating dependency on merge taskId 19 DEBUG: pruning merge fetch taskId 4 DETAIL: Creating dependency on merge taskId 26 -DEBUG: assigned task 3 to node localhost:57637 -DEBUG: assigned task 6 to node localhost:57638 +DEBUG: assigned task 6 to node localhost:57637 +DEBUG: assigned task 3 to node localhost:57638 DEBUG: join prunable for intervals [1,1000] and [1001,2000] DEBUG: join prunable for intervals [1,1000] and [6001,7000] DEBUG: join prunable for intervals [1001,2000] and [1,1000] @@ -90,18 +90,18 @@ DEBUG: join prunable for intervals [6001,7000] and [1001,2000] DEBUG: generated sql query for job 1252 and task 3 DETAIL: query string: "SELECT "pg_merge_job_1251.task_000007".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000007".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000007 "pg_merge_job_1251.task_000007" JOIN customer_102017 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000007".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000007".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000007".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000007".intermediate_column_1251_0, "pg_merge_job_1251.task_000007".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000007".intermediate_column_1251_0, "pg_merge_job_1251.task_000007".intermediate_column_1251_1, "pg_merge_job_1251.task_000007".intermediate_column_1251_0, "pg_merge_job_1251.task_000007".intermediate_column_1251_1 LIMIT 30::bigint" DEBUG: generated sql query for job 1252 and task 6 -DETAIL: query string: "SELECT 
"pg_merge_job_1251.task_000010".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000010 "pg_merge_job_1251.task_000010" JOIN customer_102038 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000010".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000010".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000010".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1, "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 LIMIT 30::bigint" +DETAIL: query string: "SELECT "pg_merge_job_1251.task_000010".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000010 "pg_merge_job_1251.task_000010" JOIN customer_102043 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000010".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000010".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000010".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1, "pg_merge_job_1251.task_000010".intermediate_column_1251_0, "pg_merge_job_1251.task_000010".intermediate_column_1251_1 LIMIT 30::bigint" DEBUG: generated sql query for job 1252 and task 9 -DETAIL: query string: "SELECT "pg_merge_job_1251.task_000013".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000013 "pg_merge_job_1251.task_000013" JOIN customer_102037 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000013".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000013".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000013".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1, "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 LIMIT 30::bigint" +DETAIL: query string: "SELECT "pg_merge_job_1251.task_000013".intermediate_column_1251_0 AS l_partkey, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 AS o_orderkey, count(*) AS count FROM (pg_merge_job_1251.task_000013 "pg_merge_job_1251.task_000013" JOIN customer_102042 customer ON ((customer.c_custkey = "pg_merge_job_1251.task_000013".intermediate_column_1251_4))) WHERE ((("pg_merge_job_1251.task_000013".intermediate_column_1251_2 > 5.0) OR ("pg_merge_job_1251.task_000013".intermediate_column_1251_3 > 1200.0)) AND (customer.c_acctbal < 5000.0)) GROUP BY "pg_merge_job_1251.task_000013".intermediate_column_1251_0, 
"pg_merge_job_1251.task_000013".intermediate_column_1251_1 ORDER BY "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1, "pg_merge_job_1251.task_000013".intermediate_column_1251_0, "pg_merge_job_1251.task_000013".intermediate_column_1251_1 LIMIT 30::bigint" DEBUG: pruning merge fetch taskId 1 DETAIL: Creating dependency on merge taskId 7 DEBUG: pruning merge fetch taskId 4 DETAIL: Creating dependency on merge taskId 10 DEBUG: pruning merge fetch taskId 7 DETAIL: Creating dependency on merge taskId 13 -DEBUG: assigned task 6 to node localhost:57637 -DEBUG: assigned task 3 to node localhost:57638 DEBUG: assigned task 9 to node localhost:57637 +DEBUG: assigned task 3 to node localhost:57638 +DEBUG: assigned task 6 to node localhost:57637 DEBUG: completed cleanup query for job 1252 on node "localhost:57638" DEBUG: completed cleanup query for job 1252 on node "localhost:57637" DEBUG: completed cleanup query for job 1251 on node "localhost:57638" diff --git a/src/test/regress/expected/multi_large_table_pruning.out b/src/test/regress/expected/multi_large_table_pruning.out index ad13f7109..d20ae46bc 100644 --- a/src/test/regress/expected/multi_large_table_pruning.out +++ b/src/test/regress/expected/multi_large_table_pruning.out @@ -57,8 +57,8 @@ WHERE o_custkey = c_custkey AND c_custkey < 0; DEBUG: predicate pruning for shardId 102017 -DEBUG: predicate pruning for shardId 102038 -DEBUG: predicate pruning for shardId 102037 +DEBUG: predicate pruning for shardId 102043 +DEBUG: predicate pruning for shardId 102042 count ------- diff --git a/src/test/regress/expected/multi_large_table_task_assignment.out b/src/test/regress/expected/multi_large_table_task_assignment.out index 23b7bd2ac..74747311a 100644 --- a/src/test/regress/expected/multi_large_table_task_assignment.out +++ b/src/test/regress/expected/multi_large_table_task_assignment.out @@ -39,9 +39,9 @@ DEBUG: pruning merge fetch taskId 4 DETAIL: Creating dependency on merge taskId 8 DEBUG: pruning merge fetch taskId 7 DETAIL: Creating dependency on merge taskId 11 -DEBUG: assigned task 6 to node localhost:57637 -DEBUG: assigned task 3 to node localhost:57638 DEBUG: assigned task 9 to node localhost:57637 +DEBUG: assigned task 3 to node localhost:57638 +DEBUG: assigned task 6 to node localhost:57637 DEBUG: CommitTransactionCommand count ------- @@ -64,12 +64,12 @@ WHERE o_custkey = c_custkey AND o_orderkey = l_orderkey; DEBUG: StartTransactionCommand -DEBUG: assigned task 9 to node localhost:57637 -DEBUG: assigned task 3 to node localhost:57638 -DEBUG: assigned task 12 to node localhost:57637 -DEBUG: assigned task 6 to node localhost:57638 DEBUG: assigned task 15 to node localhost:57637 -DEBUG: assigned task 18 to node localhost:57638 +DEBUG: assigned task 3 to node localhost:57638 +DEBUG: assigned task 18 to node localhost:57637 +DEBUG: assigned task 6 to node localhost:57638 +DEBUG: assigned task 9 to node localhost:57637 +DEBUG: assigned task 12 to node localhost:57638 DEBUG: join prunable for intervals [1,2496] and [2497,4964] DEBUG: join prunable for intervals [1,2496] and [4965,5986] DEBUG: join prunable for intervals [1,2496] and [8997,11554] @@ -160,9 +160,9 @@ DEBUG: assigned task 8 to node localhost:57637 DEBUG: assigned task 6 to node localhost:57638 DEBUG: assigned task 12 to node localhost:57637 DEBUG: assigned task 10 to node localhost:57638 -DEBUG: assigned task 4 to node localhost:57637 -DEBUG: assigned task 2 to node localhost:57638 DEBUG: assigned task 6 to node 
localhost:57637 +DEBUG: assigned task 2 to node localhost:57638 +DEBUG: assigned task 4 to node localhost:57637 DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 diff --git a/src/test/regress/expected/multi_modifications.out b/src/test/regress/expected/multi_modifications.out index 7bfd65e3b..84b519435 100644 --- a/src/test/regress/expected/multi_modifications.out +++ b/src/test/regress/expected/multi_modifications.out @@ -93,7 +93,7 @@ INSERT INTO append_partitioned VALUES (414123, 'AAPL', 9580, '2004-10-19 10:23:5 SET client_min_messages TO 'DEBUG2'; SET citus.task_executor_type TO 'real-time'; SELECT * FROM range_partitioned WHERE id = 32743; -DEBUG: predicate pruning for shardId 103070 +DEBUG: predicate pruning for shardId 103084 DEBUG: Plan is router executable id | symbol | bidder_id | placed_at | kind | limit_price -------+--------+-----------+--------------------------+------+------------- @@ -101,7 +101,7 @@ DEBUG: Plan is router executable (1 row) SELECT * FROM append_partitioned WHERE id = 414123; -DEBUG: predicate pruning for shardId 103072 +DEBUG: predicate pruning for shardId 103086 DEBUG: Plan is router executable id | symbol | bidder_id | placed_at | kind | limit_price --------+--------+-----------+--------------------------+------+------------- @@ -275,7 +275,7 @@ WHERE nodename = 'localhost' AND -- Fourth: Perform the same INSERT (primary key violation) INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67); WARNING: Bad result from localhost:57638 -DETAIL: Remote message: duplicate key value violates unique constraint "limit_orders_pkey_103067" +DETAIL: Remote message: duplicate key value violates unique constraint "limit_orders_pkey_103081" -- Last: Verify the insert worked but the placement with the PK violation is now unhealthy SELECT count(*) FROM limit_orders WHERE id = 275; count diff --git a/src/test/regress/expected/multi_prune_shard_list.out b/src/test/regress/expected/multi_prune_shard_list.out index 44ae8203a..934639051 100644 --- a/src/test/regress/expected/multi_prune_shard_list.out +++ b/src/test/regress/expected/multi_prune_shard_list.out @@ -28,44 +28,47 @@ CREATE FUNCTION print_sorted_shard_intervals(regclass) -- =================================================================== -- test shard pruning functionality -- =================================================================== --- create distributed table metadata to observe shard pruning +-- create distributed table observe shard pruning CREATE TABLE pruning ( species text, last_pruned date, plant_id integer ); -INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey) -VALUES - ('pruning'::regclass, 'h', column_name_to_column('pruning'::regclass, 'species')); -INSERT INTO pg_dist_shard - (shardid, logicalrelid, shardstorage, shardminvalue, shardmaxvalue) -VALUES - (10, 'pruning'::regclass, 't', '-2147483648', '-1073741826'), - (11, 'pruning'::regclass, 't', '-1073741825', '-3'), - (12, 'pruning'::regclass, 't', '-2', '1073741820'), - (13, 'pruning'::regclass, 't', '1073741821', '2147483647'); +SELECT master_create_distributed_table('pruning', 'species', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +-- create worker shards +SELECT master_create_worker_shards('pruning', 4, 1); + master_create_worker_shards +----------------------------- + +(1 row) + -- with no values, expect all shards SELECT 
prune_using_no_values('pruning'); - prune_using_no_values ------------------------ - {10,11,12,13} + prune_using_no_values +------------------------------- + {103070,103071,103072,103073} (1 row) -- with a single value, expect a single shard SELECT prune_using_single_value('pruning', 'tomato'); prune_using_single_value -------------------------- - {12} + {103072} (1 row) -- the above is true even if that value is null SELECT prune_using_single_value('pruning', NULL); prune_using_single_value -------------------------- - {12} + {103072} (1 row) -- build an OR clause and expect more than one sahrd SELECT prune_using_either_value('pruning', 'tomato', 'petunia'); prune_using_either_value -------------------------- - {11,12} + {103071,103072} (1 row) -- an AND clause with incompatible values returns no shards @@ -79,7 +82,7 @@ SELECT prune_using_both_values('pruning', 'tomato', 'petunia'); SELECT prune_using_both_values('pruning', 'tomato', 'rose'); prune_using_both_values ------------------------- - {12} + {103072} (1 row) -- unit test of the equality expression generation code @@ -91,86 +94,117 @@ SELECT debug_equality_expression('pruning'); -- print the initial ordering of shard intervals SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {10,11,12,13} + print_sorted_shard_intervals +------------------------------- + {103070,103071,103072,103073} (1 row) -- update only min value for one shard -UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 11; +UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103071; SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {10,12,13,11} + print_sorted_shard_intervals +------------------------------- + {103070,103072,103073,103071} (1 row) -- now lets have one more shard without min/max values -UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 12; +UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103072; SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {10,13,11,12} + print_sorted_shard_intervals +------------------------------- + {103070,103073,103071,103072} (1 row) -- now lets have one more shard without min/max values -UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 10; +UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103070; SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {13,10,11,12} + print_sorted_shard_intervals +------------------------------- + {103073,103070,103071,103072} (1 row) -- all shard placements are uninitialized -UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 13; +UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103073; SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {10,11,12,13} + print_sorted_shard_intervals +------------------------------- + {103070,103071,103072,103073} +(1 row) + +-- create range distributed table observe shard pruning +CREATE TABLE pruning_range ( species text, last_pruned date, plant_id integer ); +SELECT master_create_distributed_table('pruning_range', 'species', 'range'); + master_create_distributed_table 
+--------------------------------- + +(1 row) + +-- create worker shards +SELECT master_create_empty_shard('pruning_range'); + master_create_empty_shard +--------------------------- + 103074 +(1 row) + +SELECT master_create_empty_shard('pruning_range'); + master_create_empty_shard +--------------------------- + 103075 +(1 row) + +SELECT master_create_empty_shard('pruning_range'); + master_create_empty_shard +--------------------------- + 103076 +(1 row) + +SELECT master_create_empty_shard('pruning_range'); + master_create_empty_shard +--------------------------- + 103077 (1 row) --- now update the metadata so that the table is a range distributed table -UPDATE pg_dist_partition SET partmethod = 'r' WHERE logicalrelid = 'pruning'::regclass; -- now the comparison is done via the partition column type, which is text -UPDATE pg_dist_shard SET shardminvalue = 'a', shardmaxvalue = 'b' WHERE shardid = 10; -UPDATE pg_dist_shard SET shardminvalue = 'c', shardmaxvalue = 'd' WHERE shardid = 11; -UPDATE pg_dist_shard SET shardminvalue = 'e', shardmaxvalue = 'f' WHERE shardid = 12; -UPDATE pg_dist_shard SET shardminvalue = 'g', shardmaxvalue = 'h' WHERE shardid = 13; +UPDATE pg_dist_shard SET shardminvalue = 'a', shardmaxvalue = 'b' WHERE shardid = 103074; +UPDATE pg_dist_shard SET shardminvalue = 'c', shardmaxvalue = 'd' WHERE shardid = 103075; +UPDATE pg_dist_shard SET shardminvalue = 'e', shardmaxvalue = 'f' WHERE shardid = 103076; +UPDATE pg_dist_shard SET shardminvalue = 'g', shardmaxvalue = 'h' WHERE shardid = 103077; -- print the ordering of shard intervals with range partitioning as well -SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {10,11,12,13} +SELECT print_sorted_shard_intervals('pruning_range'); + print_sorted_shard_intervals +------------------------------- + {103074,103075,103076,103077} (1 row) -- update only min value for one shard -UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 11; -SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {10,12,13,11} +UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103075; +SELECT print_sorted_shard_intervals('pruning_range'); + print_sorted_shard_intervals +------------------------------- + {103074,103076,103077,103075} (1 row) -- now lets have one more shard without min/max values -UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 12; -SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {10,13,11,12} +UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103076; +SELECT print_sorted_shard_intervals('pruning_range'); + print_sorted_shard_intervals +------------------------------- + {103074,103077,103075,103076} (1 row) -- now lets have one more shard without min/max values -UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 10; -SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {13,10,11,12} +UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103074; +SELECT print_sorted_shard_intervals('pruning_range'); + print_sorted_shard_intervals +------------------------------- + {103077,103074,103075,103076} (1 row) -- all shard placements are uninitialized -UPDATE pg_dist_shard set shardminvalue = NULL, 
shardmaxvalue = NULL WHERE shardid = 13; -SELECT print_sorted_shard_intervals('pruning'); - print_sorted_shard_intervals ------------------------------- - {10,11,12,13} +UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103077; +SELECT print_sorted_shard_intervals('pruning_range'); + print_sorted_shard_intervals +------------------------------- + {103074,103075,103076,103077} (1 row) diff --git a/src/test/regress/expected/multi_simple_queries.out b/src/test/regress/expected/multi_simple_queries.out index 31ef27012..9a857511a 100644 --- a/src/test/regress/expected/multi_simple_queries.out +++ b/src/test/regress/expected/multi_simple_queries.out @@ -323,7 +323,7 @@ SELECT * FROM articles WHERE author_id = 1; DEBUG: Creating router plan -DEBUG: predicate pruning for shardId 103094 +DEBUG: predicate pruning for shardId 103108 DEBUG: Plan is router executable id | author_id | title | word_count ----+-----------+--------------+------------ @@ -338,7 +338,7 @@ DEBUG: Plan is router executable SELECT * FROM articles WHERE author_id = 1 OR author_id = 17; -DEBUG: predicate pruning for shardId 103094 +DEBUG: predicate pruning for shardId 103108 DEBUG: Plan is router executable id | author_id | title | word_count ----+-----------+--------------+------------ @@ -367,7 +367,7 @@ SELECT id as article_id, word_count * id as random_value FROM articles WHERE author_id = 1; DEBUG: Creating router plan -DEBUG: predicate pruning for shardId 103094 +DEBUG: predicate pruning for shardId 103108 DEBUG: Plan is router executable article_id | random_value ------------+-------------- @@ -385,7 +385,7 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id = 10 and a.author_id = b.author_id LIMIT 3; DEBUG: push down of limit count: 3 -DEBUG: predicate pruning for shardId 103094 +DEBUG: predicate pruning for shardId 103108 DEBUG: join prunable for intervals [-2147483648,-1] and [0,2147483647] DEBUG: Plan is router executable first_author | second_word_count @@ -402,7 +402,7 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id = 10 and a.author_id = b.author_id LIMIT 3; DEBUG: push down of limit count: 3 -DEBUG: predicate pruning for shardId 103094 +DEBUG: predicate pruning for shardId 103108 first_author | second_word_count --------------+------------------- 10 | 19519 @@ -416,7 +416,7 @@ SELECT * WHERE author_id = 1 LIMIT 2; DEBUG: Creating router plan -DEBUG: predicate pruning for shardId 103094 +DEBUG: predicate pruning for shardId 103108 DEBUG: Plan is router executable id | author_id | title | word_count ----+-----------+----------+------------ @@ -432,7 +432,7 @@ SELECT id WHERE author_id = 1 GROUP BY id; DEBUG: Creating router plan -DEBUG: predicate pruning for shardId 103094 +DEBUG: predicate pruning for shardId 103108 DEBUG: Plan is router executable id ---- @@ -452,7 +452,7 @@ SELECT avg(word_count) FROM articles WHERE author_id = 2; DEBUG: Creating router plan -DEBUG: predicate pruning for shardId 103093 +DEBUG: predicate pruning for shardId 103107 DEBUG: Plan is router executable avg -------------------- @@ -466,7 +466,7 @@ SELECT max(word_count) as max, min(word_count) as min, FROM articles WHERE author_id = 2; DEBUG: Creating router plan -DEBUG: predicate pruning for shardId 103093 +DEBUG: predicate pruning for shardId 103107 DEBUG: Plan is router executable max | min | sum | cnt -------+------+-------+----- @@ -477,7 +477,7 @@ DEBUG: Plan is router executable SELECT * FROM articles a, articles b 
WHERE a.id = b.id AND a.author_id = 1; -DEBUG: predicate pruning for shardId 103094 +DEBUG: predicate pruning for shardId 103108 DEBUG: join prunable for task partitionId 0 and 1 DEBUG: join prunable for task partitionId 0 and 2 DEBUG: join prunable for task partitionId 0 and 3 diff --git a/src/test/regress/input/multi_append_table_to_shard.source b/src/test/regress/input/multi_append_table_to_shard.source index 04ff276d4..af0816f7d 100644 --- a/src/test/regress/input/multi_append_table_to_shard.source +++ b/src/test/regress/input/multi_append_table_to_shard.source @@ -16,6 +16,14 @@ CREATE TABLE multi_append_table_to_shard_left ); SELECT master_create_distributed_table('multi_append_table_to_shard_left', 'left_number', 'append'); +CREATE TABLE multi_append_table_to_shard_right_hash +( + right_number INTEGER not null, + right_text TEXT not null +); +SELECT master_create_distributed_table('multi_append_table_to_shard_right_hash', 'right_number', 'hash'); +SELECT master_create_worker_shards('multi_append_table_to_shard_right_hash', 1, 1); + -- Replicate 'left' table on both workers SELECT set_config('citus.shard_replication_factor', '2', false); \STAGE multi_append_table_to_shard_left FROM '@abs_srcdir@/data/agg.data' @@ -70,19 +78,12 @@ FROM multi_append_table_to_shard_left, WHERE left_number = right_number; -- Check that we error out if we try to append data to a hash partitioned table. - -UPDATE pg_dist_partition SET partmethod = 'h' WHERE - logicalrelid = 'multi_append_table_to_shard_right'::regclass::oid; - -SELECT master_create_empty_shard('multi_append_table_to_shard_right'); +SELECT master_create_empty_shard('multi_append_table_to_shard_right_hash'); SELECT master_append_table_to_shard(shardid, 'multi_append_table_to_shard_stage', 'localhost', 57636) FROM pg_dist_shard -WHERE 'multi_append_table_to_shard_right'::regclass::oid = logicalrelid; - -UPDATE pg_dist_partition SET partmethod = 'a' WHERE - logicalrelid = 'multi_append_table_to_shard_right'::regclass::oid; +WHERE 'multi_append_table_to_shard_right_hash'::regclass::oid = logicalrelid; -- Clean up after test SELECT master_apply_delete_command('DELETE FROM multi_append_table_to_shard_right'); diff --git a/src/test/regress/output/multi_append_table_to_shard.source b/src/test/regress/output/multi_append_table_to_shard.source index 682671c74..e6f4fba5b 100644 --- a/src/test/regress/output/multi_append_table_to_shard.source +++ b/src/test/regress/output/multi_append_table_to_shard.source @@ -24,6 +24,23 @@ SELECT master_create_distributed_table('multi_append_table_to_shard_left', 'left (1 row) +CREATE TABLE multi_append_table_to_shard_right_hash +( + right_number INTEGER not null, + right_text TEXT not null +); +SELECT master_create_distributed_table('multi_append_table_to_shard_right_hash', 'right_number', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('multi_append_table_to_shard_right_hash', 1, 1); + master_create_worker_shards +----------------------------- + +(1 row) + -- Replicate 'left' table on both workers SELECT set_config('citus.shard_replication_factor', '2', false); set_config @@ -107,19 +124,15 @@ WHERE left_number = right_number; (1 row) -- Check that we error out if we try to append data to a hash partitioned table. 
-UPDATE pg_dist_partition SET partmethod = 'h' WHERE - logicalrelid = 'multi_append_table_to_shard_right'::regclass::oid; -SELECT master_create_empty_shard('multi_append_table_to_shard_right'); -ERROR: relation "multi_append_table_to_shard_right" is a hash partitioned table +SELECT master_create_empty_shard('multi_append_table_to_shard_right_hash'); +ERROR: relation "multi_append_table_to_shard_right_hash" is a hash partitioned table DETAIL: We currently don't support creating shards on hash-partitioned tables SELECT master_append_table_to_shard(shardid, 'multi_append_table_to_shard_stage', 'localhost', 57636) FROM pg_dist_shard -WHERE 'multi_append_table_to_shard_right'::regclass::oid = logicalrelid; -ERROR: cannot append to shardId 103013 +WHERE 'multi_append_table_to_shard_right_hash'::regclass::oid = logicalrelid; +ERROR: cannot append to shardId 103011 DETAIL: We currently don't support appending to shards in hash-partitioned tables -UPDATE pg_dist_partition SET partmethod = 'a' WHERE - logicalrelid = 'multi_append_table_to_shard_right'::regclass::oid; -- Clean up after test SELECT master_apply_delete_command('DELETE FROM multi_append_table_to_shard_right'); master_apply_delete_command diff --git a/src/test/regress/output/multi_outer_join.source b/src/test/regress/output/multi_outer_join.source index 025f258fe..ac7b7eb3c 100644 --- a/src/test/regress/output/multi_outer_join.source +++ b/src/test/regress/output/multi_outer_join.source @@ -301,16 +301,6 @@ FROM LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer_join_right" ][ broadcast join "multi_outer_join_third" ] l_custkey | r_custkey | t_custkey -----------+-----------+----------- - 21 | 21 | 21 - 22 | 22 | 22 - 23 | 23 | 23 - 24 | 24 | 24 - 25 | 25 | 25 - 26 | 26 | 26 - 27 | 27 | 27 - 28 | 28 | 28 - 29 | 29 | 29 - 30 | 30 | 30 1 | | 2 | | 3 | | @@ -326,6 +316,16 @@ LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer 13 | 13 | 13 14 | 14 | 14 15 | 15 | 15 + 21 | 21 | 21 + 22 | 22 | 22 + 23 | 23 | 23 + 24 | 24 | 24 + 25 | 25 | 25 + 26 | 26 | 26 + 27 | 27 | 27 + 28 | 28 | 28 + 29 | 29 | 29 + 30 | 30 | 30 (25 rows) -- Right join with single shard right most table should error out @@ -347,16 +347,6 @@ FROM LOG: join order: [ "multi_outer_join_right" ][ broadcast join "multi_outer_join_third" ][ local partition join "multi_outer_join_left" ] t_custkey | r_custkey | l_custkey -----------+-----------+----------- - 21 | 21 | 21 - 22 | 22 | 22 - 23 | 23 | 23 - 24 | 24 | 24 - 25 | 25 | 25 - 26 | 26 | 26 - 27 | 27 | 27 - 28 | 28 | 28 - 29 | 29 | 29 - 30 | 30 | 30 11 | 11 | 11 12 | 12 | 12 13 | 13 | 13 @@ -367,6 +357,16 @@ LOG: join order: [ "multi_outer_join_right" ][ broadcast join "multi_outer_join 18 | 18 | 19 | 19 | 20 | 20 | + 21 | 21 | 21 + 22 | 22 | 22 + 23 | 23 | 23 + 24 | 24 | 24 + 25 | 25 | 25 + 26 | 26 | 26 + 27 | 27 | 27 + 28 | 28 | 28 + 29 | 29 | 29 + 30 | 30 | 30 (20 rows) -- Make it anti-join, should display values with l_custkey is null @@ -406,16 +406,6 @@ FROM LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer_join_right" ] l_custkey | r_custkey -----------+----------- - 21 | 21 - 22 | 22 - 23 | 23 - 24 | 24 - 25 | 25 - 26 | 26 - 27 | 27 - 28 | 28 - 29 | 29 - 30 | 30 1 | 2 | 3 | @@ -436,6 +426,16 @@ LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer | 18 | 19 | 16 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 (30 rows) -- full outer join + anti (right) should 
work with 1-1 matched shards @@ -525,6 +525,11 @@ FROM LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer_join_right" ][ broadcast join "multi_outer_join_third" ] l_custkey | r_custkey | t_custkey -----------+-----------+----------- + 11 | 11 | 11 + 12 | 12 | 12 + 13 | 13 | 13 + 14 | 14 | 14 + 15 | 15 | 15 21 | 21 | 21 22 | 22 | 22 23 | 23 | 23 @@ -535,11 +540,6 @@ LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer 28 | 28 | 28 29 | 29 | 29 30 | 30 | 30 - 11 | 11 | 11 - 12 | 12 | 12 - 13 | 13 | 13 - 14 | 14 | 14 - 15 | 15 | 15 (15 rows) -- inner (broadcast) join + 2 shards left (local) join should work @@ -552,16 +552,6 @@ FROM LOG: join order: [ "multi_outer_join_left" ][ broadcast join "multi_outer_join_third" ][ local partition join "multi_outer_join_right" ] l_custkey | t_custkey | r_custkey -----------+-----------+----------- - 21 | 21 | 21 - 22 | 22 | 22 - 23 | 23 | 23 - 24 | 24 | 24 - 25 | 25 | 25 - 26 | 26 | 26 - 27 | 27 | 27 - 28 | 28 | 28 - 29 | 29 | 29 - 30 | 30 | 30 1 | 1 | 2 | 2 | 3 | 3 | @@ -577,6 +567,16 @@ LOG: join order: [ "multi_outer_join_left" ][ broadcast join "multi_outer_join_ 13 | 13 | 13 14 | 14 | 14 15 | 15 | 15 + 21 | 21 | 21 + 22 | 22 | 22 + 23 | 23 | 23 + 24 | 24 | 24 + 25 | 25 | 25 + 26 | 26 | 26 + 27 | 27 | 27 + 28 | 28 | 28 + 29 | 29 | 29 + 30 | 30 | 30 (25 rows) -- inner (local) join + 2 shards left (dual partition) join should error out @@ -598,16 +598,6 @@ FROM LOG: join order: [ "multi_outer_join_left" ][ broadcast join "multi_outer_join_third" ][ local partition join "multi_outer_join_right" ] l_custkey | t_custkey | r_custkey -----------+-----------+----------- - 21 | 21 | 21 - 22 | 22 | 22 - 23 | 23 | 23 - 24 | 24 | 24 - 25 | 25 | 25 - 26 | 26 | 26 - 27 | 27 | 27 - 28 | 28 | 28 - 29 | 29 | 29 - 30 | 30 | 30 1 | 1 | 2 | 2 | 3 | 3 | @@ -623,6 +613,16 @@ LOG: join order: [ "multi_outer_join_left" ][ broadcast join "multi_outer_join_ 13 | 13 | 13 14 | 14 | 14 15 | 15 | 15 + 21 | 21 | 21 + 22 | 22 | 22 + 23 | 23 | 23 + 24 | 24 | 24 + 25 | 25 | 25 + 26 | 26 | 26 + 27 | 27 | 27 + 28 | 28 | 28 + 29 | 29 | 29 + 30 | 30 | 30 (25 rows) -- inner (broadcast) join + 2 shards left (local) + anti join should work @@ -660,16 +660,6 @@ FROM LOG: join order: [ "multi_outer_join_right" ][ local partition join "multi_outer_join_left" ][ broadcast join "multi_outer_join_third" ] t_custkey ----------- - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 11 12 13 @@ -680,6 +670,16 @@ LOG: join order: [ "multi_outer_join_right" ][ local partition join "multi_oute 18 19 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 (20 rows) -- Add a shard to the left table that overlaps with multiple shards in the right diff --git a/src/test/regress/sql/multi_distribution_metadata.sql b/src/test/regress/sql/multi_distribution_metadata.sql index d53d88c14..897c44c71 100644 --- a/src/test/regress/sql/multi_distribution_metadata.sql +++ b/src/test/regress/sql/multi_distribution_metadata.sql @@ -71,67 +71,54 @@ CREATE FUNCTION column_name_to_column(regclass, text) -- test distribution metadata functionality -- =================================================================== --- create table to be distributed +-- create hash distributed table CREATE TABLE events_hash ( id bigint, name text ); +SELECT master_create_distributed_table('events_hash', 'name', 'hash'); --- for this table we'll "distribute" manually but verify using function calls -INSERT INTO pg_dist_shard - (shardid, logicalrelid, shardstorage, shardminvalue, 
-VALUES
- (1, 'events_hash'::regclass, 't', '0', '10'),
- (2, 'events_hash'::regclass, 't', '10', '20'),
- (3, 'events_hash'::regclass, 't', '20', '30'),
- (4, 'events_hash'::regclass, 't', '30', '40');
+-- create worker shards
+SELECT master_create_worker_shards('events_hash', 4, 2);
-INSERT INTO pg_dist_shard_placement
- (nodename, nodeport, shardid, shardstate, shardlength)
-VALUES
- ('cluster-worker-01', 5432, 1, 0, 0),
- ('cluster-worker-01', 5432, 2, 0, 0),
- ('cluster-worker-02', 5433, 3, 0, 0),
- ('cluster-worker-02', 5433, 4, 0, 0),
- ('cluster-worker-03', 5434, 1, 1, 0),
- ('cluster-worker-03', 5434, 2, 1, 0),
- ('cluster-worker-04', 5435, 3, 1, 0),
- ('cluster-worker-04', 5435, 4, 1, 0);
-
-INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey)
-VALUES
- ('events_hash'::regclass, 'h', column_name_to_column('events_hash'::regclass, 'name'));
+-- set shardstate of one replication from each shard to 0 (invalid value)
+UPDATE pg_dist_shard_placement SET shardstate = 0 WHERE nodeport = 57638 AND shardid BETWEEN 103025 AND 103028;
 -- should see above shard identifiers
 SELECT load_shard_id_array('events_hash');
 -- should see array with first shard range
-SELECT load_shard_interval_array(1, 0);
+SELECT load_shard_interval_array(103025, 0);
 -- should even work for range-partitioned shards
-BEGIN;
- UPDATE pg_dist_shard SET
- shardminvalue = 'Aardvark',
- shardmaxvalue = 'Zebra'
- WHERE shardid = 1;
+-- create range distributed table
+CREATE TABLE events_range (
+ id bigint,
+ name text
+);
+SELECT master_create_distributed_table('events_range', 'name', 'range');
- UPDATE pg_dist_partition SET partmethod = 'r'
- WHERE logicalrelid = 'events_hash'::regclass;
+-- create empty shard
+SELECT master_create_empty_shard('events_range');
- SELECT load_shard_interval_array(1, ''::text);
-ROLLBACK;
+UPDATE pg_dist_shard SET
+ shardminvalue = 'Aardvark',
+ shardmaxvalue = 'Zebra'
+WHERE shardid = 103029;
+
+SELECT load_shard_interval_array(103029, ''::text);
 -- should see error for non-existent shard
-SELECT load_shard_interval_array(5, 0);
+SELECT load_shard_interval_array(103030, 0);
 -- should see two placements
-SELECT load_shard_placement_array(2, false);
+SELECT load_shard_placement_array(103026, false);
 -- only one of which is finalized
-SELECT load_shard_placement_array(2, true);
+SELECT load_shard_placement_array(103026, true);
 -- should see error for non-existent shard
-SELECT load_shard_placement_array(6, false);
+SELECT load_shard_placement_array(103031, false);
 -- should see column id of 'name'
 SELECT partition_column_id('events_hash');
@@ -152,9 +139,11 @@ SELECT column_name_to_column_id('events_hash', 'non_existent');
 -- drop shard rows (must drop placements first)
 DELETE FROM pg_dist_shard_placement
- WHERE shardid BETWEEN 1 AND 4;
+ WHERE shardid BETWEEN 103025 AND 103029;
 DELETE FROM pg_dist_shard
  WHERE logicalrelid = 'events_hash'::regclass;
+DELETE FROM pg_dist_shard
+ WHERE logicalrelid = 'events_range'::regclass;
 -- verify that an eager load shows them missing
 SELECT load_shard_id_array('events_hash');
diff --git a/src/test/regress/sql/multi_join_order_additional.sql b/src/test/regress/sql/multi_join_order_additional.sql
index 902ed4739..221a62538 100644
--- a/src/test/regress/sql/multi_join_order_additional.sql
+++ b/src/test/regress/sql/multi_join_order_additional.sql
@@ -8,6 +8,58 @@ SET citus.explain_distributed_queries TO off;
 SET citus.log_multi_join_order TO TRUE;
 SET client_min_messages TO DEBUG2;
+-- Create new table definitions for use in testing in distributed planning and
+-- execution functionality. Also create indexes to boost performance.
+
+CREATE TABLE lineitem_hash (
+ l_orderkey bigint not null,
+ l_partkey integer not null,
+ l_suppkey integer not null,
+ l_linenumber integer not null,
+ l_quantity decimal(15, 2) not null,
+ l_extendedprice decimal(15, 2) not null,
+ l_discount decimal(15, 2) not null,
+ l_tax decimal(15, 2) not null,
+ l_returnflag char(1) not null,
+ l_linestatus char(1) not null,
+ l_shipdate date not null,
+ l_commitdate date not null,
+ l_receiptdate date not null,
+ l_shipinstruct char(25) not null,
+ l_shipmode char(10) not null,
+ l_comment varchar(44) not null,
+ PRIMARY KEY(l_orderkey, l_linenumber) );
+SELECT master_create_distributed_table('lineitem_hash', 'l_orderkey', 'hash');
+SELECT master_create_worker_shards('lineitem_hash', 2, 1);
+
+CREATE INDEX lineitem_hash_time_index ON lineitem_hash (l_shipdate);
+
+CREATE TABLE orders_hash (
+ o_orderkey bigint not null,
+ o_custkey integer not null,
+ o_orderstatus char(1) not null,
+ o_totalprice decimal(15,2) not null,
+ o_orderdate date not null,
+ o_orderpriority char(15) not null,
+ o_clerk char(15) not null,
+ o_shippriority integer not null,
+ o_comment varchar(79) not null,
+ PRIMARY KEY(o_orderkey) );
+SELECT master_create_distributed_table('orders_hash', 'o_orderkey', 'hash');
+SELECT master_create_worker_shards('orders_hash', 2, 1);
+
+CREATE TABLE customer_hash (
+ c_custkey integer not null,
+ c_name varchar(25) not null,
+ c_address varchar(40) not null,
+ c_nationkey integer not null,
+ c_phone char(15) not null,
+ c_acctbal decimal(15,2) not null,
+ c_mktsegment char(10) not null,
+ c_comment varchar(117) not null);
+SELECT master_create_distributed_table('customer_hash', 'c_custkey', 'hash');
+SELECT master_create_worker_shards('customer_hash', 1, 1);
+
 -- The following query checks that we can correctly handle self-joins
 EXPLAIN SELECT l1.l_quantity FROM lineitem l1, lineitem l2
@@ -37,35 +89,17 @@ BEGIN;
 -- Validate that we take into account the partition method when building the
 -- join-order plan.
-UPDATE pg_dist_partition SET partmethod = 'h' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'lineitem');
-
-EXPLAIN SELECT count(*) FROM orders, lineitem
+EXPLAIN SELECT count(*) FROM orders, lineitem_hash
 WHERE o_orderkey = l_orderkey;
 -- Verify we handle local joins between two hash-partitioned tables.
-UPDATE pg_dist_partition SET partmethod = 'h' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'orders');
-
-EXPLAIN SELECT count(*) FROM orders, lineitem
+EXPLAIN SELECT count(*) FROM orders_hash, lineitem_hash
 WHERE o_orderkey = l_orderkey;
-UPDATE pg_dist_partition SET partmethod = 'a' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'lineitem');
-UPDATE pg_dist_partition SET partmethod = 'a' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'orders');
-
-
 -- Validate that we can handle broadcast joins with hash-partitioned tables.
-UPDATE pg_dist_partition SET partmethod = 'h' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'customer');
-
-EXPLAIN SELECT count(*) FROM customer, nation
+EXPLAIN SELECT count(*) FROM customer_hash, nation
 WHERE c_nationkey = n_nationkey;
-UPDATE pg_dist_partition SET partmethod = 'a' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'customer');
-
 -- Update the large table shard count for all the following tests.
 SET citus.large_table_shard_count TO 1;
@@ -76,28 +110,20 @@ EXPLAIN SELECT count(*) FROM orders, lineitem, customer
 -- Validate that we don't chose a single-partition join method with a
 -- hash-partitioned base table
-UPDATE pg_dist_partition SET partmethod = 'h' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'customer');
-
-EXPLAIN SELECT count(*) FROM orders, customer
+EXPLAIN SELECT count(*) FROM orders, customer_hash
 WHERE c_custkey = o_custkey;
-UPDATE pg_dist_partition SET partmethod = 'a' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'customer');
-
 -- Validate that we can re-partition a hash partitioned table to join with a
 -- range partitioned one.
-UPDATE pg_dist_partition SET partmethod = 'h' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'orders');
-
-EXPLAIN SELECT count(*) FROM orders, customer
+EXPLAIN SELECT count(*) FROM orders_hash, customer
 WHERE c_custkey = o_custkey;
-UPDATE pg_dist_partition SET partmethod = 'a' WHERE
- logicalrelid = (SELECT relfilenode FROM pg_class WHERE relname = 'orders');
-
 COMMIT;
 -- Reset client logging level to its previous value
 SET client_min_messages TO NOTICE;
+
+DROP TABLE lineitem_hash;
+DROP TABLE orders_hash;
+DROP TABLE customer_hash;
diff --git a/src/test/regress/sql/multi_prune_shard_list.sql b/src/test/regress/sql/multi_prune_shard_list.sql
index 6db632e1e..25d7be0fc 100644
--- a/src/test/regress/sql/multi_prune_shard_list.sql
+++ b/src/test/regress/sql/multi_prune_shard_list.sql
@@ -36,20 +36,12 @@ CREATE FUNCTION print_sorted_shard_intervals(regclass)
 -- test shard pruning functionality
 -- ===================================================================
--- create distributed table metadata to observe shard pruning
+-- create distributed table to observe shard pruning
 CREATE TABLE pruning ( species text, last_pruned date, plant_id integer );
+SELECT master_create_distributed_table('pruning', 'species', 'hash');
-INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey)
-VALUES
- ('pruning'::regclass, 'h', column_name_to_column('pruning'::regclass, 'species'));
-INSERT INTO pg_dist_shard
- (shardid, logicalrelid, shardstorage, shardminvalue, shardmaxvalue)
-VALUES
- (10, 'pruning'::regclass, 't', '-2147483648', '-1073741826'),
- (11, 'pruning'::regclass, 't', '-1073741825', '-3'),
- (12, 'pruning'::regclass, 't', '-2', '1073741820'),
- (13, 'pruning'::regclass, 't', '1073741821', '2147483647');
+-- create worker shards
+SELECT master_create_worker_shards('pruning', 4, 1);
 -- with no values, expect all shards
 SELECT prune_using_no_values('pruning');
@@ -76,45 +68,52 @@ SELECT debug_equality_expression('pruning');
 SELECT print_sorted_shard_intervals('pruning');
 -- update only min value for one shard
-UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 11;
+UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103071;
 SELECT print_sorted_shard_intervals('pruning');
 -- now lets have one more shard without min/max values
-UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 12;
+UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103072;
 SELECT print_sorted_shard_intervals('pruning');
 -- now lets have one more shard without min/max values
-UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 10;
+UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103070;
 SELECT print_sorted_shard_intervals('pruning');
 -- all shard placements are uninitialized
-UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 13;
+UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103073;
 SELECT print_sorted_shard_intervals('pruning');
--- now update the metadata so that the table is a range distributed table
-UPDATE pg_dist_partition SET partmethod = 'r' WHERE logicalrelid = 'pruning'::regclass;
+-- create range distributed table to observe shard pruning
+CREATE TABLE pruning_range ( species text, last_pruned date, plant_id integer );
+SELECT master_create_distributed_table('pruning_range', 'species', 'range');
+
+-- create worker shards
+SELECT master_create_empty_shard('pruning_range');
+SELECT master_create_empty_shard('pruning_range');
+SELECT master_create_empty_shard('pruning_range');
+SELECT master_create_empty_shard('pruning_range');
 -- now the comparison is done via the partition column type, which is text
-UPDATE pg_dist_shard SET shardminvalue = 'a', shardmaxvalue = 'b' WHERE shardid = 10;
-UPDATE pg_dist_shard SET shardminvalue = 'c', shardmaxvalue = 'd' WHERE shardid = 11;
-UPDATE pg_dist_shard SET shardminvalue = 'e', shardmaxvalue = 'f' WHERE shardid = 12;
-UPDATE pg_dist_shard SET shardminvalue = 'g', shardmaxvalue = 'h' WHERE shardid = 13;
+UPDATE pg_dist_shard SET shardminvalue = 'a', shardmaxvalue = 'b' WHERE shardid = 103074;
+UPDATE pg_dist_shard SET shardminvalue = 'c', shardmaxvalue = 'd' WHERE shardid = 103075;
+UPDATE pg_dist_shard SET shardminvalue = 'e', shardmaxvalue = 'f' WHERE shardid = 103076;
+UPDATE pg_dist_shard SET shardminvalue = 'g', shardmaxvalue = 'h' WHERE shardid = 103077;
 -- print the ordering of shard intervals with range partitioning as well
-SELECT print_sorted_shard_intervals('pruning');
+SELECT print_sorted_shard_intervals('pruning_range');
 -- update only min value for one shard
-UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 11;
-SELECT print_sorted_shard_intervals('pruning');
+UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103075;
+SELECT print_sorted_shard_intervals('pruning_range');
 -- now lets have one more shard without min/max values
-UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 12;
-SELECT print_sorted_shard_intervals('pruning');
+UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103076;
+SELECT print_sorted_shard_intervals('pruning_range');
 -- now lets have one more shard without min/max values
-UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 10;
-SELECT print_sorted_shard_intervals('pruning');
+UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103074;
+SELECT print_sorted_shard_intervals('pruning_range');
 -- all shard placements are uninitialized
-UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 13;
-SELECT print_sorted_shard_intervals('pruning');
+UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103077;
+SELECT print_sorted_shard_intervals('pruning_range');