-- -- MULTI_COPY -- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 560000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 560000; -- Create a new hash-partitioned table into which to COPY CREATE TABLE customer_copy_hash ( c_custkey integer, c_name varchar(25) not null, c_address varchar(40), c_nationkey integer, c_phone char(15), c_acctbal decimal(15,2), c_mktsegment char(10), c_comment varchar(117), primary key (c_custkey)); SELECT master_create_distributed_table('customer_copy_hash', 'c_custkey', 'hash'); master_create_distributed_table --------------------------------- (1 row) -- Test COPY into empty hash-partitioned table COPY customer_copy_hash FROM '@abs_srcdir@/data/customer.1.data' WITH (DELIMITER '|'); ERROR: could not find any shards into which to copy DETAIL: No shards exist for distributed table "customer_copy_hash". HINT: Run master_create_worker_shards to create shards and try again. SELECT master_create_worker_shards('customer_copy_hash', 64, 1); master_create_worker_shards ----------------------------- (1 row) -- Test empty copy COPY customer_copy_hash FROM STDIN; -- Test syntax error COPY customer_copy_hash (c_custkey,c_name) FROM STDIN; ERROR: invalid input syntax for integer: "1,customer1" CONTEXT: COPY customer_copy_hash, line 1, column c_custkey: "1,customer1" -- Confirm that no data was copied SELECT count(*) FROM customer_copy_hash; count ------- 0 (1 row) -- Test primary key violation COPY customer_copy_hash (c_custkey, c_name) FROM STDIN WITH (FORMAT 'csv'); ERROR: duplicate key value violates unique constraint "customer_copy_hash_pkey_560048" DETAIL: Key (c_custkey)=(2) already exists. -- Confirm that no data was copied SELECT count(*) FROM customer_copy_hash; count ------- 0 (1 row) -- Test headers option COPY customer_copy_hash (c_custkey, c_name) FROM STDIN WITH (FORMAT 'csv', HEADER true, FORCE_NULL (c_custkey)); -- Confirm that only first row was skipped SELECT count(*) FROM customer_copy_hash; count ------- 3 (1 row) -- Test force_not_null option COPY customer_copy_hash (c_custkey, c_name, c_address) FROM STDIN WITH (FORMAT 'csv', QUOTE '"', FORCE_NOT_NULL (c_address)); -- Confirm that value is not null SELECT count(c_address) FROM customer_copy_hash WHERE c_custkey = 4; count ------- 1 (1 row) -- Test force_null option COPY customer_copy_hash (c_custkey, c_name, c_address) FROM STDIN WITH (FORMAT 'csv', QUOTE '"', FORCE_NULL (c_address)); -- Confirm that value is null SELECT count(c_address) FROM customer_copy_hash WHERE c_custkey = 5; count ------- 0 (1 row) -- Test null violation COPY customer_copy_hash (c_custkey, c_name) FROM STDIN WITH (FORMAT 'csv'); ERROR: null value in column "c_name" violates not-null constraint DETAIL: Failing row contains (8, null, null, null, null, null, null, null). -- Confirm that no data was copied SELECT count(*) FROM customer_copy_hash; count ------- 5 (1 row) -- Test server-side copy from program COPY customer_copy_hash (c_custkey, c_name) FROM PROGRAM 'echo 9 customer9' WITH (DELIMITER ' '); -- Confirm that data was copied SELECT count(*) FROM customer_copy_hash WHERE c_custkey = 9; count ------- 1 (1 row) -- Test server-side copy from file COPY customer_copy_hash FROM '@abs_srcdir@/data/customer.2.data' WITH (DELIMITER '|'); -- Confirm that data was copied SELECT count(*) FROM customer_copy_hash; count ------- 1006 (1 row) -- Test client-side copy from file \COPY customer_copy_hash FROM '@abs_srcdir@/data/customer.3.data' WITH (DELIMITER '|'); -- Confirm that data was copied SELECT count(*) FROM customer_copy_hash; count ------- 2006 (1 row) -- Make sure that master_update_shard_statistics() only updates shard length for -- hash-partitioned tables SELECT master_update_shard_statistics(560000); master_update_shard_statistics -------------------------------- 8192 (1 row) SELECT shardid, shardminvalue, shardmaxvalue FROM pg_dist_shard WHERE shardid = 560000; shardid | shardminvalue | shardmaxvalue ---------+---------------+--------------- 560000 | -2147483648 | -2080374785 (1 row) SELECT shardid, shardlength FROM pg_dist_shard_placement WHERE shardid = 560000; shardid | shardlength ---------+------------- 560000 | 8192 (1 row) -- Create a new hash-partitioned table with default now() function CREATE TABLE customer_with_default( c_custkey integer, c_name varchar(25) not null, c_time timestamp default now()); SELECT master_create_distributed_table('customer_with_default', 'c_custkey', 'hash'); master_create_distributed_table --------------------------------- (1 row) SELECT master_create_worker_shards('customer_with_default', 64, 1); master_create_worker_shards ----------------------------- (1 row) -- Test with default values for now() function COPY customer_with_default (c_custkey, c_name) FROM STDIN WITH (FORMAT 'csv'); -- Confirm that data was copied with now() function SELECT count(*) FROM customer_with_default where c_time IS NOT NULL; count ------- 2 (1 row) -- Add columns to the table and perform a COPY ALTER TABLE customer_copy_hash ADD COLUMN extra1 INT DEFAULT 0; ALTER TABLE customer_copy_hash ADD COLUMN extra2 INT DEFAULT 0; COPY customer_copy_hash (c_custkey, c_name, extra1, extra2) FROM STDIN CSV; SELECT * FROM customer_copy_hash WHERE extra1 = 1; c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment | extra1 | extra2 -----------+------------+-----------+-------------+---------+-----------+--------------+-----------+--------+-------- 10 | customer10 | | | | | | | 1 | 5 (1 row) -- Test dropping an intermediate column ALTER TABLE customer_copy_hash DROP COLUMN extra1; COPY customer_copy_hash (c_custkey, c_name, extra2) FROM STDIN CSV; SELECT * FROM customer_copy_hash WHERE c_custkey = 11; c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment | extra2 -----------+------------+-----------+-------------+---------+-----------+--------------+-----------+-------- 11 | customer11 | | | | | | | 5 (1 row) -- Test dropping the last column ALTER TABLE customer_copy_hash DROP COLUMN extra2; COPY customer_copy_hash (c_custkey, c_name) FROM STDIN CSV; SELECT * FROM customer_copy_hash WHERE c_custkey = 12; c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment -----------+------------+-----------+-------------+---------+-----------+--------------+----------- 12 | customer12 | | | | | | (1 row) -- Create a new range-partitioned table into which to COPY CREATE TABLE customer_copy_range ( c_custkey integer, c_name varchar(25), c_address varchar(40), c_nationkey integer, c_phone char(15), c_acctbal decimal(15,2), c_mktsegment char(10), c_comment varchar(117), primary key (c_custkey)); SELECT master_create_distributed_table('customer_copy_range', 'c_custkey', 'range'); master_create_distributed_table --------------------------------- (1 row) -- Test COPY into empty range-partitioned table COPY customer_copy_range FROM '@abs_srcdir@/data/customer.1.data' WITH (DELIMITER '|'); ERROR: could not find any shards into which to copy DETAIL: No shards exist for distributed table "customer_copy_range". SELECT master_create_empty_shard('customer_copy_range') AS new_shard_id \gset UPDATE pg_dist_shard SET shardminvalue = 1, shardmaxvalue = 500 WHERE shardid = :new_shard_id; SELECT master_create_empty_shard('customer_copy_range') AS new_shard_id \gset UPDATE pg_dist_shard SET shardminvalue = 501, shardmaxvalue = 1000 WHERE shardid = :new_shard_id; -- Test copy into range-partitioned table COPY customer_copy_range FROM '@abs_srcdir@/data/customer.1.data' WITH (DELIMITER '|'); -- Check whether data went into the right shard (maybe) SELECT min(c_custkey), max(c_custkey), avg(c_custkey), count(*) FROM customer_copy_range WHERE c_custkey <= 500; min | max | avg | count -----+-----+----------------------+------- 1 | 500 | 250.5000000000000000 | 500 (1 row) -- Check whether data was copied SELECT count(*) FROM customer_copy_range; count ------- 1000 (1 row) -- Manipulate min/max values and check shard statistics for new shard UPDATE pg_dist_shard SET shardminvalue = 1501, shardmaxvalue = 2000 WHERE shardid = :new_shard_id; SELECT shardid, shardminvalue, shardmaxvalue FROM pg_dist_shard WHERE shardid = :new_shard_id; shardid | shardminvalue | shardmaxvalue ---------+---------------+--------------- 560129 | 1501 | 2000 (1 row) SELECT shardid, shardlength FROM pg_dist_shard_placement WHERE shardid = :new_shard_id; shardid | shardlength ---------+------------- 560129 | 0 560129 | 0 (2 rows) -- Update shard statistics for range-partitioned shard and check that only the -- shard length is updated. SELECT master_update_shard_statistics(:new_shard_id); master_update_shard_statistics -------------------------------- 131072 (1 row) SELECT shardid, shardminvalue, shardmaxvalue FROM pg_dist_shard WHERE shardid = :new_shard_id; shardid | shardminvalue | shardmaxvalue ---------+---------------+--------------- 560129 | 1501 | 2000 (1 row) SELECT shardid, shardlength FROM pg_dist_shard_placement WHERE shardid = :new_shard_id; shardid | shardlength ---------+------------- 560129 | 131072 560129 | 131072 (2 rows) -- Revert back min/max value updates UPDATE pg_dist_shard SET shardminvalue = 501, shardmaxvalue = 1000 WHERE shardid = :new_shard_id; -- Create a new append-partitioned table into which to COPY CREATE TABLE customer_copy_append ( c_custkey integer, c_name varchar(25) not null, c_address varchar(40), c_nationkey integer, c_phone char(15), c_acctbal decimal(15,2), c_mktsegment char(10), c_comment varchar(117)); SELECT master_create_distributed_table('customer_copy_append', 'c_custkey', 'append'); master_create_distributed_table --------------------------------- (1 row) -- Test syntax error COPY customer_copy_append(c_custkey, c_name) FROM STDIN WITH (FORMAT 'csv'); ERROR: invalid input syntax for integer: "notinteger" CONTEXT: COPY customer_copy_append, line 3, column c_custkey: "notinteger" -- Test that no shard is created for failing copy SELECT count(*) FROM pg_dist_shard WHERE logicalrelid = 'customer_copy_append'::regclass; count ------- 0 (1 row) -- Test empty copy COPY customer_copy_append FROM STDIN; -- Test that no shard is created for copying zero rows SELECT count(*) FROM pg_dist_shard WHERE logicalrelid = 'customer_copy_append'::regclass; count ------- 0 (1 row) -- Test proper copy COPY customer_copy_append(c_custkey, c_name) FROM STDIN WITH (FORMAT 'csv'); -- Check whether data was copied properly SELECT * FROM customer_copy_append; c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment -----------+-----------+-----------+-------------+---------+-----------+--------------+----------- 1 | customer1 | | | | | | 2 | customer2 | | | | | | (2 rows) -- Manipulate manipulate and check shard statistics for append-partitioned table shard UPDATE pg_dist_shard SET shardminvalue = 1501, shardmaxvalue = 2000 WHERE shardid = 560131; UPDATE pg_dist_shard_placement SET shardlength = 0 WHERE shardid = 560131; SELECT shardid, shardminvalue, shardmaxvalue FROM pg_dist_shard WHERE shardid = 560131; shardid | shardminvalue | shardmaxvalue ---------+---------------+--------------- 560131 | 1501 | 2000 (1 row) SELECT shardid, shardlength FROM pg_dist_shard_placement WHERE shardid = 560131; shardid | shardlength ---------+------------- 560131 | 0 560131 | 0 (2 rows) -- Update shard statistics for append-partitioned shard SELECT master_update_shard_statistics(560131); master_update_shard_statistics -------------------------------- 8192 (1 row) SELECT shardid, shardminvalue, shardmaxvalue FROM pg_dist_shard WHERE shardid = 560131; shardid | shardminvalue | shardmaxvalue ---------+---------------+--------------- 560131 | 1 | 2 (1 row) SELECT shardid, shardlength FROM pg_dist_shard_placement WHERE shardid = 560131; shardid | shardlength ---------+------------- 560131 | 8192 560131 | 8192 (2 rows) -- Create lineitem table CREATE TABLE lineitem_copy_append ( l_orderkey bigint not null, l_partkey integer not null, l_suppkey integer not null, l_linenumber integer not null, l_quantity decimal(15, 2) not null, l_extendedprice decimal(15, 2) not null, l_discount decimal(15, 2) not null, l_tax decimal(15, 2) not null, l_returnflag char(1) not null, l_linestatus char(1) not null, l_shipdate date not null, l_commitdate date not null, l_receiptdate date not null, l_shipinstruct char(25) not null, l_shipmode char(10) not null, l_comment varchar(44) not null); SELECT master_create_distributed_table('lineitem_copy_append', 'l_orderkey', 'append'); master_create_distributed_table --------------------------------- (1 row) -- Test multiple shard creation SET citus.shard_max_size TO '256kB'; COPY lineitem_copy_append FROM '@abs_srcdir@/data/lineitem.1.data' with delimiter '|'; SELECT count(*) FROM pg_dist_shard WHERE logicalrelid = 'lineitem_copy_append'::regclass; count ------- 5 (1 row) -- Test round robin shard policy SET citus.shard_replication_factor TO 1; COPY lineitem_copy_append FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|'; SELECT pg_dist_shard_placement.shardid, pg_dist_shard_placement.nodeport FROM pg_dist_shard, pg_dist_shard_placement WHERE pg_dist_shard.shardid = pg_dist_shard_placement.shardid AND logicalrelid = 'lineitem_copy_append'::regclass ORDER BY pg_dist_shard.shardid DESC LIMIT 5; shardid | nodeport ---------+---------- 560141 | 57637 560140 | 57638 560139 | 57637 560138 | 57638 560137 | 57637 (5 rows) -- Create customer table for the worker copy with constraint and index CREATE TABLE customer_worker_copy_append ( c_custkey integer , c_name varchar(25) not null, c_address varchar(40), c_nationkey integer, c_phone char(15), c_acctbal decimal(15,2), c_mktsegment char(10), c_comment varchar(117), primary key (c_custkey)); CREATE INDEX ON customer_worker_copy_append (c_name); SELECT master_create_distributed_table('customer_worker_copy_append', 'c_custkey', 'append'); WARNING: table "customer_worker_copy_append" has a unique constraint DETAIL: Unique constraints and primary keys on append-partitioned tables cannot be enforced. HINT: Consider using hash partitioning. master_create_distributed_table --------------------------------- (1 row) -- Connect to the first worker node \c - - - 57637 -- Test copy from the worker node COPY customer_worker_copy_append FROM '@abs_srcdir@/data/customer.1.data' with (delimiter '|', master_host 'localhost', master_port 57636); COPY customer_worker_copy_append FROM '@abs_srcdir@/data/customer.2.data' with (delimiter '|', master_host 'localhost', master_port 57636); -- Test if there is no relation to copy data with the worker copy COPY lineitem_copy_none FROM '@abs_srcdir@/data/lineitem.1.data' with (delimiter '|', master_host 'localhost', master_port 57636); WARNING: relation "lineitem_copy_none" does not exist CONTEXT: while executing command on localhost:57636 ERROR: could not run copy from the worker node -- Connect back to the master node \c - - - 57636 -- Test the content of the table SELECT min(c_custkey), max(c_custkey), avg(c_acctbal), count(*) FROM customer_worker_copy_append; min | max | avg | count -----+------+-----------------------+------- 1 | 7000 | 4443.8028800000000000 | 2000 (1 row) -- Test schema support on append partitioned tables CREATE SCHEMA append; NOTICE: Citus partially supports CREATE SCHEMA for distributed databases DETAIL: schema usage in joins and in some UDFs provided by Citus are not supported yet CREATE TABLE append.customer_copy ( c_custkey integer , c_name varchar(25) not null, c_address varchar(40), c_nationkey integer, c_phone char(15), c_acctbal decimal(15,2), c_mktsegment char(10), c_comment varchar(117)); SELECT master_create_distributed_table('append.customer_copy', 'c_custkey', 'append'); master_create_distributed_table --------------------------------- (1 row) -- Test copy from the master node COPY append.customer_copy FROM '@abs_srcdir@/data/customer.1.data' with (delimiter '|'); -- Test copy from the worker node \c - - - 57637 COPY append.customer_copy FROM '@abs_srcdir@/data/customer.2.data' with (delimiter '|', master_host 'localhost', master_port 57636); -- Connect back to the master node \c - - - 57636 -- Test the content of the table SELECT min(c_custkey), max(c_custkey), avg(c_acctbal), count(*) FROM append.customer_copy; min | max | avg | count -----+------+-----------------------+------- 1 | 7000 | 4443.8028800000000000 | 2000 (1 row)