diff --git a/src/backend/distributed/executor/multi_task_tracker_executor.c b/src/backend/distributed/executor/multi_task_tracker_executor.c index 244973e59..df6589247 100644 --- a/src/backend/distributed/executor/multi_task_tracker_executor.c +++ b/src/backend/distributed/executor/multi_task_tracker_executor.c @@ -447,7 +447,7 @@ MultiTaskTrackerExecute(Job *job) } else if (clusterFailed) { - ereport(ERROR, (errmsg("failed to execute task %u", failedTaskId))); + ereport(ERROR, (errmsg("cluster failed"))); } else if (QueryCancelPending) { diff --git a/src/backend/distributed/planner/postgres_planning_functions.c b/src/backend/distributed/planner/postgres_planning_functions.c index d8b8107a0..c867566c8 100644 --- a/src/backend/distributed/planner/postgres_planning_functions.c +++ b/src/backend/distributed/planner/postgres_planning_functions.c @@ -30,6 +30,7 @@ */ #if PG_VERSION_NUM >= 120000 + /* * distinctList is a list of SortGroupClauses, identifying the targetlist items * that should be considered by the Unique filter. The input path must @@ -38,14 +39,14 @@ Unique * make_unique_from_sortclauses(Plan *lefttree, List *distinctList) { - Unique *node = makeNode(Unique); - Plan *plan = &node->plan; - int numCols = list_length(distinctList); - int keyno = 0; + Unique *node = makeNode(Unique); + Plan *plan = &node->plan; + int numCols = list_length(distinctList); + int keyno = 0; AttrNumber *uniqColIdx; - Oid *uniqOperators; - Oid *uniqCollations; - ListCell *slitem; + Oid *uniqOperators; + Oid *uniqCollations; + ListCell *slitem; plan->targetlist = lefttree->targetlist; plan->qual = NIL; @@ -81,7 +82,9 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) return node; } + #else + /* * distinctList is a list of SortGroupClauses, identifying the targetlist items * that should be considered by the Unique filter. The input path must @@ -128,4 +131,6 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) return node; } + + #endif diff --git a/src/backend/distributed/utils/metadata_cache.c b/src/backend/distributed/utils/metadata_cache.c index a27911bb0..49e4f911c 100644 --- a/src/backend/distributed/utils/metadata_cache.c +++ b/src/backend/distributed/utils/metadata_cache.c @@ -1261,8 +1261,6 @@ static ShardInterval ** SortShardIntervalArray(ShardInterval **shardIntervalArray, int shardCount, FmgrInfo *shardIntervalSortCompareFunction) { - ShardInterval **sortedShardIntervalArray = NULL; - /* short cut if there are no shard intervals in the array */ if (shardCount == 0) { @@ -1274,9 +1272,7 @@ SortShardIntervalArray(ShardInterval **shardIntervalArray, int shardCount, (qsort_arg_comparator) CompareShardIntervals, (void *) shardIntervalSortCompareFunction); - sortedShardIntervalArray = shardIntervalArray; - - return sortedShardIntervalArray; + return shardIntervalArray; } diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 7b8c14bb5..f7df3ba25 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -63,3 +63,8 @@ s/ERROR: failed to execute task [0-9]+/ERROR: failed to execute task X/g # normalize file names for partitioned files s/(task_[0-9]+\.)[0-9]+/\1xxxx/g s/(job_[0-9]+\/task_[0-9]+\/p_[0-9]+\.)[0-9]+/\1xxxx/g + +# pg12 changes +s/Partitioned table "/Table "/g +s/\) TABLESPACE pg_default$/\)/g +s/invalid input syntax for type /invalid input syntax /g diff --git a/src/test/regress/bin/normalized_tests.lst b/src/test/regress/bin/normalized_tests.lst index 39cf8803b..2347913e1 100644 --- a/src/test/regress/bin/normalized_tests.lst +++ b/src/test/regress/bin/normalized_tests.lst @@ -1,40 +1,37 @@ # List of tests whose output we want to normalize, one per line +custom_aggregate_support +failure_copy_on_hash +failure_real_time_select +failure_savepoints +failure_vacuum +foreign_key_restriction_enforcement +foreign_key_to_reference_table +isolation_citus_dist_activity multi_alter_table_add_constraints multi_alter_table_statements -foreign_key_to_reference_table -failure_copy_on_hash -failure_savepoints -foreign_key_restriction_enforcement -failure_real_time_select -failure_vacuum -isolation_citus_dist_activity +multi_create_table_constraints +multi_explain +multi_generate_ddl_commands +multi_having_pushdown multi_insert_select multi_insert_select_conflict -multi_multiuser -multi_name_lengths -multi_partition_pruning -multi_subtransactions -multi_modifying_xacts -multi_insert_select -sql_procedure -multi_reference_table -multi_create_table_constraints - -# the following tests' output are -# normalized for EXPLAIN outputs -# where the executor name is wiped out multi_join_order_tpch_small multi_join_pruning +multi_master_protocol +multi_modifying_xacts +multi_multiuser +multi_mx_explain +multi_name_lengths +multi_null_minmax_value_pruning multi_orderby_limit_pushdown +multi_partitioning multi_partition_pruning +multi_reference_table multi_select_distinct multi_subquery_window_functions +multi_subtransactions multi_task_assignment_policy multi_view -multi_explain -multi_null_minmax_value_pruning +sql_procedure window_functions -multi_having_pushdown -multi_partitioning -multi_mx_explain -custom_aggregate_support + diff --git a/src/test/regress/expected/multi_master_protocol_2.out b/src/test/regress/expected/multi_master_protocol_2.out new file mode 100644 index 000000000..2fb7ff3c5 --- /dev/null +++ b/src/test/regress/expected/multi_master_protocol_2.out @@ -0,0 +1,34 @@ +-- +-- MULTI_MASTER_PROTOCOL +-- +-- Tests that check the metadata returned by the master node. +SET citus.next_shard_id TO 740000; +SELECT part_storage_type, part_key, part_replica_count, part_max_size, + part_placement_policy FROM master_get_table_metadata('lineitem'); + part_storage_type | part_key | part_replica_count | part_max_size | part_placement_policy +-------------------+------------+--------------------+---------------+----------------------- + t | l_orderkey | 2 | 1536000 | 2 +(1 row) + +SELECT * FROM master_get_table_ddl_events('lineitem'); + master_get_table_ddl_events +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + CREATE TABLE public.lineitem (l_orderkey bigint NOT NULL, l_partkey integer NOT NULL, l_suppkey integer NOT NULL, l_linenumber integer NOT NULL, l_quantity numeric(15,2) NOT NULL, l_extendedprice numeric(15,2) NOT NULL, l_discount numeric(15,2) NOT NULL, l_tax numeric(15,2) NOT NULL, l_returnflag character(1) NOT NULL, l_linestatus character(1) NOT NULL, l_shipdate date NOT NULL, l_commitdate date NOT NULL, l_receiptdate date NOT NULL, l_shipinstruct character(25) NOT NULL, l_shipmode character(10) NOT NULL, l_comment character varying(44) NOT NULL) + ALTER TABLE public.lineitem OWNER TO postgres + ALTER TABLE public.lineitem ADD CONSTRAINT lineitem_pkey PRIMARY KEY (l_orderkey, l_linenumber) + CREATE INDEX lineitem_time_index ON public.lineitem USING btree (l_shipdate) +(4 rows) + +SELECT * FROM master_get_new_shardid(); + master_get_new_shardid +------------------------ + 740000 +(1 row) + +SELECT * FROM master_get_active_worker_nodes(); + node_name | node_port +-----------+----------- + localhost | 57638 + localhost | 57637 +(2 rows) + diff --git a/src/test/regress/expected/multi_partitioning_utils_2.out b/src/test/regress/expected/multi_partitioning_utils_2.out new file mode 100644 index 000000000..315f041e9 --- /dev/null +++ b/src/test/regress/expected/multi_partitioning_utils_2.out @@ -0,0 +1,432 @@ +-- This test has different output per major version +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int as server_major_version; + server_major_version +---------------------- + 12 +(1 row) + +-- =================================================================== +-- create test functions +-- =================================================================== +CREATE FUNCTION generate_alter_table_detach_partition_command(regclass) + RETURNS text + AS 'citus' + LANGUAGE C STRICT; +CREATE FUNCTION generate_alter_table_attach_partition_command(regclass) + RETURNS text + AS 'citus' + LANGUAGE C STRICT; +CREATE FUNCTION generate_partition_information(regclass) + RETURNS text + AS 'citus' + LANGUAGE C STRICT; +CREATE FUNCTION print_partitions(regclass) + RETURNS text + AS 'citus' + LANGUAGE C STRICT; +CREATE FUNCTION table_inherits(regclass) + RETURNS bool + AS 'citus' + LANGUAGE C STRICT; +CREATE FUNCTION table_inherited(regclass) + RETURNS bool + AS 'citus' + LANGUAGE C STRICT; +CREATE OR REPLACE FUNCTION detach_and_attach_partition(partition_name regclass, parent_table_name regclass) +RETURNS void LANGUAGE plpgsql VOLATILE +AS $function$ +DECLARE + detach_partition_command text := ''; + attach_partition_command text := ''; + command_result text := ''; + +BEGIN + -- first generate the command + SELECT public.generate_alter_table_attach_partition_command(partition_name) INTO attach_partition_command; + + -- now genereate the detach command + SELECT public.generate_alter_table_detach_partition_command(partition_name) INTO detach_partition_command; + + -- later detach the same partition + EXECUTE detach_partition_command; + + -- not attach it again + EXECUTE attach_partition_command; +END; +$function$; +CREATE OR REPLACE FUNCTION drop_and_recreate_partitioned_table(parent_table_name regclass) +RETURNS void LANGUAGE plpgsql VOLATILE +AS $function$ +DECLARE + command text := ''; + +BEGIN + -- first generate the command + CREATE TABLE partitioned_table_create_commands AS SELECT master_get_table_ddl_events(parent_table_name::text); + + -- later detach the same partition + EXECUTE 'DROP TABLE ' || parent_table_name::text || ';'; + + FOR command IN SELECT * FROM partitioned_table_create_commands + LOOP + -- can do some processing here + EXECUTE command; + END LOOP; + + DROP TABLE partitioned_table_create_commands; + +END; +$function$; +-- create a partitioned table +CREATE TABLE date_partitioned_table(id int, time date) PARTITION BY RANGE (time); +-- we should be able to get the partitioning information even if there are no partitions +SELECT generate_partition_information('date_partitioned_table'); + generate_partition_information +-------------------------------- + RANGE ("time") +(1 row) + +-- we should be able to drop and re-create the partitioned table using the command that Citus generate +SELECT drop_and_recreate_partitioned_table('date_partitioned_table'); + drop_and_recreate_partitioned_table +------------------------------------- + +(1 row) + +-- we should also be able to see the PARTITION BY ... for the parent table +SELECT master_get_table_ddl_events('date_partitioned_table'); + master_get_table_ddl_events +--------------------------------------------------------------------------------------------------- + CREATE TABLE public.date_partitioned_table (id integer, "time" date) PARTITION BY RANGE ("time") + ALTER TABLE public.date_partitioned_table OWNER TO postgres +(2 rows) + +-- now create the partitions +CREATE TABLE date_partition_2006 PARTITION OF date_partitioned_table FOR VALUES FROM ('2006-01-01') TO ('2007-01-01'); +CREATE TABLE date_partition_2007 PARTITION OF date_partitioned_table FOR VALUES FROM ('2007-01-01') TO ('2008-01-01'); +-- we should be able to get the partitioning information after the partitions are created +SELECT generate_partition_information('date_partitioned_table'); + generate_partition_information +-------------------------------- + RANGE ("time") +(1 row) + +-- lets get the attach partition commands +SELECT generate_alter_table_attach_partition_command('date_partition_2006'); + generate_alter_table_attach_partition_command +----------------------------------------------------------------------------------------------------------------------------------------- + ALTER TABLE public.date_partitioned_table ATTACH PARTITION public.date_partition_2006 FOR VALUES FROM ('01-01-2006') TO ('01-01-2007'); +(1 row) + +SELECT generate_alter_table_attach_partition_command('date_partition_2007'); + generate_alter_table_attach_partition_command +----------------------------------------------------------------------------------------------------------------------------------------- + ALTER TABLE public.date_partitioned_table ATTACH PARTITION public.date_partition_2007 FOR VALUES FROM ('01-01-2007') TO ('01-01-2008'); +(1 row) + +-- detach and attach the partition by the command generated by us +\d+ date_partitioned_table + Partitioned table "public.date_partitioned_table" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + id | integer | | | | plain | | + time | date | | | | plain | | +Partition key: RANGE ("time") +Partitions: date_partition_2006 FOR VALUES FROM ('01-01-2006') TO ('01-01-2007'), + date_partition_2007 FOR VALUES FROM ('01-01-2007') TO ('01-01-2008') + +SELECT detach_and_attach_partition('date_partition_2007', 'date_partitioned_table'); + detach_and_attach_partition +----------------------------- + +(1 row) + +-- check that both partitions are visiable +\d+ date_partitioned_table + Partitioned table "public.date_partitioned_table" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + id | integer | | | | plain | | + time | date | | | | plain | | +Partition key: RANGE ("time") +Partitions: date_partition_2006 FOR VALUES FROM ('01-01-2006') TO ('01-01-2007'), + date_partition_2007 FOR VALUES FROM ('01-01-2007') TO ('01-01-2008') + +-- make sure that inter shard commands work as expected +-- assume that the shardId is 100 +CREATE TABLE date_partitioned_table_100 (id int, time date) PARTITION BY RANGE (time); +CREATE TABLE date_partition_2007_100 (id int, time date ); +-- now create the partitioning hierarcy +SELECT worker_apply_inter_shard_ddl_command(referencing_shard:=100, referencing_schema_name:='public', + referenced_shard:=100, referenced_schema_name:='public', + command:='ALTER TABLE date_partitioned_table ATTACH PARTITION date_partition_2007 FOR VALUES FROM (''2007-01-01'') TO (''2008-01-02'')' ); + worker_apply_inter_shard_ddl_command +-------------------------------------- + +(1 row) + +-- the hierarcy is successfully created +\d+ date_partitioned_table_100 + Partitioned table "public.date_partitioned_table_100" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + id | integer | | | | plain | | + time | date | | | | plain | | +Partition key: RANGE ("time") +Partitions: date_partition_2007_100 FOR VALUES FROM ('01-01-2007') TO ('01-02-2008') + +-- Citus can also get the DDL events for the partitions as regular tables +SELECT master_get_table_ddl_events('date_partition_2007_100'); + master_get_table_ddl_events +----------------------------------------------------------------------- + CREATE TABLE public.date_partition_2007_100 (id integer, "time" date) + ALTER TABLE public.date_partition_2007_100 OWNER TO postgres +(2 rows) + +-- now break the partitioning hierarcy +SELECT worker_apply_inter_shard_ddl_command(referencing_shard:=100, referencing_schema_name:='public', + referenced_shard:=100, referenced_schema_name:='public', + command:='ALTER TABLE date_partitioned_table DETACH PARTITION date_partition_2007' ); + worker_apply_inter_shard_ddl_command +-------------------------------------- + +(1 row) + +-- the hierarcy is successfully broken +\d+ date_partitioned_table_100 + Partitioned table "public.date_partitioned_table_100" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + id | integer | | | | plain | | + time | date | | | | plain | | +Partition key: RANGE ("time") +Number of partitions: 0 + +-- now lets have some more complex partitioning hierarcies with +-- tables on different schemas and constraints on the tables +CREATE SCHEMA partition_parent_schema; +CREATE TABLE partition_parent_schema.parent_table (id int NOT NULL, time date DEFAULT now()) PARTITION BY RANGE (time); +CREATE SCHEMA partition_child_1_schema; +CREATE TABLE partition_child_1_schema.child_1 (id int NOT NULL, time date ); +CREATE SCHEMA partition_child_2_schema; +CREATE TABLE partition_child_2_schema.child_2 (id int NOT NULL, time date ); +-- we should be able to get the partitioning information even if there are no partitions +SELECT generate_partition_information('partition_parent_schema.parent_table'); + generate_partition_information +-------------------------------- + RANGE ("time") +(1 row) + +-- we should be able to drop and re-create the partitioned table using the command that Citus generate +SELECT drop_and_recreate_partitioned_table('partition_parent_schema.parent_table'); + drop_and_recreate_partitioned_table +------------------------------------- + +(1 row) + +ALTER TABLE partition_parent_schema.parent_table ATTACH PARTITION partition_child_1_schema.child_1 FOR VALUES FROM ('2009-01-01') TO ('2010-01-02'); +SET search_path = 'partition_parent_schema'; +ALTER TABLE parent_table ATTACH PARTITION partition_child_2_schema.child_2 FOR VALUES FROM ('2006-01-01') TO ('2007-01-01'); +SELECT public.generate_partition_information('parent_table'); + generate_partition_information +-------------------------------- + RANGE ("time") +(1 row) + +-- lets get the attach partition commands +SELECT public.generate_alter_table_attach_partition_command('partition_child_1_schema.child_1'); + generate_alter_table_attach_partition_command +------------------------------------------------------------------------------------------------------------------------------------------------------ + ALTER TABLE partition_parent_schema.parent_table ATTACH PARTITION partition_child_1_schema.child_1 FOR VALUES FROM ('01-01-2009') TO ('01-02-2010'); +(1 row) + +SET search_path = 'partition_child_2_schema'; +SELECT public.generate_alter_table_attach_partition_command('child_2'); + generate_alter_table_attach_partition_command +------------------------------------------------------------------------------------------------------------------------------------------------------ + ALTER TABLE partition_parent_schema.parent_table ATTACH PARTITION partition_child_2_schema.child_2 FOR VALUES FROM ('01-01-2006') TO ('01-01-2007'); +(1 row) + +SET search_path = 'partition_parent_schema'; +-- detach and attach the partition by the command generated by us +\d+ parent_table + Partitioned table "partition_parent_schema.parent_table" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + id | integer | | not null | | plain | | + time | date | | | now() | plain | | +Partition key: RANGE ("time") +Partitions: partition_child_1_schema.child_1 FOR VALUES FROM ('01-01-2009') TO ('01-02-2010'), + partition_child_2_schema.child_2 FOR VALUES FROM ('01-01-2006') TO ('01-01-2007') + +SELECT public.detach_and_attach_partition('partition_child_1_schema.child_1', 'parent_table'); + detach_and_attach_partition +----------------------------- + +(1 row) + +-- check that both partitions are visiable +\d+ parent_table + Partitioned table "partition_parent_schema.parent_table" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + id | integer | | not null | | plain | | + time | date | | | now() | plain | | +Partition key: RANGE ("time") +Partitions: partition_child_1_schema.child_1 FOR VALUES FROM ('01-01-2009') TO ('01-02-2010'), + partition_child_2_schema.child_2 FOR VALUES FROM ('01-01-2006') TO ('01-01-2007') + +-- some very simple checks that should error out +SELECT public.generate_alter_table_attach_partition_command('parent_table'); +ERROR: "parent_table" is not a partition +SELECT public.generate_partition_information('partition_child_1_schema.child_1'); +ERROR: "child_1" is not a parent table +SELECT public.print_partitions('partition_child_1_schema.child_1'); +ERROR: "child_1" is not a parent table +-- now pring the partitions +SELECT public.print_partitions('parent_table'); + print_partitions +------------------ + child_1,child_2 +(1 row) + +SET search_path = 'public'; +-- test multi column / expression partitioning with UNBOUNDED ranges +CREATE OR REPLACE FUNCTION some_function(input_val text) +RETURNS text LANGUAGE plpgsql IMMUTABLE +AS $function$ +BEGIN + return reverse(input_val); +END; +$function$; +CREATE TABLE multi_column_partitioned ( + a int, + b int, + c text + ) PARTITION BY RANGE (a, (a+b+1), some_function(upper(c))); +CREATE TABLE multi_column_partition_1( + a int, + b int, + c text +); +CREATE TABLE multi_column_partition_2( + a int, + b int, + c text +); +-- partitioning information +SELECT generate_partition_information('multi_column_partitioned'); + generate_partition_information +----------------------------------------------------- + RANGE (a, (((a + b) + 1)), some_function(upper(c))) +(1 row) + +SELECT master_get_table_ddl_events('multi_column_partitioned'); + master_get_table_ddl_events +------------------------------------------------------------------------------------------------------------------------------------------------------ + CREATE TABLE public.multi_column_partitioned (a integer, b integer, c text) PARTITION BY RANGE (a, (((a + b) + 1)), public.some_function(upper(c))) + ALTER TABLE public.multi_column_partitioned OWNER TO postgres +(2 rows) + +SELECT drop_and_recreate_partitioned_table('multi_column_partitioned'); + drop_and_recreate_partitioned_table +------------------------------------- + +(1 row) + +-- partitions and their ranges +ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250'); +SELECT generate_alter_table_attach_partition_command('multi_column_partition_1'); + generate_alter_table_attach_partition_command +------------------------------------------------------------------------------------------------------------------------------------------------ + ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250'); +(1 row) + +ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE); +SELECT generate_alter_table_attach_partition_command('multi_column_partition_2'); + generate_alter_table_attach_partition_command +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE); +(1 row) + +SELECT generate_alter_table_detach_partition_command('multi_column_partition_2'); + generate_alter_table_detach_partition_command +--------------------------------------------------------------------------------------------------------- + ALTER TABLE IF EXISTS public.multi_column_partitioned DETACH PARTITION public.multi_column_partition_2; +(1 row) + +-- finally a test with LIST partitioning +CREATE TABLE list_partitioned (col1 NUMERIC, col2 NUMERIC, col3 VARCHAR(10)) PARTITION BY LIST (col1) ; +SELECT generate_partition_information('list_partitioned'); + generate_partition_information +-------------------------------- + LIST (col1) +(1 row) + +SELECT master_get_table_ddl_events('list_partitioned'); + master_get_table_ddl_events +------------------------------------------------------------------------------------------------------------------------- + CREATE TABLE public.list_partitioned (col1 numeric, col2 numeric, col3 character varying(10)) PARTITION BY LIST (col1) + ALTER TABLE public.list_partitioned OWNER TO postgres +(2 rows) + +SELECT drop_and_recreate_partitioned_table('list_partitioned'); + drop_and_recreate_partitioned_table +------------------------------------- + +(1 row) + +CREATE TABLE list_partitioned_1 PARTITION OF list_partitioned FOR VALUES IN (100, 101, 102, 103, 104); +SELECT generate_alter_table_attach_partition_command('list_partitioned_1'); + generate_alter_table_attach_partition_command +----------------------------------------------------------------------------------------------------------------------------------- + ALTER TABLE public.list_partitioned ATTACH PARTITION public.list_partitioned_1 FOR VALUES IN ('100', '101', '102', '103', '104'); +(1 row) + +-- also differentiate partitions and inhereted tables +CREATE TABLE cities ( + name text, + population float, + altitude int -- in feet +); +CREATE TABLE capitals ( + state char(2) +) INHERITS (cities); +-- returns true since capitals inherits from cities +SELECT table_inherits('capitals'); + table_inherits +---------------- + t +(1 row) + +-- although date_partition_2006 inherits from its parent +-- returns false since the hierarcy is formed via partitioning +SELECT table_inherits('date_partition_2006'); + table_inherits +---------------- + f +(1 row) + +-- returns true since cities inherited by capitals +SELECT table_inherited('cities'); + table_inherited +----------------- + t +(1 row) + +-- although date_partitioned_table inherited by its partitions +-- returns false since the hierarcy is formed via partitioning +SELECT table_inherited('date_partitioned_table'); + table_inherited +----------------- + f +(1 row) + +-- also these are not supported +SELECT master_get_table_ddl_events('capitals'); +ERROR: capitals is not a regular, foreign or partitioned table +SELECT master_get_table_ddl_events('cities'); +ERROR: cities is not a regular, foreign or partitioned table +-- dropping parents frop the partitions +DROP TABLE date_partitioned_table, multi_column_partitioned, list_partitioned, partition_parent_schema.parent_table, cities, capitals; diff --git a/src/test/regress/expected/multi_shard_update_delete_2.out b/src/test/regress/expected/multi_shard_update_delete_2.out new file mode 100644 index 000000000..e38d5f0d0 --- /dev/null +++ b/src/test/regress/expected/multi_shard_update_delete_2.out @@ -0,0 +1,903 @@ +-- +-- multi shard update delete +-- this file is intended to test multi shard update/delete queries +-- +SET citus.next_shard_id TO 1440000; +ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1440000; +SET citus.shard_replication_factor to 1; +SET citus.multi_shard_modify_mode to 'parallel'; +CREATE TABLE users_test_table(user_id int, value_1 int, value_2 int, value_3 int); +SELECT create_distributed_table('users_test_table', 'user_id'); + create_distributed_table +-------------------------- + +(1 row) + +\COPY users_test_table FROM STDIN DELIMITER AS ','; +CREATE TABLE events_test_table (user_id int, value_1 int, value_2 int, value_3 int); +SELECT create_distributed_table('events_test_table', 'user_id'); + create_distributed_table +-------------------------- + +(1 row) + +\COPY events_test_table FROM STDIN DELIMITER AS ','; +CREATE TABLE events_reference_copy_table (like events_test_table); +SELECT create_reference_table('events_reference_copy_table'); + create_reference_table +------------------------ + +(1 row) + +INSERT INTO events_reference_copy_table SELECT * FROM events_test_table; +CREATE TABLE users_reference_copy_table (like users_test_table); +SELECT create_reference_table('users_reference_copy_table'); + create_reference_table +------------------------ + +(1 row) + +INSERT INTO users_reference_copy_table SELECT * FROM users_test_table; +-- Run multi shard updates and deletes without transaction on hash distributed tables +UPDATE users_test_table SET value_1 = 1; +SELECT COUNT(*), SUM(value_1) FROM users_test_table; + count | sum +-------+----- + 15 | 15 +(1 row) + +SELECT COUNT(*), SUM(value_2) FROM users_test_table WHERE user_id = 1 or user_id = 3; + count | sum +-------+----- + 4 | 52 +(1 row) + +UPDATE users_test_table SET value_2 = value_2 + 1 WHERE user_id = 1 or user_id = 3; +SELECT COUNT(*), SUM(value_2) FROM users_test_table WHERE user_id = 1 or user_id = 3; + count | sum +-------+----- + 4 | 56 +(1 row) + +UPDATE users_test_table SET value_3 = 0 WHERE user_id <> 5; +SELECT SUM(value_3) FROM users_test_table WHERE user_id <> 5; + sum +----- + 0 +(1 row) + +SELECT COUNT(*) FROM users_test_table WHERE user_id = 3 or user_id = 5; + count +------- + 4 +(1 row) + +DELETE FROM users_test_table WHERE user_id = 3 or user_id = 5; +SELECT COUNT(*) FROM users_test_table WHERE user_id = 3 or user_id = 5; + count +------- + 0 +(1 row) + +-- Run multi shard update delete queries within transactions +BEGIN; +UPDATE users_test_table SET value_3 = 0; +END; +SELECT SUM(value_3) FROM users_test_table; + sum +----- + 0 +(1 row) + +-- Update can also be rollbacked +BEGIN; +UPDATE users_test_table SET value_3 = 1; +ROLLBACK; +SELECT SUM(value_3) FROM users_test_table; + sum +----- + 0 +(1 row) + +-- Run with inserts (we need to set citus.multi_shard_modify_mode to sequential) +BEGIN; +INSERT INTO users_test_table (user_id, value_3) VALUES(20, 15); +INSERT INTO users_test_table (user_id, value_3) VALUES(16,1), (20,16), (7,1), (20,17); +SET citus.multi_shard_modify_mode to sequential; +UPDATE users_test_table SET value_3 = 1; +END; +SELECT SUM(value_3) FROM users_test_table; + sum +----- + 16 +(1 row) + +SET citus.multi_shard_modify_mode to 'sequential'; +-- Run multiple multi shard updates (with sequential executor) +BEGIN; +UPDATE users_test_table SET value_3 = 5; +UPDATE users_test_table SET value_3 = 0; +END; +SELECT SUM(value_3) FROM users_copy_table; +ERROR: relation "users_copy_table" does not exist +LINE 1: SELECT SUM(value_3) FROM users_copy_table; + ^ +-- Run multiple multi shard updates (with parallel executor) +SET citus.multi_shard_modify_mode to 'parallel'; +UPDATE users_test_table SET value_3 = 5; +BEGIN; +UPDATE users_test_table SET value_3 = 2; +UPDATE users_test_table SET value_3 = 0; +END; +SELECT SUM(value_3) FROM users_test_table; + sum +----- + 0 +(1 row) + +-- Check with kind of constraints +UPDATE users_test_table SET value_3 = 1 WHERE user_id = 3 or true; +SELECT COUNT(*), SUM(value_3) FROM users_test_table; + count | sum +-------+----- + 16 | 16 +(1 row) + +UPDATE users_test_table SET value_3 = 0 WHERE user_id = 20 and false; +SELECT COUNT(*), SUM(value_3) FROM users_test_table; + count | sum +-------+----- + 16 | 16 +(1 row) + +-- Run multi shard updates with prepared statements +PREPARE foo_plan(int,int) AS UPDATE users_test_table SET value_1 = $1, value_3 = $2; +EXECUTE foo_plan(1,5); +EXECUTE foo_plan(3,15); +EXECUTE foo_plan(5,25); +EXECUTE foo_plan(7,35); +EXECUTE foo_plan(9,45); +EXECUTE foo_plan(0,0); +SELECT SUM(value_1), SUM(value_3) FROM users_test_table; + sum | sum +-----+----- + 0 | 0 +(1 row) + +-- Test on append table (set executor mode to sequential, since with the append +-- distributed tables parallel executor may create tons of connections) +SET citus.multi_shard_modify_mode to sequential; +CREATE TABLE append_stage_table(id int, col_2 int); +INSERT INTO append_stage_table VALUES(1,3); +INSERT INTO append_stage_table VALUES(3,2); +INSERT INTO append_stage_table VALUES(5,4); +CREATE TABLE append_stage_table_2(id int, col_2 int); +INSERT INTO append_stage_table_2 VALUES(8,3); +INSERT INTO append_stage_table_2 VALUES(9,2); +INSERT INTO append_stage_table_2 VALUES(10,4); +CREATE TABLE test_append_table(id int, col_2 int); +SELECT create_distributed_table('test_append_table','id','append'); + create_distributed_table +-------------------------- + +(1 row) + +SELECT master_create_empty_shard('test_append_table'); + master_create_empty_shard +--------------------------- + 1440010 +(1 row) + +SELECT * FROM master_append_table_to_shard(1440010, 'append_stage_table', 'localhost', :master_port); + master_append_table_to_shard +------------------------------ + 0.0053333333 +(1 row) + +SELECT master_create_empty_shard('test_append_table') AS new_shard_id; + new_shard_id +-------------- + 1440011 +(1 row) + +SELECT * FROM master_append_table_to_shard(1440011, 'append_stage_table_2', 'localhost', :master_port); + master_append_table_to_shard +------------------------------ + 0.0053333333 +(1 row) + +UPDATE test_append_table SET col_2 = 5; +SELECT * FROM test_append_table ORDER BY 1 DESC, 2 DESC; + id | col_2 +----+------- + 10 | 5 + 9 | 5 + 8 | 5 + 5 | 5 + 3 | 5 + 1 | 5 +(6 rows) + +DROP TABLE append_stage_table; +DROP TABLE append_stage_table_2; +DROP TABLE test_append_table; +-- Update multi shard of partitioned distributed table +SET citus.multi_shard_modify_mode to 'parallel'; +SET citus.shard_replication_factor to 1; +CREATE TABLE tt1(id int, col_2 int) partition by range (col_2); +CREATE TABLE tt1_510 partition of tt1 for VALUES FROM (5) to (10); +CREATE TABLE tt1_1120 partition of tt1 for VALUES FROM (11) to (20); +INSERT INTO tt1 VALUES (1,11), (3,15), (5,17), (6,19), (8,17), (2,12); +SELECT create_distributed_table('tt1','id'); +NOTICE: Copying data from local table... + create_distributed_table +-------------------------- + +(1 row) + +UPDATE tt1 SET col_2 = 13; +DELETE FROM tt1 WHERE id = 1 or id = 3 or id = 5; +SELECT * FROM tt1 ORDER BY 1 DESC, 2 DESC; + id | col_2 +----+------- + 8 | 13 + 6 | 13 + 2 | 13 +(3 rows) + +-- Partitioned distributed table within transaction +INSERT INTO tt1 VALUES(4,6); +INSERT INTO tt1 VALUES(7,7); +INSERT INTO tt1 VALUES(9,8); +BEGIN; +-- Update rows from partititon tt1_1120 +UPDATE tt1 SET col_2 = 12 WHERE col_2 > 10 and col_2 < 20; +-- Update rows from partititon tt1_510 +UPDATE tt1 SET col_2 = 7 WHERE col_2 < 10 and col_2 > 5; +COMMIT; +SELECT * FROM tt1 ORDER BY id; + id | col_2 +----+------- + 2 | 12 + 4 | 7 + 6 | 12 + 7 | 7 + 8 | 12 + 9 | 7 +(6 rows) + +-- Modify main table and partition table within same transaction +BEGIN; +UPDATE tt1 SET col_2 = 12 WHERE col_2 > 10 and col_2 < 20; +UPDATE tt1 SET col_2 = 7 WHERE col_2 < 10 and col_2 > 5; +DELETE FROM tt1_510; +DELETE FROM tt1_1120; +COMMIT; +SELECT * FROM tt1 ORDER BY id; + id | col_2 +----+------- +(0 rows) + +DROP TABLE tt1; +-- Update and copy in the same transaction +CREATE TABLE tt2(id int, col_2 int); +SELECT create_distributed_table('tt2','id'); + create_distributed_table +-------------------------- + +(1 row) + +BEGIN; +\COPY tt2 FROM STDIN DELIMITER AS ','; +UPDATE tt2 SET col_2 = 1; +COMMIT; +SELECT * FROM tt2 ORDER BY id; + id | col_2 +----+------- + 1 | 1 + 2 | 1 + 3 | 1 + 7 | 1 + 9 | 1 +(5 rows) + +-- Test returning with both type of executors +UPDATE tt2 SET col_2 = 5 RETURNING id, col_2; + id | col_2 +----+------- + 1 | 5 + 2 | 5 + 3 | 5 + 7 | 5 + 9 | 5 +(5 rows) + +SET citus.multi_shard_modify_mode to sequential; +UPDATE tt2 SET col_2 = 3 RETURNING id, col_2; + id | col_2 +----+------- + 1 | 3 + 2 | 3 + 3 | 3 + 7 | 3 + 9 | 3 +(5 rows) + +DROP TABLE tt2; +-- Multiple RTEs are only supported if subquery is pushdownable +SET citus.multi_shard_modify_mode to DEFAULT; +-- To test colocation between tables in modify query +SET citus.shard_count to 6; +CREATE TABLE events_test_table_2 (user_id int, value_1 int, value_2 int, value_3 int); +SELECT create_distributed_table('events_test_table_2', 'user_id'); + create_distributed_table +-------------------------- + +(1 row) + +\COPY events_test_table_2 FROM STDIN DELIMITER AS ','; +CREATE TABLE events_test_table_local (user_id int, value_1 int, value_2 int, value_3 int); +\COPY events_test_table_local FROM STDIN DELIMITER AS ','; +CREATE TABLE test_table_1(id int, date_col timestamptz, col_3 int); +INSERT INTO test_table_1 VALUES(1, '2014-04-05 08:32:12', 5); +INSERT INTO test_table_1 VALUES(2, '2015-02-01 08:31:16', 7); +INSERT INTO test_table_1 VALUES(3, '2111-01-12 08:35:19', 9); +SELECT create_distributed_table('test_table_1', 'id'); +NOTICE: Copying data from local table... + create_distributed_table +-------------------------- + +(1 row) + +-- We can pushdown query if there is partition key equality +UPDATE users_test_table +SET value_2 = 5 +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id; +DELETE FROM users_test_table +USING events_test_table +WHERE users_test_table.user_id = events_test_table.user_id; +UPDATE users_test_table +SET value_1 = 3 +WHERE user_id IN (SELECT user_id + FROM events_test_table); +DELETE FROM users_test_table +WHERE user_id IN (SELECT user_id + FROM events_test_table); +DELETE FROM events_test_table_2 +WHERE now() > (SELECT max(date_col) + FROM test_table_1 + WHERE test_table_1.id = events_test_table_2.user_id + GROUP BY id) +RETURNING *; + user_id | value_1 | value_2 | value_3 +---------+---------+---------+--------- + 1 | 5 | 7 | 7 + 1 | 20 | 12 | 25 + 1 | 60 | 17 | 17 +(3 rows) + +UPDATE users_test_table +SET value_1 = 5 +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id + AND events_test_table.user_id > 5; +UPDATE users_test_table +SET value_1 = 4 +WHERE user_id IN (SELECT user_id + FROM users_test_table + UNION + SELECT user_id + FROM events_test_table); +UPDATE users_test_table +SET value_1 = 4 +WHERE user_id IN (SELECT user_id + FROM users_test_table + UNION + SELECT user_id + FROM events_test_table) returning value_3; + value_3 +--------- + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 +(11 rows) + +UPDATE users_test_table +SET value_1 = 4 +WHERE user_id IN (SELECT user_id + FROM users_test_table + UNION ALL + SELECT user_id + FROM events_test_table) returning value_3; + value_3 +--------- + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 +(11 rows) + +UPDATE users_test_table +SET value_1 = 5 +WHERE + value_2 > + (SELECT + max(value_2) + FROM + events_test_table + WHERE + users_test_table.user_id = events_test_table.user_id + GROUP BY + user_id + ); +UPDATE users_test_table +SET value_3 = 1 +WHERE + value_2 > + (SELECT + max(value_2) + FROM + events_test_table + WHERE + users_test_table.user_id = events_test_table.user_id AND + users_test_table.value_2 > events_test_table.value_2 + GROUP BY + user_id + ); +UPDATE users_test_table +SET value_2 = 4 +WHERE + value_1 > 1 AND value_1 < 3 + AND value_2 >= 1 + AND user_id IN + ( + SELECT + e1.user_id + FROM ( + SELECT + user_id, + 1 AS view_homepage + FROM events_test_table + WHERE + value_1 IN (0, 1) + ) e1 LEFT JOIN LATERAL ( + SELECT + user_id, + 1 AS use_demo + FROM events_test_table + WHERE + user_id = e1.user_id + ) e2 ON true +); +UPDATE users_test_table +SET value_3 = 5 +WHERE value_2 IN (SELECT AVG(value_1) OVER (PARTITION BY user_id) FROM events_test_table WHERE events_test_table.user_id = users_test_table.user_id); +-- Test it within transaction +BEGIN; +INSERT INTO users_test_table +SELECT * FROM events_test_table +WHERE events_test_table.user_id = 1 OR events_test_table.user_id = 5; +SELECT SUM(value_2) FROM users_test_table; + sum +----- + 169 +(1 row) + +UPDATE users_test_table +SET value_2 = 1 +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id; +SELECT SUM(value_2) FROM users_test_table; + sum +----- + 97 +(1 row) + +COMMIT; +-- Test with schema +CREATE SCHEMA sec_schema; +CREATE TABLE sec_schema.tt1(id int, value_1 int); +SELECT create_distributed_table('sec_schema.tt1','id'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO sec_schema.tt1 values(1,1),(2,2),(7,7),(9,9); +UPDATE sec_schema.tt1 +SET value_1 = 11 +WHERE id < (SELECT max(value_2) FROM events_test_table_2 + WHERE sec_schema.tt1.id = events_test_table_2.user_id + GROUP BY user_id) +RETURNING *; + id | value_1 +----+--------- + 7 | 11 + 9 | 11 +(2 rows) + +DROP SCHEMA sec_schema CASCADE; +NOTICE: drop cascades to table sec_schema.tt1 +-- We don't need partition key equality with reference tables +UPDATE events_test_table +SET value_2 = 5 +FROM users_reference_copy_table +WHERE users_reference_copy_table.user_id = events_test_table.value_1; +-- Both reference tables and hash distributed tables can be used in subquery +UPDATE events_test_table as ett +SET value_2 = 6 +WHERE ett.value_3 IN (SELECT utt.value_3 + FROM users_test_table as utt, users_reference_copy_table as uct + WHERE utt.user_id = uct.user_id AND utt.user_id = ett.user_id); +-- We don't need equality check with constant values in sub-select +UPDATE users_reference_copy_table +SET value_2 = 6 +WHERE user_id IN (SELECT 2); +UPDATE users_reference_copy_table +SET value_2 = 6 +WHERE value_1 IN (SELECT 2); +UPDATE users_test_table +SET value_2 = 6 +WHERE user_id IN (SELECT 2); +UPDATE users_test_table +SET value_2 = 6 +WHERE value_1 IN (SELECT 2); +-- Function calls in subqueries will be recursively planned +UPDATE test_table_1 +SET col_3 = 6 +WHERE date_col IN (SELECT now()); +-- Test with prepared statements +SELECT COUNT(*) FROM users_test_table WHERE value_1 = 0; + count +------- + 0 +(1 row) + +PREPARE foo_plan_2(int,int) AS UPDATE users_test_table + SET value_1 = $1, value_3 = $2 + FROM events_test_table + WHERE users_test_table.user_id = events_test_table.user_id; +EXECUTE foo_plan_2(1,5); +EXECUTE foo_plan_2(3,15); +EXECUTE foo_plan_2(5,25); +EXECUTE foo_plan_2(7,35); +EXECUTE foo_plan_2(9,45); +EXECUTE foo_plan_2(0,0); +SELECT COUNT(*) FROM users_test_table WHERE value_1 = 0; + count +------- + 6 +(1 row) + +-- Test with varying WHERE expressions +UPDATE users_test_table +SET value_1 = 7 +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id OR FALSE; +UPDATE users_test_table +SET value_1 = 7 +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id AND TRUE; +-- Test with inactive shard-placement +-- manually set shardstate of one placement of users_test_table as inactive +UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1440000; +UPDATE users_test_table +SET value_2 = 5 +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id; +ERROR: cannot find a worker that has active placements for all shards in the query +-- manually set shardstate of one placement of events_test_table as inactive +UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1440004; +UPDATE users_test_table +SET value_2 = 5 +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id; +ERROR: cannot find a worker that has active placements for all shards in the query +UPDATE pg_dist_shard_placement SET shardstate = 1 WHERE shardid = 1440000; +UPDATE pg_dist_shard_placement SET shardstate = 1 WHERE shardid = 1440004; +-- Subquery must return single value to use it with comparison operators +UPDATE users_test_table as utt +SET value_1 = 3 +WHERE value_2 > (SELECT value_3 FROM events_test_table as ett WHERE utt.user_id = ett.user_id); +ERROR: more than one row returned by a subquery used as an expression +CONTEXT: while executing command on localhost:57637 +-- We can not pushdown a query if the target relation is reference table +UPDATE users_reference_copy_table +SET value_2 = 5 +FROM events_test_table +WHERE users_reference_copy_table.user_id = events_test_table.user_id; +ERROR: only reference tables may be queried when targeting a reference table with multi shard UPDATE/DELETE queries with multiple tables +-- We cannot push down it if the query has outer join and using +UPDATE events_test_table +SET value_2 = users_test_table.user_id +FROM users_test_table +FULL OUTER JOIN events_test_table e2 USING (user_id) +WHERE e2.user_id = events_test_table.user_id RETURNING events_test_table.value_2; +ERROR: a join with USING causes an internal naming conflict, use ON instead +-- Non-pushdownable subqueries, but will be handled through recursive planning +UPDATE users_test_table +SET value_1 = 1 +WHERE user_id IN (SELECT Count(value_1) + FROM events_test_table + GROUP BY user_id); +UPDATE users_test_table +SET value_1 = (SELECT Count(*) + FROM events_test_table); +UPDATE users_test_table +SET value_1 = 4 +WHERE user_id IN (SELECT user_id + FROM users_test_table + UNION + SELECT value_1 + FROM events_test_table); +UPDATE users_test_table +SET value_1 = 4 +WHERE user_id IN (SELECT user_id + FROM users_test_table + INTERSECT + SELECT Sum(value_1) + FROM events_test_table + GROUP BY user_id); +UPDATE users_test_table +SET value_2 = (SELECT value_3 + FROM users_test_table); +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +UPDATE users_test_table +SET value_2 = 2 +WHERE + value_2 > + (SELECT + max(value_2) + FROM + events_test_table + WHERE + users_test_table.user_id > events_test_table.user_id AND + users_test_table.value_1 = events_test_table.value_1 + GROUP BY + user_id + ); +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +UPDATE users_test_table +SET (value_1, value_2) = (2,1) +WHERE user_id IN + (SELECT user_id + FROM users_test_table + INTERSECT + SELECT user_id + FROM events_test_table); +-- Reference tables can not locate on the outer part of the outer join +UPDATE users_test_table +SET value_1 = 4 +WHERE user_id IN + (SELECT DISTINCT e2.user_id + FROM users_reference_copy_table + LEFT JOIN users_test_table e2 ON (e2.user_id = users_reference_copy_table.value_1)) RETURNING *; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- Volatile functions are also not supported +UPDATE users_test_table +SET value_2 = 5 +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id * random(); +ERROR: functions used in the WHERE clause of modification queries on distributed tables must not be VOLATILE +UPDATE users_test_table +SET value_2 = 5 * random() +FROM events_test_table +WHERE users_test_table.user_id = events_test_table.user_id; +ERROR: functions used in UPDATE queries on distributed tables must not be VOLATILE +-- Recursive modify planner does not take care of following test because the query +-- is fully pushdownable, yet not allowed because it would lead to inconsistent replicas. +UPDATE users_test_table +SET value_2 = subquery.random FROM (SELECT user_id, random() + FROM events_test_table) subquery +WHERE users_test_table.user_id = subquery.user_id; +ERROR: functions used in UPDATE queries on distributed tables must not be VOLATILE +-- Volatile functions in a subquery are recursively planned +UPDATE users_test_table +SET value_2 = 5 +WHERE users_test_table.user_id IN (SELECT user_id * random() FROM events_test_table); +UPDATE users_test_table +SET value_2 = subquery.random FROM (SELECT user_id, random() + FROM events_test_table) subquery; +UPDATE users_test_table +SET value_2 = subquery.random FROM (SELECT user_id, random() + FROM events_test_table OFFSET 0) subquery +WHERE users_test_table.user_id = subquery.user_id; +-- Make following tests consistent +UPDATE users_test_table SET value_2 = 0; +-- Local tables are not supported +UPDATE users_test_table +SET value_2 = 5 +FROM events_test_table_local +WHERE users_test_table.user_id = events_test_table_local.user_id; +ERROR: relation events_test_table_local is not distributed +UPDATE events_test_table_local +SET value_2 = 5 +FROM users_test_table +WHERE events_test_table_local.user_id = users_test_table.user_id; +ERROR: relation events_test_table_local is not distributed +-- Local tables in a subquery are supported through recursive planning +UPDATE users_test_table +SET value_2 = 5 +WHERE users_test_table.user_id IN(SELECT user_id FROM events_test_table_local); +-- Shard counts of tables must be equal to pushdown the query +UPDATE users_test_table +SET value_2 = 5 +FROM events_test_table_2 +WHERE users_test_table.user_id = events_test_table_2.user_id; +ERROR: cannot push down this subquery +DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning +-- Should error out due to multiple row return from subquery, but we can not get this information within +-- subquery pushdown planner. This query will be sent to worker with recursive planner. +\set VERBOSITY terse +DELETE FROM users_test_table +WHERE users_test_table.user_id = (SELECT user_id + FROM events_test_table); +ERROR: more than one row returned by a subquery used as an expression +\set VERBOSITY default +-- Cursors are not supported +BEGIN; +DECLARE test_cursor CURSOR FOR SELECT * FROM users_test_table ORDER BY user_id; +FETCH test_cursor; + user_id | value_1 | value_2 | value_3 +---------+---------+---------+--------- + 1 | 2 | 5 | 0 +(1 row) + +UPDATE users_test_table SET value_2 = 5 WHERE CURRENT OF test_cursor; +ERROR: cannot run DML queries with cursors +ROLLBACK; +-- Stable functions are supported +SELECT * FROM test_table_1 ORDER BY 1 DESC, 2 DESC, 3 DESC; + id | date_col | col_3 +----+------------------------------+------- + 3 | Mon Jan 12 08:35:19 2111 PST | 9 + 2 | Sun Feb 01 08:31:16 2015 PST | 7 + 1 | Sat Apr 05 08:32:12 2014 PDT | 5 +(3 rows) + +UPDATE test_table_1 SET col_3 = 3 WHERE date_col < now(); +SELECT * FROM test_table_1 ORDER BY 1 DESC, 2 DESC, 3 DESC; + id | date_col | col_3 +----+------------------------------+------- + 3 | Mon Jan 12 08:35:19 2111 PST | 9 + 2 | Sun Feb 01 08:31:16 2015 PST | 3 + 1 | Sat Apr 05 08:32:12 2014 PDT | 3 +(3 rows) + +DELETE FROM test_table_1 WHERE date_col < current_timestamp; +SELECT * FROM test_table_1 ORDER BY 1 DESC, 2 DESC, 3 DESC; + id | date_col | col_3 +----+------------------------------+------- + 3 | Mon Jan 12 08:35:19 2111 PST | 9 +(1 row) + +DROP TABLE test_table_1; +-- Volatile functions are not supported +CREATE TABLE test_table_2(id int, double_col double precision); +INSERT INTO test_table_2 VALUES(1, random()); +INSERT INTO test_table_2 VALUES(2, random()); +INSERT INTO test_table_2 VALUES(3, random()); +SELECT create_distributed_table('test_table_2', 'id'); +NOTICE: Copying data from local table... + create_distributed_table +-------------------------- + +(1 row) + +UPDATE test_table_2 SET double_col = random(); +ERROR: functions used in UPDATE queries on distributed tables must not be VOLATILE +DROP TABLE test_table_2; +-- Run multi shard updates and deletes without transaction on reference tables +SELECT COUNT(*) FROM users_reference_copy_table; + count +------- + 15 +(1 row) + +UPDATE users_reference_copy_table SET value_1 = 1; +SELECT SUM(value_1) FROM users_reference_copy_table; + sum +----- + 15 +(1 row) + +SELECT COUNT(*), SUM(value_2) FROM users_reference_copy_table WHERE user_id = 3 or user_id = 5; + count | sum +-------+----- + 4 | 52 +(1 row) + +UPDATE users_reference_copy_table SET value_2 = value_2 + 1 WHERE user_id = 3 or user_id = 5; +SELECT COUNT(*), SUM(value_2) FROM users_reference_copy_table WHERE user_id = 3 or user_id = 5; + count | sum +-------+----- + 4 | 56 +(1 row) + +UPDATE users_reference_copy_table SET value_3 = 0 WHERE user_id <> 3; +SELECT SUM(value_3) FROM users_reference_copy_table WHERE user_id <> 3; + sum +----- + 0 +(1 row) + +DELETE FROM users_reference_copy_table WHERE user_id = 3 or user_id = 5; +SELECT COUNT(*) FROM users_reference_copy_table WHERE user_id = 3 or user_id = 5; + count +------- + 0 +(1 row) + +-- Do some tests by changing shard replication factor +DROP TABLE users_test_table; +SET citus.shard_replication_factor to 2; +CREATE TABLE users_test_table(user_id int, value_1 int, value_2 int, value_3 int); +SELECT create_distributed_table('users_test_table', 'user_id'); + create_distributed_table +-------------------------- + +(1 row) + +\COPY users_test_table FROM STDIN DELIMITER AS ','; +-- Run multi shard updates and deletes without transaction on hash distributed tables +UPDATE users_test_table SET value_1 = 1; +SELECT COUNT(*), SUM(value_1) FROM users_test_table; + count | sum +-------+----- + 15 | 15 +(1 row) + +SELECT COUNT(*), SUM(value_2) FROM users_test_table WHERE user_id = 1 or user_id = 3; + count | sum +-------+----- + 4 | 52 +(1 row) + +UPDATE users_test_table SET value_2 = value_2 + 1 WHERE user_id = 1 or user_id = 3; +SELECT COUNT(*), SUM(value_2) FROM users_test_table WHERE user_id = 1 or user_id = 3; + count | sum +-------+----- + 4 | 56 +(1 row) + +UPDATE users_test_table SET value_3 = 0 WHERE user_id <> 5; +SELECT SUM(value_3) FROM users_test_table WHERE user_id <> 5; + sum +----- + 0 +(1 row) + +SELECT COUNT(*) FROM users_test_table WHERE user_id = 3 or user_id = 5; + count +------- + 4 +(1 row) + +DELETE FROM users_test_table WHERE user_id = 3 or user_id = 5; +SELECT COUNT(*) FROM users_test_table WHERE user_id = 3 or user_id = 5; + count +------- + 0 +(1 row) + +DROP TABLE users_test_table; +DROP TABLE events_test_table; +DROP TABLE events_reference_copy_table; +DROP TABLE users_reference_copy_table; diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics_2.out b/src/test/regress/expected/multi_subquery_behavioral_analytics_2.out new file mode 100644 index 000000000..d2df2c122 --- /dev/null +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics_2.out @@ -0,0 +1,2296 @@ +-- +-- multi subquery behavioral analytics queries aims to expand existing subquery pushdown +-- regression tests to cover more cases +-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql +-- +--- We don't need shard id sequence here given that we're not creating any shards, so not writing it at all +-- The following line is intended to force Citus to NOT use router planner for the tests in this +-- file. The motivation for doing this is to make sure that single-task queries can be planned +-- by non-router code-paths. Thus, this flag should NOT be used in production. Otherwise, the actual +-- router queries would fail. +SET citus.enable_router_execution TO FALSE; +------------------------------------ +-- Vanilla funnel query +------------------------------------ +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND u.user_id >= 1 + AND u.user_id <= 3 + AND e.event_type IN (1, 2) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1; + user_id | array_length +---------+-------------- + 3 | 187 + 2 | 180 + 1 | 28 +(3 rows) + +------------------------------------ +-- Funnel grouped by whether or not a user has done an event +-- This has multiple subqueries joinin at the top level +------------------------------------ +SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event +FROM ( + SELECT + t1.user_id, + array_agg(event ORDER BY time) AS events_table, + COALESCE(hasdone_event, 'Has not done event') AS hasdone_event + FROM ( + ( + SELECT u.user_id, 'step=>1'::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND u.user_id >= 1 + AND u.user_id <= 3 + AND e.event_type IN (1, 2) + ) + UNION + ( + SELECT u.user_id, 'step=>2'::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND u.user_id >= 1 + AND u.user_id <= 3 + AND e.event_type IN (3, 4) + ) + ) t1 LEFT JOIN ( + SELECT DISTINCT user_id, + 'Has done event'::TEXT AS hasdone_event + FROM events_table AS e + WHERE e.user_id >= 1 + AND e.user_id <= 3 + AND e.event_type IN (5, 6) + ) t2 ON (t1.user_id = t2.user_id) + GROUP BY t1.user_id, hasdone_event +) t GROUP BY user_id, hasdone_event +ORDER BY user_id; + user_id | sum | length | hasdone_event +---------+-----+--------+---------------- + 1 | 12 | 14 | Has done event + 2 | 20 | 14 | Has done event + 3 | 20 | 14 | Has done event +(3 rows) + +-- same query but multiple joins are one level below, returns count of row instead of actual rows +SELECT count(*) +FROM ( + SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event + FROM ( + SELECT + t1.user_id, + array_agg(event ORDER BY time) AS events_table, + COALESCE(hasdone_event, 'Has not done event') AS hasdone_event + FROM ( + ( + SELECT u.user_id, 'step=>1'::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND u.user_id >= 1 + AND u.user_id <= 3 + AND e.event_type IN (1, 2) + ) + UNION + ( + SELECT u.user_id, 'step=>2'::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND u.user_id >= 1 + AND u.user_id <= 3 + AND e.event_type IN (3, 4) + ) + ) t1 LEFT JOIN ( + SELECT DISTINCT user_id, + 'Has done event'::TEXT AS hasdone_event + FROM events_table AS e + WHERE e.user_id >= 1 + AND e.user_id <= 3 + AND e.event_type IN (5, 6) + ) t2 ON (t1.user_id = t2.user_id) + GROUP BY t1.user_id, hasdone_event + ) t GROUP BY user_id, hasdone_event + ORDER BY user_id) u; + count +------- + 3 +(1 row) + +-- Same queries written without unions +SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event +FROM ( + SELECT + t1.user_id, + array_agg(event ORDER BY time) AS events_table, + COALESCE(hasdone_event, 'Has not done event') AS hasdone_event + FROM ( + SELECT + u.user_id, + CASE WHEN e.event_type IN (1, 2) THEN 'step=>1'::text else 'step==>2'::text END AS event, + e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND u.user_id >= 1 + AND u.user_id <= 3 + AND e.event_type IN (1, 2, 3, 4) + GROUP BY 1,2,3 + ) t1 LEFT JOIN ( + SELECT DISTINCT user_id, + 'Has done event'::TEXT AS hasdone_event + FROM events_table AS e + WHERE e.user_id >= 1 + AND e.user_id <= 3 + AND e.event_type IN (5, 6) + ) t2 ON (t1.user_id = t2.user_id) + GROUP BY t1.user_id, hasdone_event +) t GROUP BY user_id, hasdone_event +ORDER BY user_id; + user_id | sum | length | hasdone_event +---------+-----+--------+---------------- + 1 | 12 | 14 | Has done event + 2 | 20 | 14 | Has done event + 3 | 20 | 14 | Has done event +(3 rows) + +-- same query but multiple joins are one level below, returns count of row instead of actual rows +SELECT count(*) +FROM ( + SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event + FROM ( + SELECT + t1.user_id, + array_agg(event ORDER BY time) AS events_table, + COALESCE(hasdone_event, 'Has not done event') AS hasdone_event + FROM ( + SELECT + u.user_id, + CASE WHEN e.event_type in (1, 2) then 'step=>1'::text else 'step==>2'::text END AS event, + e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND u.user_id >= 1 + AND u.user_id <= 3 + AND e.event_type IN (1, 2, 3, 4) + GROUP BY 1,2,3 + ) t1 LEFT JOIN ( + SELECT DISTINCT user_id, + 'Has done event'::TEXT AS hasdone_event + FROM events_table AS e + WHERE e.user_id >= 1 + AND e.user_id <= 3 + AND e.event_type IN (5, 6) + ) t2 ON (t1.user_id = t2.user_id) + GROUP BY t1.user_id, hasdone_event + ) t GROUP BY user_id, hasdone_event + ORDER BY user_id) u; + count +------- + 3 +(1 row) + +------------------------------------ +-- Funnel, grouped by the number of times a user has done an event +------------------------------------ +SELECT + user_id, + avg(array_length(events_table, 1)) AS event_average, + count_pay + FROM ( + SELECT + subquery_1.user_id, + array_agg(event ORDER BY time) AS events_table, + COALESCE(count_pay, 0) AS count_pay + FROM + ( + (SELECT + users_table.user_id, + 'action=>1'AS event, + events_table.time + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id AND + users_table.user_id >= 1 AND + users_table.user_id <= 3 AND + events_table.event_type > 1 AND events_table.event_type < 3 + ) + UNION + (SELECT + users_table.user_id, + 'action=>2'AS event, + events_table.time + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id AND + users_table.user_id >= 1 AND + users_table.user_id <= 3 AND + events_table.event_type > 1 AND events_table.event_type < 4 + ) + ) AS subquery_1 + LEFT JOIN + (SELECT + user_id, + COUNT(*) AS count_pay + FROM + users_table + WHERE + user_id >= 1 AND + user_id <= 3 AND + users_table.value_1 > 2 AND users_table.value_1 < 5 + GROUP BY + user_id + HAVING + COUNT(*) > 1) AS subquery_2 + ON + subquery_1.user_id = subquery_2.user_id + GROUP BY + subquery_1.user_id, + count_pay) AS subquery_top +WHERE + array_ndims(events_table) > 0 +GROUP BY + count_pay, user_id +ORDER BY + event_average DESC, count_pay DESC, user_id DESC; + user_id | event_average | count_pay +---------+---------------------+----------- + 3 | 19.0000000000000000 | 7 + 2 | 12.0000000000000000 | 9 + 1 | 7.0000000000000000 | 5 +(3 rows) + +SELECT + user_id, + avg(array_length(events_table, 1)) AS event_average, + count_pay + FROM ( + SELECT + subquery_1.user_id, + array_agg(event ORDER BY time) AS events_table, + COALESCE(count_pay, 0) AS count_pay + FROM + ( + (SELECT + users_table.user_id, + 'action=>1'AS event, + events_table.time + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id AND + users_table.user_id >= 1 AND + users_table.user_id <= 3 AND + events_table.event_type > 1 AND events_table.event_type < 3 + ) + UNION + (SELECT + users_table.user_id, + 'action=>2'AS event, + events_table.time + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id AND + users_table.user_id >= 1 AND + users_table.user_id <= 3 AND + events_table.event_type > 1 AND events_table.event_type < 4 + ) + ) AS subquery_1 + LEFT JOIN + (SELECT + user_id, + COUNT(*) AS count_pay + FROM + users_table + WHERE + user_id >= 1 AND + user_id <= 3 AND + users_table.value_1 > 2 AND users_table.value_1 < 4 + GROUP BY + user_id + HAVING + COUNT(*) > 1) AS subquery_2 + ON + subquery_1.user_id = subquery_2.user_id + GROUP BY + subquery_1.user_id, + count_pay) AS subquery_top +WHERE + array_ndims(events_table) > 0 +GROUP BY + count_pay, user_id +HAVING + avg(array_length(events_table, 1)) > 0 +ORDER BY + event_average DESC, count_pay DESC, user_id DESC; + user_id | event_average | count_pay +---------+---------------------+----------- + 3 | 19.0000000000000000 | 3 + 2 | 12.0000000000000000 | 4 + 1 | 7.0000000000000000 | 3 +(3 rows) + +-- Same queries rewritten without using unions + SELECT + user_id, + avg(array_length(events_table, 1)) AS event_average, + count_pay + FROM ( + SELECT + subquery_1.user_id, + array_agg(event ORDER BY time) AS events_table, + COALESCE(count_pay, 0) AS count_pay + FROM + ( + SELECT + users_table.user_id, + CASE + WHEN + events_table.event_type > 1 AND events_table.event_type < 3 + THEN 'action=>1' + ELSE 'action=>2' + END AS event, + events_table.time + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id AND + users_table.user_id >= 1 AND + users_table.user_id <= 3 AND + (events_table.event_type > 1 AND events_table.event_type < 3 + OR + events_table.event_type > 2 AND events_table.event_type < 4) + GROUP BY 1, 2, 3 + ) AS subquery_1 + LEFT JOIN + (SELECT + user_id, + COUNT(*) AS count_pay + FROM + users_table + WHERE + user_id >= 1 AND + user_id <= 3 AND + users_table.value_1 > 3 AND users_table.value_1 < 5 + GROUP BY + user_id + HAVING + COUNT(*) > 1) AS subquery_2 + ON + subquery_1.user_id = subquery_2.user_id + GROUP BY + subquery_1.user_id, + count_pay) AS subquery_top +WHERE + array_ndims(events_table) > 0 +GROUP BY + count_pay, user_id +ORDER BY + event_average DESC, count_pay DESC, user_id DESC; + user_id | event_average | count_pay +---------+---------------------+----------- + 3 | 12.0000000000000000 | 4 + 2 | 9.0000000000000000 | 5 + 1 | 5.0000000000000000 | 2 +(3 rows) + +SELECT + user_id, + avg(array_length(events_table, 1)) AS event_average, + count_pay + FROM ( + SELECT + subquery_1.user_id, + array_agg(event ORDER BY time) AS events_table, + COALESCE(count_pay, 0) AS count_pay + FROM + ( + SELECT + users_table.user_id, + CASE WHEN events_table.event_type > 1 AND events_table.event_type < 3 THEN 'action=>1' ELSE 'action=>2' END AS event, + events_table.time + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id AND + users_table.user_id >= 1 AND + users_table.user_id <= 3 AND + (events_table.event_type > 1 AND events_table.event_type < 3 + OR + events_table.event_type > 2 AND events_table.event_type < 4) + GROUP BY 1, 2, 3 + ) AS subquery_1 + LEFT JOIN + (SELECT + user_id, + COUNT(*) AS count_pay + FROM + users_table + WHERE + user_id >= 1 AND + user_id <= 3 AND + users_table.value_1 > 3 AND users_table.value_1 < 5 + GROUP BY + user_id + HAVING + COUNT(*) > 1) AS subquery_2 + ON + subquery_1.user_id = subquery_2.user_id + GROUP BY + subquery_1.user_id, + count_pay) AS subquery_top +WHERE + array_ndims(events_table) > 0 +GROUP BY + count_pay, user_id +HAVING + avg(array_length(events_table, 1)) > 0 +ORDER BY + event_average DESC, count_pay DESC, user_id DESC; + user_id | event_average | count_pay +---------+---------------------+----------- + 3 | 12.0000000000000000 | 4 + 2 | 9.0000000000000000 | 5 + 1 | 5.0000000000000000 | 2 +(3 rows) + +------------------------------------ +-- Most recently seen users_table events_table +------------------------------------ +-- Note that we don't use ORDER BY/LIMIT yet +------------------------------------ +SELECT + user_id, + user_lastseen, + array_length(event_array, 1) +FROM ( + SELECT + user_id, + max(u.time) as user_lastseen, + array_agg(event_type ORDER BY u.time) AS event_array + FROM ( + SELECT user_id, time + FROM users_table + WHERE + user_id >= 1 AND + user_id <= 3 AND + users_table.value_1 > 1 AND users_table.value_1 < 3 + ) u LEFT JOIN LATERAL ( + SELECT event_type, time + FROM events_table + WHERE user_id = u.user_id AND + events_table.event_type > 1 AND events_table.event_type < 3 + ) t ON true + GROUP BY user_id +) AS shard_union +ORDER BY user_lastseen DESC, user_id; + user_id | user_lastseen | array_length +---------+---------------------------------+-------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +------------------------------------ +-- Count the number of distinct users_table who are in segment X and Y and Z +------------------------------------ +SELECT user_id +FROM users_table +WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) + AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4) + AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6) +GROUP BY + user_id +ORDER BY + user_id DESC + LIMIT 5; + user_id +--------- + 6 + 5 + 4 + 3 + 1 +(5 rows) + +------------------------------------ +-- Find customers who have done X, and satisfy other customer specific criteria +------------------------------------ +SELECT user_id, value_2 FROM users_table WHERE + value_1 > 1 AND value_1 < 3 + AND value_2 >= 1 + AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type < 3 AND value_3 > 1 AND user_id = users_table.user_id) +ORDER BY 2 DESC, 1 DESC +LIMIT 5; + user_id | value_2 +---------+--------- + 6 | 4 + 6 | 4 + 2 | 4 + 6 | 3 + 4 | 3 +(5 rows) + +------------------------------------ +-- Customers who haven’t done X, and satisfy other customer specific criteria +------------------------------------ +SELECT user_id, value_2 FROM users_table WHERE + value_1 = 2 + AND value_2 >= 1 + AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=2 AND value_3 > 1 AND user_id = users_table.user_id) +ORDER BY 1 DESC, 2 DESC +LIMIT 3; + user_id | value_2 +---------+--------- + 5 | 5 + 5 | 5 + 5 | 2 +(3 rows) + +------------------------------------ +-- Customers who have done X and Y, and satisfy other customer specific criteria +------------------------------------ +SELECT user_id, sum(value_2) as cnt FROM users_table WHERE + value_1 > 1 + AND value_2 >= 1 + AND EXISTS (SELECT user_id FROM events_table WHERE event_type != 1 AND value_3 > 1 AND user_id = users_table.user_id) + AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 2 AND value_3 > 1 AND user_id = users_table.user_id) +GROUP BY + user_id +ORDER BY cnt DESC, user_id DESC +LIMIT 5; + user_id | cnt +---------+----- + 4 | 43 + 2 | 37 + 3 | 34 + 1 | 17 + 6 | 15 +(5 rows) + +------------------------------------ +-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria +------------------------------------ +SELECT user_id, value_2 FROM users_table WHERE + value_2 >= 1 + AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id) + AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id = users_table.user_id) +ORDER BY 2 DESC, 1 DESC +LIMIT 4; + user_id | value_2 +---------+--------- + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 4 +(4 rows) + +------------------------------------ +-- Customers who have done X more than 2 times, and satisfy other customer specific criteria +------------------------------------ +SELECT user_id, + avg(value_2) + FROM users_table + WHERE value_1 > 1 + AND value_1 < 3 + AND value_2 >= 1 + AND EXISTS (SELECT user_id + FROM events_table + WHERE event_type > 1 + AND event_type < 3 + AND value_3 > 1 + AND user_id = users_table.user_id + GROUP BY user_id + HAVING Count(*) > 2) +GROUP BY + user_id +ORDER BY + 1 DESC, 2 DESC +LIMIT 5; + user_id | avg +---------+-------------------- + 4 | 2.0000000000000000 + 3 | 2.0000000000000000 +(2 rows) + +------------------------------------ +-- Find me all users_table who logged in more than once +------------------------------------ +SELECT user_id, value_1 from +( + SELECT + user_id, value_1 From users_table + WHERE + value_2 > 1 and user_id = 2 + GROUP BY + value_1, user_id + HAVING + count(*) > 1 +) AS a +ORDER BY + user_id ASC, value_1 ASC; + user_id | value_1 +---------+--------- + 2 | 0 + 2 | 2 + 2 | 3 + 2 | 4 +(4 rows) + +-- same query with additional filter to make it not router plannable +SELECT user_id, value_1 from +( + SELECT + user_id, value_1 From users_table + WHERE + value_2 > 1 and (user_id = 2 OR user_id = 3) + GROUP BY + value_1, user_id + HAVING count(*) > 1 +) AS a +ORDER BY + user_id ASC, value_1 ASC; + user_id | value_1 +---------+--------- + 2 | 0 + 2 | 2 + 2 | 3 + 2 | 4 + 3 | 1 + 3 | 2 + 3 | 3 + 3 | 4 +(8 rows) + +------------------------------------ +-- Find me all users_table who has done some event and has filters +------------------------------------ +SELECT user_id +FROM events_table +WHERE + event_type = 3 AND value_2 > 2 AND + user_id IN + (SELECT + user_id + FROM + users_table + WHERE + value_1 = 1 AND value_2 > 2 + ) +ORDER BY 1; + user_id +--------- + 1 + 2 + 2 + 3 + 5 +(5 rows) + +------------------------------------ +-- Which events_table did people who has done some specific events_table +------------------------------------ +SELECT + user_id, event_type FROM events_table +WHERE + user_id in (SELECT user_id from events_table WHERE event_type > 3 and event_type < 5) +GROUP BY + user_id, event_type +ORDER BY 2 DESC, 1 +LIMIT 3; + user_id | event_type +---------+------------ + 1 | 6 + 2 | 5 + 3 | 5 +(3 rows) + +------------------------------------ +-- Find me all the users_table who has done some event more than three times +------------------------------------ +SELECT user_id FROM +( + SELECT + user_id + FROM + events_table + WHERE + event_type = 2 + GROUP BY + user_id + HAVING + count(*) > 1 +) AS a +ORDER BY + user_id; + user_id +--------- + 1 + 2 + 3 + 4 + 6 +(5 rows) + +------------------------------------ +-- Find my assets that have the highest probability and fetch their metadata +------------------------------------ +CREATE TEMP TABLE assets AS +SELECT + users_table.user_id, users_table.value_1, prob +FROM + users_table + JOIN + (SELECT + ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma, events_table as short_list + WHERE + short_list.user_id = ma.user_id and ma.value_1 < 2 and short_list.event_type < 2 + ) temp + ON users_table.user_id = temp.user_id + WHERE + users_table.value_1 < 2; + -- get some statistics from the aggregated results to ensure the results are correct +SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM assets; + count | count | avg +-------+-------+-------------------- + 732 | 6 | 3.3934426229508197 +(1 row) + +DROP TABLE assets; +-- count number of distinct users who have value_1 equal to 5 or 13 but not 3 +-- is recusrively planned +SET client_min_messages TO DEBUG1; +SELECT count(*) FROM +( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '1' OR value_1 = '3') AND + user_id NOT IN (select user_id from users_table where value_1 = '4') + GROUP BY + user_id + HAVING + count(distinct value_1) = 2 +) as foo; +DEBUG: generating subplan 23_1 for subquery SELECT user_id FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.=) 4) +DEBUG: Plan 23 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.user_id FROM public.users_table WHERE (((users_table.value_1 OPERATOR(pg_catalog.=) 1) OR (users_table.value_1 OPERATOR(pg_catalog.=) 3)) AND (NOT (users_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('23_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))))) GROUP BY users_table.user_id HAVING (count(DISTINCT users_table.value_1) OPERATOR(pg_catalog.=) 2)) foo + count +------- + 1 +(1 row) + +RESET client_min_messages; +-- previous push down query +SELECT subquery_count FROM + (SELECT count(*) as subquery_count FROM + (SELECT + user_id + FROM + users_table + WHERE + (value_1 = '1' OR value_1 = '3') + GROUP BY + user_id + HAVING + count(distinct value_1) = 2) as a + LEFT JOIN + (SELECT + user_id + FROM + users_table + WHERE + (value_1 = '2') + GROUP BY + user_id) as b + ON a.user_id = b.user_id + WHERE + b.user_id IS NULL + GROUP BY + a.user_id + ) AS inner_subquery; + subquery_count +---------------- + 1 +(1 row) + +-- new pushdown query without single range table entry at top requirement +SELECT count(*) as subquery_count +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '1' OR value_1 = '3') + GROUP BY + user_id + HAVING + count(distinct value_1) = 2 + ) as a + LEFT JOIN ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '2') + GROUP BY + user_id) AS b + ON a.user_id = b.user_id +WHERE + b.user_id IS NULL +GROUP BY + a.user_id; + subquery_count +---------------- + 1 +(1 row) + +-- most queries below has limit clause +-- therefore setting subquery_pushdown flag for all +SET citus.subquery_pushdown to ON; +-- multi-subquery-join +-- The first query has filters on partion column to make it router plannable +-- but it is processed by logical planner since we disabled router execution +SELECT + e1.user_id, + sum(view_homepage) AS viewed_homepage, + sum(use_demo) AS use_demo, + sum(enter_credit_card) AS entered_credit_card, + sum(submit_card_info) as submit_card_info, + sum(see_bought_screen) as see_bought_screen +FROM ( + -- Get the first time each user viewed the homepage. + SELECT + user_id, + 1 AS view_homepage, + min(time) AS view_homepage_time + FROM events_table + WHERE user_id = 1 and + event_type IN (1, 2) + GROUP BY user_id +) e1 LEFT JOIN LATERAL ( + SELECT + user_id, + 1 AS use_demo, + time AS use_demo_time + FROM events_table + WHERE + user_id = e1.user_id AND user_id = 1 and + event_type IN (2, 3) + ORDER BY time + LIMIT 1 +) e2 ON true LEFT JOIN LATERAL ( + SELECT + user_id, + 1 AS enter_credit_card, + time AS enter_credit_card_time + FROM events_table + WHERE + user_id = e2.user_id AND user_id = 1 and + event_type IN (3, 4) + ORDER BY time + LIMIT 1 +) e3 ON true LEFT JOIN LATERAL ( + SELECT + 1 AS submit_card_info, + user_id, + time AS enter_credit_card_time + FROM events_table + WHERE + user_id = e3.user_id AND user_id = 1 and + event_type IN (4, 5) + ORDER BY time + LIMIT 1 +) e4 ON true LEFT JOIN LATERAL ( + SELECT + 1 AS see_bought_screen + FROM events_table + WHERE + user_id = e4.user_id AND user_id = 1 and + event_type IN (5, 6) + ORDER BY time + LIMIT 1 +) e5 ON true +WHERE + e1.user_id = 1 +GROUP BY + e1.user_id +LIMIT 1; + user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen +---------+-----------------+----------+---------------------+------------------+------------------- + 1 | 1 | 1 | 1 | 1 | 1 +(1 row) + +-- Same query without all limitations +SELECT + e1.user_id, + sum(view_homepage) AS viewed_homepage, + sum(use_demo) AS use_demo, + sum(enter_credit_card) AS entered_credit_card, + sum(submit_card_info) as submit_card_info, + sum(see_bought_screen) as see_bought_screen +FROM ( + -- Get the first time each user viewed the homepage. + SELECT + user_id, + 1 AS view_homepage, + min(time) AS view_homepage_time + FROM events_table + WHERE + event_type IN (1, 2) + GROUP BY user_id +) e1 LEFT JOIN LATERAL ( + SELECT + user_id, + 1 AS use_demo, + time AS use_demo_time + FROM events_table + WHERE + user_id = e1.user_id AND + event_type IN (2, 3) + ORDER BY time +) e2 ON true LEFT JOIN LATERAL ( + SELECT + user_id, + 1 AS enter_credit_card, + time AS enter_credit_card_time + FROM events_table + WHERE + user_id = e2.user_id AND + event_type IN (3, 4) + ORDER BY time +) e3 ON true LEFT JOIN LATERAL ( + SELECT + 1 AS submit_card_info, + user_id, + time AS enter_credit_card_time + FROM events_table + WHERE + user_id = e3.user_id AND + event_type IN (4, 5) + ORDER BY time +) e4 ON true LEFT JOIN LATERAL ( + SELECT + 1 AS see_bought_screen + FROM events_table + WHERE + user_id = e4.user_id AND + event_type IN (5, 6) + ORDER BY time +) e5 ON true +GROUP BY e1.user_id +ORDER BY 6 DESC NULLS LAST, 5 DESC NULLS LAST, 4 DESC NULLS LAST, 3 DESC NULLS LAST, 2 DESC NULLS LAST, 1 +LIMIT 15; + user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen +---------+-----------------+----------+---------------------+------------------+------------------- + 2 | 1080 | 1080 | 1080 | 1080 | 1080 + 3 | 540 | 540 | 540 | 540 | 540 + 4 | 252 | 252 | 252 | 252 | 252 + 1 | 200 | 200 | 200 | 200 | 200 + 6 | 128 | 128 | 128 | 128 | 128 + 5 | 72 | 72 | 72 | 72 | 72 +(6 rows) + +-- Same query without all limitations but uses having() to show only those submitted their credit card info +SELECT + e1.user_id, + sum(view_homepage) AS viewed_homepage, + sum(use_demo) AS use_demo, + sum(enter_credit_card) AS entered_credit_card, + sum(submit_card_info) as submit_card_info, + sum(see_bought_screen) as see_bought_screen +FROM ( + -- Get the first time each user viewed the homepage. + SELECT + user_id, + 1 AS view_homepage, + min(time) AS view_homepage_time + FROM events_table + WHERE + event_type IN (1, 2) + GROUP BY user_id +) e1 LEFT JOIN LATERAL ( + SELECT + user_id, + 1 AS use_demo, + time AS use_demo_time + FROM events_table + WHERE + user_id = e1.user_id AND + event_type IN (2, 3) + ORDER BY time +) e2 ON true LEFT JOIN LATERAL ( + SELECT + user_id, + 1 AS enter_credit_card, + time AS enter_credit_card_time + FROM events_table + WHERE + user_id = e2.user_id AND + event_type IN (3, 4) + ORDER BY time +) e3 ON true LEFT JOIN LATERAL ( + SELECT + 1 AS submit_card_info, + user_id, + time AS enter_credit_card_time + FROM events_table + WHERE + user_id = e3.user_id AND + event_type IN (4, 5) + ORDER BY time +) e4 ON true LEFT JOIN LATERAL ( + SELECT + 1 AS see_bought_screen + FROM events_table + WHERE + user_id = e4.user_id AND + event_type IN (5, 6) + ORDER BY time +) e5 ON true +group by e1.user_id +HAVING sum(submit_card_info) > 0 +ORDER BY 6 DESC NULLS LAST, 5 DESC NULLS LAST, 4 DESC NULLS LAST, 3 DESC NULLS LAST, 2 DESC NULLS LAST, 1 +LIMIT 15; + user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen +---------+-----------------+----------+---------------------+------------------+------------------- + 2 | 1080 | 1080 | 1080 | 1080 | 1080 + 3 | 540 | 540 | 540 | 540 | 540 + 4 | 252 | 252 | 252 | 252 | 252 + 1 | 200 | 200 | 200 | 200 | 200 + 6 | 128 | 128 | 128 | 128 | 128 + 5 | 72 | 72 | 72 | 72 | 72 +(6 rows) + +-- Explain analyze on this query fails due to #756 +-- avg expression used on order by +SELECT a.user_id, avg(b.value_2) as subquery_avg +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 > 2) + GROUP BY + user_id + HAVING + count(distinct value_1) > 2 + ) as a + LEFT JOIN ( + SELECT + user_id, value_2, value_3 + FROM + users_table + WHERE + (value_1 > 3)) AS b +ON a.user_id = b.user_id +WHERE + b.user_id IS NOT NULL +GROUP BY + a.user_id +ORDER BY + avg(b.value_3), 2, 1 +LIMIT 5; + user_id | subquery_avg +---------+-------------------- + 3 | 3.6000000000000000 + 5 | 2.1666666666666667 + 4 | 2.6666666666666667 + 1 | 2.3333333333333333 +(4 rows) + +-- add having to the same query +SELECT a.user_id, avg(b.value_2) as subquery_avg +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 > 2) + GROUP BY user_id + HAVING count(distinct value_1) > 2 + ) as a + LEFT JOIN ( + SELECT + user_id, value_2, value_3 + FROM + users_table + WHERE + (value_1 > 3)) AS b +ON a.user_id = b.user_id +WHERE + b.user_id IS NOT NULL +GROUP BY + a.user_id +HAVING + sum(b.value_3) > 5 +ORDER BY + avg(b.value_3), 2, 1 +LIMIT 5; + user_id | subquery_avg +---------+-------------------- + 3 | 3.6000000000000000 + 5 | 2.1666666666666667 + 4 | 2.6666666666666667 + 1 | 2.3333333333333333 +(4 rows) + +-- avg on the value_3 is not a resjunk +SELECT a.user_id, avg(b.value_2) as subquery_avg, avg(b.value_3) +FROM + (SELECT + user_id + FROM + users_table + WHERE + (value_1 > 2) + GROUP BY + user_id + HAVING + count(distinct value_1) > 2 + ) as a + LEFT JOIN + ( + SELECT + user_id, value_2, value_3 + FROM + users_table + WHERE + (value_1 > 3) + ) AS b + ON a.user_id = b.user_id +WHERE + b.user_id IS NOT NULL +GROUP BY + a.user_id +ORDER BY + avg(b.value_3) DESC, 2, 1 +LIMIT 5; + user_id | subquery_avg | avg +---------+--------------------+-------------------- + 1 | 2.3333333333333333 | 3.3333333333333335 + 4 | 2.6666666666666667 | 2.5555555555555554 + 5 | 2.1666666666666667 | 2.1666666666666665 + 3 | 3.6000000000000000 | 1.6 +(4 rows) + +-- a powerful query structure that analyzes users/events +-- using (relation JOIN subquery JOIN relation) +SELECT u.user_id, sub.value_2, sub.value_3, COUNT(e2.user_id) counts +FROM + users_table u + LEFT OUTER JOIN LATERAL + (SELECT + * + FROM + events_table e1 + WHERE + e1.user_id = u.user_id + ORDER BY + e1.value_3 DESC + LIMIT 1 + ) sub + ON true + LEFT OUTER JOIN events_table e2 + ON e2.user_id = sub.user_id +WHERE + e2.value_2 > 1 AND e2.value_2 < 5 AND u.value_2 > 1 AND u.value_2 < 5 +GROUP BY + u.user_id, sub.value_2, sub.value_3 +ORDER BY + 4 DESC, 1 DESC, 2 ASC, 3 ASC +LIMIT 10; + user_id | value_2 | value_3 | counts +---------+---------+---------+-------- + 5 | 3 | 4 | 160 + 2 | 3 | 5 | 156 + 3 | 2 | 5 | 108 + 4 | 2 | 4 | 90 + 1 | 2 | 5 | 60 + 6 | 2 | 5 | 48 +(6 rows) + +-- distinct users joined with events +SELECT + avg(events_table.event_type) as avg_type, + count(*) as users_count +FROM events_table + JOIN + (SELECT + DISTINCT user_id + FROM + users_table + ) as distinct_users + ON distinct_users.user_id = events_table.user_id +GROUP BY + distinct_users.user_id +ORDER BY + users_count desc, avg_type DESC +LIMIT 5; + avg_type | users_count +--------------------+------------- + 2.3750000000000000 | 24 + 2.5714285714285714 | 21 + 2.5294117647058824 | 17 + 2.7333333333333333 | 15 + 2.2142857142857143 | 14 +(5 rows) + +-- reduce the data set, aggregate and join +SELECT + events_table.event_type, + users_count.ct +FROM events_table + JOIN + (SELECT distinct_users.user_id, count(1) as ct + FROM + (SELECT + user_id + FROM + users_table + ) as distinct_users + GROUP BY + distinct_users.user_id + ) as users_count + ON users_count.user_id = events_table.user_id +ORDER BY + users_count.ct desc, event_type DESC +LIMIT 5; + event_type | ct +------------+---- + 5 | 26 + 4 | 26 + 3 | 26 + 3 | 26 + 3 | 26 +(5 rows) + +--- now, test (subquery JOIN subquery) +SELECT n1.user_id, count_1, total_count +FROM + (SELECT + user_id, count(1) as count_1 + FROM + users_table + GROUP BY + user_id + ) n1 + INNER JOIN + ( + SELECT + user_id, count(1) as total_count + FROM + events_table + GROUP BY + user_id, event_type + ) n2 + ON (n2.user_id = n1.user_id) +ORDER BY + total_count DESC, count_1 DESC, 1 DESC +LIMIT 10; + user_id | count_1 | total_count +---------+---------+------------- + 2 | 18 | 7 + 3 | 17 | 7 + 2 | 18 | 6 + 5 | 26 | 5 + 5 | 26 | 5 + 4 | 23 | 5 + 3 | 17 | 5 + 1 | 7 | 5 + 4 | 23 | 4 + 4 | 23 | 4 +(10 rows) + +SELECT a.user_id, avg(b.value_2) as subquery_avg +FROM + (SELECT + user_id + FROM + users_table + WHERE + (value_1 > 2) + GROUP BY + user_id + HAVING + count(distinct value_1) > 2 + ) as a + LEFT JOIN + (SELECT + DISTINCT ON (user_id) user_id, value_2, value_3 + FROM + users_table + WHERE + (value_1 > 3) + ORDER BY + 1,2,3 + ) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NOT NULL +GROUP BY a.user_id +ORDER BY avg(b.value_3), 2, 1 +LIMIT 5; + user_id | subquery_avg +---------+------------------------ + 5 | 0.00000000000000000000 + 3 | 2.0000000000000000 + 4 | 1.00000000000000000000 + 1 | 0.00000000000000000000 +(4 rows) + +-- distinct clause must include partition column +-- when used in target list +SELECT a.user_id, avg(b.value_2) as subquery_avg +FROM + (SELECT + user_id + FROM + users_table + WHERE + (value_1 > 2) + GROUP BY + user_id + HAVING + count(distinct value_1) > 2 + ) as a + LEFT JOIN + (SELECT + DISTINCT ON (value_2) value_2 , user_id, value_3 + FROM + users_table + WHERE + (value_1 > 3) + ORDER BY + 1,2,3 + ) AS b + USING (user_id) +GROUP BY user_id; +ERROR: cannot push down this subquery +DETAIL: Distinct on columns without partition column is currently unsupported +SELECT a.user_id, avg(b.value_2) as subquery_avg +FROM + (SELECT + user_id + FROM + users_table + WHERE + (value_1 > 2) + GROUP BY + user_id + HAVING + count(distinct value_1) > 2 + ) as a + LEFT JOIN + (SELECT + DISTINCT ON (value_2, user_id) value_2 , user_id, value_3 + FROM + users_table + WHERE + (value_1 > 3) + ORDER BY + 1,2,3 + ) AS b + ON a.user_id = b.user_id +WHERE + b.user_id IS NOT NULL +GROUP BY + a.user_id +ORDER BY + avg(b.value_3), 2, 1 +LIMIT 5; + user_id | subquery_avg +---------+-------------------- + 3 | 3.3333333333333333 + 5 | 2.2000000000000000 + 4 | 3.2500000000000000 + 1 | 2.3333333333333333 +(4 rows) + +SELECT user_id, event_type +FROM + (SELECT * + FROM + ( + (SELECT + event_type, user_id as a_user_id + FROM + events_table) AS a + JOIN + (SELECT + ma.user_id AS user_id, ma.value_2 AS value_2, + (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma + WHERE + (ma.value_2 > 1) + ORDER BY + prob DESC, value_2 DESC, user_id DESC + LIMIT 10 + ) AS ma + ON (a.a_user_id = ma.user_id) + ) AS inner_sub + ORDER BY + prob DESC, value_2 DESC, user_id DESC, event_type DESC + LIMIT 10 + ) AS outer_sub +ORDER BY + prob DESC, value_2 DESC, user_id DESC, event_type DESC +LIMIT 10; + user_id | event_type +---------+------------ + 3 | 5 + 3 | 4 + 3 | 4 + 3 | 4 + 3 | 4 + 3 | 3 + 3 | 3 + 3 | 3 + 3 | 3 + 3 | 3 +(10 rows) + +-- very similar query but produces different result due to +-- ordering difference in the previous one's inner query +SELECT user_id, event_type +FROM + (SELECT + event_type, user_id as a_user_id + FROM + events_table) AS a + JOIN + (SELECT + ma.user_id AS user_id, ma.value_2 AS value_2, + (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma + WHERE + (ma.value_2 > 1) + ORDER BY + prob DESC, user_id DESC + LIMIT 10 + ) AS ma + ON (a.a_user_id = ma.user_id) +ORDER BY + prob DESC, event_type DESC, user_id DESC +LIMIT 10; + user_id | event_type +---------+------------ + 3 | 5 + 2 | 5 + 2 | 5 + 3 | 4 + 3 | 4 + 3 | 4 + 3 | 4 + 2 | 4 + 2 | 4 + 2 | 4 +(10 rows) + +-- now they produce the same result when ordering fixed in 'outer_sub' +SELECT user_id, event_type +FROM + (SELECT * + FROM + ( + (SELECT + event_type, user_id as a_user_id + FROM + events_table + ) AS a + JOIN + (SELECT + ma.user_id AS user_id, ma.value_2 AS value_2, + (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma + WHERE + (ma.value_2 > 1) + ORDER BY + prob DESC, user_id DESC + LIMIT 10 + ) AS ma + ON (a.a_user_id = ma.user_id) + ) AS inner_sub + ORDER BY + prob DESC, event_type DESC, user_id DESC + LIMIT 10 + ) AS outer_sub +ORDER BY + prob DESC, event_type DESC, user_id DESC +LIMIT 10; + user_id | event_type +---------+------------ + 3 | 5 + 2 | 5 + 2 | 5 + 3 | 4 + 3 | 4 + 3 | 4 + 3 | 4 + 2 | 4 + 2 | 4 + 2 | 4 +(10 rows) + +-- this is one complex join query derived from a user's production query +-- first declare the function on workers on master +-- With array_index: +SELECT * FROM run_command_on_workers('CREATE OR REPLACE FUNCTION array_index(ANYARRAY, ANYELEMENT) + RETURNS INT AS $$ + SELECT i + FROM (SELECT generate_series(array_lower($1, 1), array_upper($1, 1))) g(i) + WHERE $1 [i] = $2 + LIMIT 1; + $$ LANGUAGE sql') +ORDER BY 1,2; + nodename | nodeport | success | result +-----------+----------+---------+----------------- + localhost | 57637 | t | CREATE FUNCTION + localhost | 57638 | t | CREATE FUNCTION +(2 rows) + +CREATE OR REPLACE FUNCTION array_index(ANYARRAY, ANYELEMENT) + RETURNS INT AS $$ + SELECT i + FROM (SELECT generate_series(array_lower($1, 1), array_upper($1, 1))) g(i) + WHERE $1 [i] = $2 + LIMIT 1; + $$ LANGUAGE sql; +SELECT * +FROM + (SELECT * + FROM ( + (SELECT user_id AS user_id_e, + event_type AS event_type_e + FROM events_table ) AS ma_e + JOIN + (SELECT value_2, + value_3, + user_id + FROM + (SELECT * + FROM ( + (SELECT user_id_p AS user_id + FROM + (SELECT * + FROM ( + (SELECT + user_id AS user_id_p + FROM + events_table + WHERE + (event_type IN (1,2,3,4,5)) ) AS ma_p + JOIN + (SELECT + user_id AS user_id_a + FROM + users_table + WHERE + (value_2 % 5 = 1) ) AS a + ON (a.user_id_a = ma_p.user_id_p) ) ) AS a_ma_p ) AS inner_filter_q + JOIN + (SELECT + value_2, value_3, user_id AS user_id_ck + FROM + events_table + WHERE + event_type = ANY(ARRAY [4, 5, 6]) + ORDER BY + value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC + LIMIT 10 ) + AS ma_ck ON (ma_ck.user_id_ck = inner_filter_q.user_id) ) + AS inner_sub_q + ORDER BY + value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC + LIMIT 10 ) + AS outer_sub_q + ORDER BY + value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC + LIMIT 10) + AS inner_search_q + ON (ma_e.user_id_e = inner_search_q.user_id) ) + AS outer_inner_sub_q + ORDER BY + value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC + LIMIT 10) +AS outer_outer_sub_q +ORDER BY + value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC +LIMIT 10; + user_id_e | event_type_e | value_2 | value_3 | user_id +-----------+--------------+---------+---------+--------- + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 +(10 rows) + +-- top level select * is removed now there is +-- a join at top level. +SELECT * +FROM + ( + (SELECT + user_id AS user_id_e, event_type as event_type_e + FROM + events_table + ) AS ma_e + JOIN + (SELECT + value_2, value_3, user_id + FROM + (SELECT + * + FROM + ( + (SELECT + user_id_p AS user_id + FROM + (SELECT + * + FROM + ( + (SELECT + user_id AS user_id_p + FROM + events_table + WHERE + (event_type IN (1, 2, 3, 4, 5)) + ) AS ma_p + JOIN + (SELECT + user_id AS user_id_a + FROM + users_table + WHERE + (value_2 % 5 = 1) + ) AS a + ON (a.user_id_a = ma_p.user_id_p) + ) + ) AS a_ma_p + ) AS inner_filter_q + JOIN + (SELECT + value_2, value_3, user_id AS user_id_ck + FROM + events_table + WHERE + event_type = ANY(ARRAY [4, 5, 6]) + ORDER BY + value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC + LIMIT 10 + ) AS ma_ck + ON (ma_ck.user_id_ck = inner_filter_q.user_id) + ) AS inner_sub_q + ORDER BY + value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC + LIMIT 10 + ) AS outer_sub_q + ORDER BY + value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC + LIMIT 10) AS inner_search_q + ON (ma_e.user_id_e = inner_search_q.user_id) + ) AS outer_inner_sub_q +ORDER BY + value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC +LIMIT 10; + user_id_e | event_type_e | value_2 | value_3 | user_id +-----------+--------------+---------+---------+--------- + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 + 5 | 5 | 2 | 0 | 5 +(10 rows) + +-- drop created functions +SELECT * FROM run_command_on_workers('DROP FUNCTION array_index(ANYARRAY, ANYELEMENT)') +ORDER BY 1,2; + nodename | nodeport | success | result +-----------+----------+---------+--------------- + localhost | 57637 | t | DROP FUNCTION + localhost | 57638 | t | DROP FUNCTION +(2 rows) + +DROP FUNCTION array_index(ANYARRAY, ANYELEMENT); +-- a query with a constant subquery +SELECT count(*) as subquery_count +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '1' OR value_1 = '3') + GROUP BY user_id + HAVING count(distinct value_1) = 2 + ) as a + LEFT JOIN ( + SELECT + 1 as user_id + ) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; + subquery_count +---------------- + 1 + 1 + 1 + 1 + 1 +(5 rows) + +-- volatile function in the subquery +SELECT count(*) as subquery_count +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '1' OR value_1 = '3') + GROUP BY user_id + HAVING count(distinct value_1) = 2 + ) as a + INNER JOIN ( + SELECT + random()::int as user_id + ) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; +ERROR: cannot push down this subquery +DETAIL: Subqueries without a FROM clause can only contain immutable functions +-- this is slightly different, we use RTE_VALUEs here +SELECT Count(*) AS subquery_count +FROM (SELECT + user_id + FROM + users_table + WHERE + (value_1 = '1' OR value_1 = '3' ) + GROUP BY + user_id + HAVING + Count(DISTINCT value_1) = 2) AS a + INNER JOIN + (SELECT + * + FROM + (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (user_id, letter)) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; +ERROR: cannot push down this subquery +DETAIL: VALUES in multi-shard queries is currently unsupported +-- same query without LIMIT/OFFSET returns 30 rows +SET client_min_messages TO DEBUG1; +-- now, lets use a simple expression on the LIMIT and explicit coercion on the OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT + u.user_id, e.event_type::text AS event, e.time + FROM + users_table AS u, + events_table AS e + WHERE + u.user_id = e.user_id AND e.event_type IN (1, 2) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT 1+1 OFFSET 1::smallint; +DEBUG: push down of limit count: 3 + user_id | array_length +---------+-------------- + 4 | 184 + 2 | 180 +(2 rows) + +-- now, lets use implicit coersion in LIMIT and a simple expressions on OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT + u.user_id, e.event_type::text AS event, e.time + FROM + users_table AS u, + events_table AS e + WHERE + u.user_id = e.user_id AND e.event_type IN (1, 2) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT '3' OFFSET 2+1; +DEBUG: push down of limit count: 6 + user_id | array_length +---------+-------------- + 5 | 156 + 6 | 40 + 1 | 28 +(3 rows) + +-- create a test function which is marked as volatile +CREATE OR REPLACE FUNCTION volatile_func_test() + RETURNS INT AS $$ + SELECT 1; + $$ LANGUAGE sql VOLATILE; +-- Citus should be able to evalute functions/row comparisons on the LIMIT/OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT + u.user_id, e.event_type::text AS event, e.time + FROM + users_table AS u, + events_table AS e + WHERE + u.user_id = e.user_id AND e.event_type IN (1, 2, 3, 4) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT volatile_func_test() + (ROW(1,2,NULL) < ROW(1,3,0))::int OFFSET volatile_func_test() + volatile_func_test(); +DEBUG: push down of limit count: 4 + user_id | array_length +---------+-------------- + 3 | 340 + 5 | 312 +(2 rows) + +-- now, lets use expressions on both the LIMIT and OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT + u.user_id, e.event_type::text AS event, e.time + FROM + users_table AS u, + events_table AS e + WHERE + u.user_id = e.user_id AND e.event_type IN (1, 2) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT (5 > 4)::int OFFSET + CASE + WHEN 5 != 5 THEN 27 + WHEN 1 > 5 THEN 28 + ELSE 2 + END; +DEBUG: push down of limit count: 3 + user_id | array_length +---------+-------------- + 2 | 180 +(1 row) + +-- we don't allow parameters on the LIMIT/OFFSET clauses +PREPARE parametrized_limit AS +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (1, 2) + ) t + GROUP BY user_id + ) q + ORDER BY 2 DESC, 1 + LIMIT $1 OFFSET $2; + EXECUTE parametrized_limit(1,1); +DEBUG: push down of limit count: 2 + user_id | array_length +---------+-------------- + 4 | 184 +(1 row) + +PREPARE parametrized_offset AS +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (1, 2) + ) t + GROUP BY user_id + ) q + ORDER BY 2 DESC, 1 + LIMIT 1 OFFSET $1; + EXECUTE parametrized_offset(1); +DEBUG: push down of limit count: 2 + user_id | array_length +---------+-------------- + 4 | 184 +(1 row) + +SET client_min_messages TO DEFAULT; +DROP FUNCTION volatile_func_test(); +CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool + AS 'select $1 > $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; +SELECT run_command_on_workers($f$ + +CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool + AS 'select $1 > $2;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; + +$f$); + run_command_on_workers +--------------------------------------- + (localhost,57637,t,"CREATE FUNCTION") + (localhost,57638,t,"CREATE FUNCTION") +(2 rows) + +-- we don't support joins via functions +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE test_join_function_2(u.user_id, e.user_id) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- note that the following query has both equi-joins on the partition keys +-- and non-equi-joins on other columns. We now support query filters +-- having non-equi-joins as long as they have equi-joins on partition keys. +SELECT + users_table.user_id, users_table.value_1, prob +FROM + users_table + JOIN + (SELECT + ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma, events_table as short_list + WHERE + short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3 + ) temp + ON users_table.user_id = temp.user_id + WHERE + users_table.value_1 < 3 AND test_join_function_2(users_table.user_id, temp.user_id); + user_id | value_1 | prob +---------+---------+------ +(0 rows) + +-- we do support the following since there is already an equality on the partition +-- key and we have an additional join via a function +SELECT + temp.user_id, users_table.value_1, prob +FROM + users_table + JOIN + (SELECT + ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob, random() + FROM + users_table AS ma, events_table as short_list + WHERE + short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 4 AND + test_join_function_2(ma.value_1, short_list.value_2) + ) temp + ON users_table.user_id = temp.user_id + WHERE + users_table.value_1 < 3 + ORDER BY 2 DESC, 1 DESC + LIMIT 10; + user_id | value_1 | prob +---------+---------+------------------------ + 6 | 2 | 0.50000000000000000000 + 6 | 2 | 0.50000000000000000000 + 6 | 2 | 0.50000000000000000000 + 6 | 2 | 0.50000000000000000000 + 6 | 2 | 0.50000000000000000000 + 6 | 2 | 0.50000000000000000000 + 6 | 2 | 0.50000000000000000000 + 6 | 2 | 0.50000000000000000000 + 6 | 2 | 0.50000000000000000000 + 5 | 2 | 0.50000000000000000000 +(10 rows) + +-- similarly we do support non equi joins on columns if there is aready +-- an equality join +SELECT + count(*) +FROM + (SELECT + event_type, random() + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND + events_table.time > users_table.time AND + events_table.value_2 IN (0, 4) + ) as foo; + count +------- + 180 +(1 row) + +-- the other way around is not supported +SELECT + count(*) +FROM + (SELECT + event_type, random() + FROM + events_table, users_table + WHERE + events_table.user_id > users_table.user_id AND + events_table.time = users_table.time AND + events_table.value_2 IN (0, 4) + ) as foo; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- we can even allow that on top level joins +SELECT + count(*) +FROM + (SELECT + event_type, random(), events_table.user_id + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND + events_table.value_2 IN (0, 4) + ) as foo, +(SELECT + event_type, random(), events_table.user_id + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND + events_table.value_2 IN (1, 5) + ) as bar +WHERE foo.event_type > bar.event_type +AND foo.user_id = bar.user_id; + count +------- + 11971 +(1 row) + +-- note that the following is not supported +-- since the top level join is not on the distribution key +SELECT + count(*) +FROM + (SELECT + event_type, random() + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND + events_table.value_2 IN (0, 4) + ) as foo, +(SELECT + event_type, random() + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND + events_table.value_2 IN (1, 5) + ) as bar +WHERE foo.event_type = bar.event_type; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- DISTINCT in the outer query and DISTINCT in the subquery +SELECT + DISTINCT users_ids.user_id +FROM + (SELECT DISTINCT user_id FROM users_table) as users_ids + JOIN + (SELECT + ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma, events_table as short_list + WHERE + short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3 + ) temp + ON users_ids.user_id = temp.user_id + WHERE temp.value_1 < 3 + ORDER BY 1 + LIMIT 5; + user_id +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- DISTINCT ON in the outer query and DISTINCT in the subquery +SELECT + DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob +FROM + (SELECT DISTINCT user_id FROM users_table) as users_ids + JOIN + (SELECT + ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma, events_table as short_list + WHERE + short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 2 + ) temp + ON users_ids.user_id = temp.user_id + WHERE temp.value_1 < 3 + ORDER BY 1, 2 + LIMIT 5; + user_id | value_1 | prob +---------+---------+------------------------ + 1 | 1 | 0.50000000000000000000 + 2 | 0 | 0.50000000000000000000 + 3 | 0 | 0.50000000000000000000 + 4 | 0 | 0.50000000000000000000 + 5 | 0 | 0.50000000000000000000 +(5 rows) + +-- DISTINCT ON in the outer query and DISTINCT ON in the subquery +SELECT + DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob +FROM + (SELECT DISTINCT ON (user_id) user_id, value_1 FROM users_table ORDER BY 1,2) as users_ids + JOIN + (SELECT + ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma, events_table as short_list + WHERE + short_list.user_id = ma.user_id and ma.value_1 < 2 and short_list.event_type < 3 + ) temp + ON users_ids.user_id = temp.user_id + ORDER BY 1,2 + LIMIT 5; + user_id | value_1 | prob +---------+---------+------------------------ + 1 | 1 | 0.50000000000000000000 + 2 | 0 | 0.50000000000000000000 + 3 | 0 | 0.50000000000000000000 + 4 | 0 | 0.50000000000000000000 + 5 | 0 | 0.50000000000000000000 +(5 rows) + +-- Getting aggregation of value which is created by aggregation in subquery +SELECT + count(1), + avg(agg_value) +FROM + (SELECT + users_table.user_id, + avg(users_table.value_1 / events_table.value_4) AS agg_value + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id + GROUP BY + 1 + ) AS temp; + count | avg +-------+----- + 6 | +(1 row) + +-- Test the case when a subquery has a lateral reference to two levels upper +SELECT + b.user_id, b.value_2, b.cnt +FROM ( + SELECT + user_id, + value_2 + FROM events_table + WHERE events_table.user_id BETWEEN 2 AND 5 +) a, +LATERAL ( + SELECT + user_id, value_2, count(*) as cnt + FROM ( + SELECT + value_2, time, user_id + FROM events_table + WHERE user_id BETWEEN 2 AND 5 + AND events_table.user_id = a.user_id + AND events_table.value_2 = a.value_2 + ORDER BY time DESC + ) events + GROUP BY user_id, value_2 +) b +ORDER BY user_id, value_2, cnt +LIMIT 1; + user_id | value_2 | cnt +---------+---------+----- + 2 | 0 | 1 +(1 row) + +DROP FUNCTION test_join_function_2(integer, integer); +SELECT run_command_on_workers($f$ + + DROP FUNCTION test_join_function_2(integer, integer); + +$f$); + run_command_on_workers +------------------------------------- + (localhost,57637,t,"DROP FUNCTION") + (localhost,57638,t,"DROP FUNCTION") +(2 rows) + +SET citus.enable_router_execution TO TRUE; +SET citus.subquery_pushdown to OFF; diff --git a/src/test/regress/expected/multi_utility_statements_2.out b/src/test/regress/expected/multi_utility_statements_2.out new file mode 100644 index 000000000..d8836c7c6 --- /dev/null +++ b/src/test/regress/expected/multi_utility_statements_2.out @@ -0,0 +1,363 @@ +-- +-- MULTI_UTILITY_STATEMENTS +-- +-- Check that we can run utility statements with embedded SELECT statements on +-- distributed tables. Currently we only support CREATE TABLE AS (SELECT..), +-- DECLARE CURSOR, and COPY ... TO statements. +SET citus.next_shard_id TO 1000000; +CREATE TEMP TABLE lineitem_pricing_summary AS +( + SELECT + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order + FROM + lineitem + WHERE + l_shipdate <= date '1998-12-01' - interval '90 days' + GROUP BY + l_returnflag, + l_linestatus + ORDER BY + l_returnflag, + l_linestatus +); +SELECT * FROM lineitem_pricing_summary ORDER BY l_returnflag, l_linestatus; + l_returnflag | l_linestatus | sum_qty | sum_base_price | sum_disc_price | sum_charge | avg_qty | avg_price | avg_disc | count_order +--------------+--------------+-----------+----------------+----------------+------------------+---------------------+--------------------+------------------------+------------- + A | F | 75465.00 | 113619873.63 | 107841287.0728 | 112171153.245923 | 25.6334918478260870 | 38593.707075407609 | 0.05055027173913043478 | 2944 + N | F | 2022.00 | 3102551.45 | 2952540.7118 | 3072642.770652 | 26.6052631578947368 | 40823.045394736842 | 0.05263157894736842105 | 76 + N | O | 149778.00 | 224706948.16 | 213634857.6854 | 222134071.929801 | 25.4594594594594595 | 38195.979629440762 | 0.04939486656467788543 | 5883 + R | F | 73156.00 | 108937979.73 | 103516623.6698 | 107743533.784328 | 25.2175112030334367 | 37551.871675284385 | 0.04983798690106859704 | 2901 +(4 rows) + +-- Test we can handle joins +CREATE TABLE shipping_priority AS +( + SELECT + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority + FROM + customer, + orders, + lineitem + WHERE + c_mktsegment = 'BUILDING' + AND c_custkey = o_custkey + AND l_orderkey = o_orderkey + AND o_orderdate < date '1995-03-15' + AND l_shipdate > date '1995-03-15' + GROUP BY + l_orderkey, + o_orderdate, + o_shippriority + ORDER BY + revenue DESC, + o_orderdate +); +SELECT * FROM shipping_priority; + l_orderkey | revenue | o_orderdate | o_shippriority +------------+-------------+-------------+---------------- + 1637 | 268170.6408 | 02-08-1995 | 0 + 9696 | 252014.5497 | 02-20-1995 | 0 + 10916 | 242749.1996 | 03-11-1995 | 0 + 450 | 221012.3165 | 03-05-1995 | 0 + 5347 | 198353.7942 | 02-22-1995 | 0 + 10691 | 112800.1020 | 03-14-1995 | 0 + 386 | 104975.2484 | 01-25-1995 | 0 + 5765 | 88222.7556 | 12-15-1994 | 0 + 4707 | 88143.7774 | 02-27-1995 | 0 + 5312 | 83750.7028 | 02-24-1995 | 0 + 5728 | 70101.6400 | 12-11-1994 | 0 + 577 | 57986.6224 | 12-19-1994 | 0 + 12706 | 16636.6368 | 11-21-1994 | 0 + 3844 | 8851.3200 | 12-29-1994 | 0 + 11073 | 7433.6295 | 12-02-1994 | 0 + 13924 | 3111.4970 | 12-20-1994 | 0 +(16 rows) + +DROP TABLE shipping_priority; +-- Check COPY against distributed tables works both when specifying a +-- query as the source, and when directly naming a table. +COPY ( + SELECT + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority + FROM + customer, + orders, + lineitem + WHERE + c_mktsegment = 'BUILDING' + AND c_custkey = o_custkey + AND l_orderkey = o_orderkey + AND o_orderdate < date '1995-03-15' + AND l_shipdate > date '1995-03-15' + GROUP BY + l_orderkey, + o_orderdate, + o_shippriority + ORDER BY + revenue DESC, + o_orderdate +) TO stdout; +1637 268170.6408 02-08-1995 0 +9696 252014.5497 02-20-1995 0 +10916 242749.1996 03-11-1995 0 +450 221012.3165 03-05-1995 0 +5347 198353.7942 02-22-1995 0 +10691 112800.1020 03-14-1995 0 +386 104975.2484 01-25-1995 0 +5765 88222.7556 12-15-1994 0 +4707 88143.7774 02-27-1995 0 +5312 83750.7028 02-24-1995 0 +5728 70101.6400 12-11-1994 0 +577 57986.6224 12-19-1994 0 +12706 16636.6368 11-21-1994 0 +3844 8851.3200 12-29-1994 0 +11073 7433.6295 12-02-1994 0 +13924 3111.4970 12-20-1994 0 +-- check copying to file +-- (quiet off to force number of copied records to be displayed) +\set QUIET off +COPY nation TO :'dev_null'; +COPY 25 +\set QUIET on +-- stdout +COPY nation TO STDOUT; +0 ALGERIA 0 haggle. carefully final deposits detect slyly agai +1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon +2 BRAZIL 1 y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special +3 CANADA 1 eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold +4 EGYPT 4 y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d +5 ETHIOPIA 0 ven packages wake quickly. regu +6 FRANCE 3 refully final requests. regular, ironi +7 GERMANY 3 l platelets. regular accounts x-ray: unusual, regular acco +8 INDIA 2 ss excuses cajole slyly across the packages. deposits print aroun +9 INDONESIA 2 slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull +10 IRAN 4 efully alongside of the slyly final dependencies. +11 IRAQ 4 nic deposits boost atop the quickly final requests? quickly regula +12 JAPAN 2 ously. final, express gifts cajole a +13 JORDAN 4 ic deposits are blithely about the carefully regular pa +14 KENYA 0 pending excuses haggle furiously deposits. pending, express pinto beans wake fluffily past t +15 MOROCCO 0 rns. blithely bold courts among the closely regular packages use furiously bold platelets? +16 MOZAMBIQUE 0 s. ironic, unusual asymptotes wake blithely r +17 PERU 1 platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun +18 CHINA 2 c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos +19 ROMANIA 3 ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account +20 SAUDI ARABIA 4 ts. silent requests haggle. closely express packages sleep across the blithely +21 VIETNAM 2 hely enticingly express accounts. even, final +22 RUSSIA 3 requests against the platelets use never according to the quickly regular pint +23 UNITED KINGDOM 3 eans boost carefully special requests. accounts are. carefull +24 UNITED STATES 1 y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be +-- ensure individual cols can be copied out, too +COPY nation(n_name) TO STDOUT; +ALGERIA +ARGENTINA +BRAZIL +CANADA +EGYPT +ETHIOPIA +FRANCE +GERMANY +INDIA +INDONESIA +IRAN +IRAQ +JAPAN +JORDAN +KENYA +MOROCCO +MOZAMBIQUE +PERU +CHINA +ROMANIA +SAUDI ARABIA +VIETNAM +RUSSIA +UNITED KINGDOM +UNITED STATES +-- Test that we can create on-commit drop tables, and also test creating with +-- oids, along with changing column names +BEGIN; +CREATE TEMP TABLE customer_few (customer_key) WITH (OIDS) ON COMMIT DROP AS + (SELECT * FROM customer WHERE c_nationkey = 1 ORDER BY c_custkey LIMIT 10); +ERROR: tables declared WITH OIDS are not supported +SELECT customer_key, c_name, c_address + FROM customer_few ORDER BY customer_key LIMIT 5; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +SELECT customer_key, c_name, c_address + FROM customer_few ORDER BY customer_key LIMIT 5; +ERROR: relation "customer_few" does not exist +LINE 2: FROM customer_few ORDER BY customer_key LIMIT 5; + ^ +-- Test DECLARE CURSOR .. WITH HOLD without parameters that calls ReScan on the top-level CustomScan +CREATE TABLE cursor_me (x int, y int); +SELECT create_distributed_table('cursor_me', 'x'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO cursor_me SELECT s/10, s FROM generate_series(1, 100) s; +DECLARE holdCursor CURSOR WITH HOLD FOR + SELECT * FROM cursor_me WHERE x = 1 ORDER BY y; +FETCH NEXT FROM holdCursor; + x | y +---+---- + 1 | 10 +(1 row) + +FETCH FORWARD 3 FROM holdCursor; + x | y +---+---- + 1 | 11 + 1 | 12 + 1 | 13 +(3 rows) + +FETCH LAST FROM holdCursor; + x | y +---+---- + 1 | 19 +(1 row) + +FETCH BACKWARD 3 FROM holdCursor; + x | y +---+---- + 1 | 18 + 1 | 17 + 1 | 16 +(3 rows) + +FETCH FORWARD 3 FROM holdCursor; + x | y +---+---- + 1 | 17 + 1 | 18 + 1 | 19 +(3 rows) + +CLOSE holdCursor; +-- Test DECLARE CURSOR .. WITH HOLD with parameter +CREATE OR REPLACE FUNCTION declares_cursor(p int) +RETURNS void AS $$ + DECLARE c CURSOR WITH HOLD FOR SELECT * FROM cursor_me WHERE x = $1; +$$ LANGUAGE SQL; +SELECT declares_cursor(5); +ERROR: Cursors for queries on distributed tables with parameters are currently unsupported +CREATE OR REPLACE FUNCTION cursor_plpgsql(p int) +RETURNS SETOF int AS $$ +DECLARE + val int; + my_cursor CURSOR (a INTEGER) FOR SELECT y FROM cursor_me WHERE x = $1 ORDER BY y; +BEGIN + -- Open the cursor + OPEN my_cursor(p); + + LOOP + FETCH my_cursor INTO val; + EXIT WHEN NOT FOUND; + + RETURN NEXT val; + END LOOP; + + -- Close the cursor + CLOSE my_cursor; +END; $$ +LANGUAGE plpgsql; +SELECT cursor_plpgsql(4); + cursor_plpgsql +---------------- + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 +(10 rows) + +DROP FUNCTION declares_cursor(int); +DROP FUNCTION cursor_plpgsql(int); +DROP TABLE cursor_me; +-- Test DECLARE CURSOR statement with SCROLL +DECLARE holdCursor SCROLL CURSOR WITH HOLD FOR + SELECT l_orderkey, l_linenumber, l_quantity, l_discount + FROM lineitem + ORDER BY l_orderkey, l_linenumber; +FETCH NEXT FROM holdCursor; + l_orderkey | l_linenumber | l_quantity | l_discount +------------+--------------+------------+------------ + 1 | 1 | 17.00 | 0.04 +(1 row) + +FETCH FORWARD 5 FROM holdCursor; + l_orderkey | l_linenumber | l_quantity | l_discount +------------+--------------+------------+------------ + 1 | 2 | 36.00 | 0.09 + 1 | 3 | 8.00 | 0.10 + 1 | 4 | 28.00 | 0.09 + 1 | 5 | 24.00 | 0.10 + 1 | 6 | 32.00 | 0.07 +(5 rows) + +FETCH LAST FROM holdCursor; + l_orderkey | l_linenumber | l_quantity | l_discount +------------+--------------+------------+------------ + 14947 | 2 | 29.00 | 0.04 +(1 row) + +FETCH BACKWARD 5 FROM holdCursor; + l_orderkey | l_linenumber | l_quantity | l_discount +------------+--------------+------------+------------ + 14947 | 1 | 14.00 | 0.09 + 14946 | 2 | 37.00 | 0.01 + 14946 | 1 | 38.00 | 0.00 + 14945 | 6 | 37.00 | 0.05 + 14945 | 5 | 44.00 | 0.08 +(5 rows) + +-- Test WITHOUT HOLD cursors inside transactions +BEGIN; +DECLARE noHoldCursor SCROLL CURSOR FOR + SELECT l_orderkey, l_linenumber, l_quantity, l_discount + FROM lineitem + ORDER BY l_orderkey, l_linenumber; +FETCH ABSOLUTE 5 FROM noHoldCursor; + l_orderkey | l_linenumber | l_quantity | l_discount +------------+--------------+------------+------------ + 1 | 5 | 24.00 | 0.10 +(1 row) + +FETCH BACKWARD noHoldCursor; + l_orderkey | l_linenumber | l_quantity | l_discount +------------+--------------+------------+------------ + 1 | 4 | 28.00 | 0.09 +(1 row) + +COMMIT; +FETCH ABSOLUTE 5 FROM noHoldCursor; +ERROR: cursor "noholdcursor" does not exist +-- Test we don't throw an error for DROP IF EXISTS +DROP DATABASE IF EXISTS not_existing_database; +NOTICE: database "not_existing_database" does not exist, skipping +DROP TABLE IF EXISTS not_existing_table; +NOTICE: table "not_existing_table" does not exist, skipping +DROP SCHEMA IF EXISTS not_existing_schema; +NOTICE: schema "not_existing_schema" does not exist, skipping diff --git a/src/test/regress/expected/sql_procedure_2.out b/src/test/regress/expected/sql_procedure_2.out new file mode 100644 index 000000000..4df32656e --- /dev/null +++ b/src/test/regress/expected/sql_procedure_2.out @@ -0,0 +1,222 @@ +-- +-- SQL_PROCEDURE +-- +-- Tests basic PROCEDURE functionality with SQL and PLPGSQL procedures. +-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + t +(1 row) + +SET citus.next_shard_id TO 100500; +CREATE SCHEMA procedure_schema; +SET SEARCH_PATH = procedure_schema; +CREATE TABLE test_table(id integer , org_id integer); +CREATE UNIQUE INDEX idx_table ON test_table(id, org_id); +SELECT create_distributed_table('test_table','id'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO test_table VALUES(1, 1); +-- test CREATE PROCEDURE +CREATE PROCEDURE test_procedure_delete_insert(id int, org_id int) LANGUAGE SQL AS $$ + DELETE FROM test_table; + INSERT INTO test_table VALUES(id, org_id); +$$; +CALL test_procedure_delete_insert(2,3); +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- + 2 | 3 +(1 row) + +-- commit/rollback is not allowed in procedures in SQL +-- following calls should fail +CREATE PROCEDURE test_procedure_commit(tt_id int, tt_org_id int) LANGUAGE SQL AS $$ + DELETE FROM test_table; + COMMIT; + INSERT INTO test_table VALUES(tt_id, -1); + UPDATE test_table SET org_id = tt_org_id WHERE id = tt_id; + COMMIT; +$$; +CALL test_procedure_commit(2,5); +ERROR: COMMIT is not allowed in a SQL function +CONTEXT: SQL function "test_procedure_commit" during startup +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- + 2 | 3 +(1 row) + +CREATE PROCEDURE test_procedure_rollback(tt_id int, tt_org_id int) LANGUAGE SQL AS $$ + DELETE FROM test_table; + ROLLBACK; + UPDATE test_table SET org_id = tt_org_id WHERE id = tt_id; + COMMIT; +$$; +CALL test_procedure_rollback(2,15); +ERROR: ROLLBACK is not allowed in a SQL function +CONTEXT: SQL function "test_procedure_rollback" during startup +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- + 2 | 3 +(1 row) + +DROP PROCEDURE test_procedure_delete_insert(int, int); +DROP PROCEDURE test_procedure_commit(int, int); +DROP PROCEDURE test_procedure_rollback(int, int); +-- same tests with plpgsql +-- test CREATE PROCEDURE +CREATE PROCEDURE test_procedure_delete_insert(id int, org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + DELETE FROM test_table; + INSERT INTO test_table VALUES(id, org_id); +END; +$$; +CALL test_procedure_delete_insert(2,3); +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- + 2 | 3 +(1 row) + +-- notice that the update succeed and committed +CREATE PROCEDURE test_procedure_modify_insert(tt_id int, tt_org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + UPDATE test_table SET org_id = tt_org_id WHERE id = tt_id; + COMMIT; + INSERT INTO test_table VALUES (tt_id, tt_org_id); + ROLLBACK; +END; +$$; +CALL test_procedure_modify_insert(2,12); +ERROR: duplicate key value violates unique constraint "idx_table_100503" +DETAIL: Key (id, org_id)=(2, 12) already exists. +CONTEXT: while executing command on localhost:57638 +SQL statement "INSERT INTO test_table VALUES (tt_id, tt_org_id)" +PL/pgSQL function test_procedure_modify_insert(integer,integer) line 5 at SQL statement +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- + 2 | 12 +(1 row) + +CREATE PROCEDURE test_procedure_modify_insert_commit(tt_id int, tt_org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + UPDATE test_table SET org_id = tt_org_id WHERE id = tt_id; + COMMIT; + INSERT INTO test_table VALUES (tt_id, tt_org_id); + COMMIT; +END; +$$; +CALL test_procedure_modify_insert_commit(2,30); +ERROR: duplicate key value violates unique constraint "idx_table_100503" +DETAIL: Key (id, org_id)=(2, 30) already exists. +CONTEXT: while executing command on localhost:57638 +SQL statement "INSERT INTO test_table VALUES (tt_id, tt_org_id)" +PL/pgSQL function test_procedure_modify_insert_commit(integer,integer) line 5 at SQL statement +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- + 2 | 30 +(1 row) + +-- delete is commited but insert is rolled back +CREATE PROCEDURE test_procedure_rollback(tt_id int, tt_org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + DELETE FROM test_table; + COMMIT; + INSERT INTO test_table VALUES (tt_id, tt_org_id); + ROLLBACK; +END; +$$; +CALL test_procedure_rollback(2,5); +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- +(0 rows) + +-- rollback is successfull when insert is on multiple rows +CREATE PROCEDURE test_procedure_rollback_2(tt_id int, tt_org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + DELETE FROM test_table; + COMMIT; + INSERT INTO test_table VALUES (tt_id, tt_org_id), (tt_id+1, tt_org_id+1); + ROLLBACK; +END; +$$; +CALL test_procedure_rollback_2(12, 15); +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- +(0 rows) + +-- delete is rolled back, update is committed +CREATE PROCEDURE test_procedure_rollback_3(tt_id int, tt_org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + DELETE FROM test_table; + ROLLBACK; + UPDATE test_table SET org_id = tt_org_id WHERE id = tt_id; + COMMIT; +END; +$$; +INSERT INTO test_table VALUES (1, 1), (2, 2); +CALL test_procedure_rollback_3(2,15); +SELECT * FROM test_table ORDER BY 1, 2; + id | org_id +----+-------- + 1 | 1 + 2 | 15 +(2 rows) + +TRUNCATE test_table; +-- nested procedure calls should roll back normally +CREATE OR REPLACE PROCEDURE test_procedure_rollback(tt_id int, tt_org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + INSERT INTO test_table VALUES (tt_id+12, tt_org_id+12); + ROLLBACK; +END; +$$; +CREATE OR REPLACE PROCEDURE test_procedure_rollback_2(tt_id int, tt_org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + INSERT INTO test_table VALUES (tt_id+2, tt_org_id+1); + ROLLBACK; +END; +$$; +CREATE OR REPLACE PROCEDURE test_procedure(tt_id int, tt_org_id int) LANGUAGE PLPGSQL AS $$ +BEGIN + CALL test_procedure_rollback(tt_id, tt_org_id); + CALL test_procedure_rollback_2(tt_id, tt_org_id); + INSERT INTO test_table VALUES (tt_id+100, tt_org_id+100); + ROLLBACK; +END; +$$; +SELECT * from test_table; + id | org_id +----+-------- +(0 rows) + +call test_procedure(1,1); +call test_procedure(20, 20); +SELECT * from test_table; + id | org_id +----+-------- +(0 rows) + +DROP SCHEMA procedure_schema CASCADE; +NOTICE: drop cascades to 8 other objects +DETAIL: drop cascades to table test_table +drop cascades to function test_procedure_delete_insert(integer,integer) +drop cascades to function test_procedure_modify_insert(integer,integer) +drop cascades to function test_procedure_modify_insert_commit(integer,integer) +drop cascades to function test_procedure_rollback(integer,integer) +drop cascades to function test_procedure_rollback_2(integer,integer) +drop cascades to function test_procedure_rollback_3(integer,integer) +drop cascades to function test_procedure(integer,integer) +RESET SEARCH_PATH;