diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed
index 8b41f0a0e..efa9e310f 100644
--- a/src/test/regress/bin/normalize.sed
+++ b/src/test/regress/bin/normalize.sed
@@ -303,5 +303,8 @@ s/(NOTICE: issuing CREATE EXTENSION IF NOT EXISTS citus_columnar WITH SCHEMA p
 s/, password_required=false//g
 s/provide the file or change sslmode/provide the file, use the system's trusted roots with sslrootcert=system, or change sslmode/g
 s/(:varcollid [0-9]+) :varlevelsup 0/\1 :varnullingrels (b) :varlevelsup 0/g
+s/table_name_for_view\.([_a-z0-9]+)(,| |$)/\1\2/g
+s/permission denied to terminate process/must be a superuser to terminate superuser process/g
+s/permission denied to cancel query/must be a superuser to cancel superuser query/g
 #endif /* PG_VERSION_NUM < PG_VERSION_16 */
diff --git a/src/test/regress/expected/citus_local_tables_mx.out b/src/test/regress/expected/citus_local_tables_mx.out
index adc2f4c61..8b3cb953f 100644
--- a/src/test/regress/expected/citus_local_tables_mx.out
+++ b/src/test/regress/expected/citus_local_tables_mx.out
@@ -912,12 +912,12 @@ select run_command_on_workers($$SELECT count(*)=0 from citus_local_tables_mx.v10
 (2 rows)
 
 CREATE TABLE loc_tb_2 (a int);
-CREATE VIEW v104 AS SELECT * from loc_tb_2;
+CREATE VIEW v104 AS SELECT * from loc_tb_2 table_name_for_view;
 SET client_min_messages TO DEBUG1;
 -- verify the CREATE command for the view is generated correctly
 ALTER TABLE loc_tb_2 ADD CONSTRAINT fkey_2 FOREIGN KEY (a) references ref_tb(a);
-DEBUG: executing "CREATE OR REPLACE VIEW citus_local_tables_mx.v104 (a) AS SELECT loc_tb_2.a
-  FROM citus_local_tables_mx.loc_tb_2; ALTER VIEW citus_local_tables_mx.v104 OWNER TO postgres"
+DEBUG: executing "CREATE OR REPLACE VIEW citus_local_tables_mx.v104 (a) AS SELECT a
+  FROM citus_local_tables_mx.loc_tb_2 table_name_for_view; ALTER VIEW citus_local_tables_mx.v104 OWNER TO postgres"
 DEBUG: "view v104" has dependency to "table loc_tb_2" that is not in Citus' metadata
 DEBUG: validating foreign key constraint "fkey_2_1330083"
 SET client_min_messages TO WARNING;
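For context on the hunks above: with the table alias the updated tests add (table_name_for_view), PostgreSQL's view deparser prints the view target list differently across major versions, and the new normalize.sed rule strips the alias qualifier so both spellings compare equal. A minimal sketch against plain PostgreSQL, outside this test suite (table and view names are illustrative):

CREATE TABLE loc_tb (a int);
CREATE VIEW v_alias AS SELECT * FROM loc_tb table_name_for_view;
-- Depending on the server major version, the deparsed target list comes back
-- either qualified (table_name_for_view.a) or bare (a); the sed rule above
-- rewrites the former into the latter before expected output is compared.
SELECT pg_get_viewdef('v_alias'::regclass, true);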
diff --git a/src/test/regress/expected/columnar_paths.out b/src/test/regress/expected/columnar_paths.out
index 4b32361fa..07b91a42e 100644
--- a/src/test/regress/expected/columnar_paths.out
+++ b/src/test/regress/expected/columnar_paths.out
@@ -326,7 +326,7 @@ WHERE w2.a = 123;
 EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
 FROM
-  (SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1,
+  (SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b ORDER BY 1 DESC LIMIT 5) AS sub_1,
   (SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
   (SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
 WHERE sub_2.a < sub_1.b::integer
@@ -341,11 +341,10 @@ LIMIT 100;
    ->  Nested Loop
          Join Filter: (full_correlated_1.a < (full_correlated.b)::integer)
          ->  Limit
-               ->  GroupAggregate
-                     Group Key: full_correlated.b
-                     Filter: (count(DISTINCT full_correlated.a) > 0)
-                     ->  Sort
-                           Sort Key: full_correlated.b DESC
+               ->  Sort
+                     Sort Key: full_correlated.b DESC
+                     ->  HashAggregate
+                           Group Key: full_correlated.b
                            ->  Custom Scan (ColumnarScan) on full_correlated
                                  Filter: (a > 2)
                                  Columnar Projected Columns: a, b
@@ -366,7 +365,7 @@ LIMIT 100;
                            Filter: (sum(full_correlated_2.a) > 10)
                            ->  Index Scan using full_correlated_btree on full_correlated full_correlated_2
                                  Index Cond: (a > 2)
-(32 rows)
+(31 rows)
 
 DROP INDEX full_correlated_btree;
 CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
diff --git a/src/test/regress/expected/global_cancel.out b/src/test/regress/expected/global_cancel.out
index df50dbe3f..5adeef3c8 100644
--- a/src/test/regress/expected/global_cancel.out
+++ b/src/test/regress/expected/global_cancel.out
@@ -67,12 +67,12 @@ SELECT pg_typeof(:maintenance_daemon_gpid);
  bigint
 (1 row)
 
+\set VERBOSITY terse
 SELECT pg_cancel_backend(:maintenance_daemon_gpid);
 ERROR: must be a superuser to cancel superuser query
-CONTEXT: while executing command on localhost:xxxxx
 SELECT pg_terminate_backend(:maintenance_daemon_gpid);
 ERROR: must be a superuser to terminate superuser process
-CONTEXT: while executing command on localhost:xxxxx
+\set VERBOSITY default
 -- we can cancel our own backend
 SELECT pg_cancel_backend(citus_backend_gpid());
 ERROR: canceling statement due to user request
diff --git a/src/test/regress/expected/local_dist_join_mixed.out b/src/test/regress/expected/local_dist_join_mixed.out
index 20287ee35..b8f074c73 100644
--- a/src/test/regress/expected/local_dist_join_mixed.out
+++ b/src/test/regress/expected/local_dist_join_mixed.out
@@ -357,13 +357,13 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
  101
 (1 row)
 
-CREATE VIEW local_regular_view AS SELECT * FROM local;
+CREATE VIEW local_regular_view AS SELECT * FROM local table_name_for_view;
 WARNING: "view local_regular_view" has dependency to "table local" that is not in Citus' metadata
 DETAIL: "view local_regular_view" will be created only locally
 HINT: Distribute "table local" first to distribute "view local_regular_view"
 CREATE VIEW dist_regular_view AS SELECT * FROM distributed;
 SELECT count(*) FROM distributed JOIN local_regular_view USING (id);
-DEBUG: generating subplan XXX_1 for subquery SELECT local.id, local.title FROM local_dist_join_mixed.local
+DEBUG: generating subplan XXX_1 for subquery SELECT id, title FROM local_dist_join_mixed.local table_name_for_view
 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_dist_join_mixed.distributed JOIN (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title text)) local_regular_view USING (id))
  count
---------------------------------------------------------------------
@@ -380,7 +380,7 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
 (1 row)
 
 SELECT count(*) FROM dist_regular_view JOIN local_regular_view USING (id);
-DEBUG: generating subplan XXX_1 for subquery SELECT local.id, local.title FROM local_dist_join_mixed.local
+DEBUG: generating subplan XXX_1 for subquery SELECT id, title FROM local_dist_join_mixed.local table_name_for_view
 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT distributed.id, distributed.name, distributed.created_at FROM local_dist_join_mixed.distributed) dist_regular_view JOIN (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title text)) local_regular_view USING (id))
  count
---------------------------------------------------------------------
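One note on the global_cancel hunk above: rather than normalizing the unstable CONTEXT line, the test now toggles psql's error verbosity, under which only the primary ERROR line is printed. A standalone psql sketch of the same pattern (the gpid literal is illustrative):

\set VERBOSITY terse
SELECT pg_cancel_backend(10000000001); -- prints only the ERROR line, no CONTEXT
\set VERBOSITY default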
diff --git a/src/test/regress/expected/local_table_join.out b/src/test/regress/expected/local_table_join.out
index 7da341207..297959d41 100644
--- a/src/test/regress/expected/local_table_join.out
+++ b/src/test/regress/expected/local_table_join.out
@@ -1370,9 +1370,6 @@ select typdefault from (
       select a from tbl
         where typdefault > 'a'
         limit 1) as subq_0
-    where (
-      select true as bool from pg_catalog.pg_am limit 1
-    )
   ) as subq_1
 ) as subq_2;
  typdefault
---------------------------------------------------------------------
@@ -1400,15 +1397,11 @@ select typdefault from (
       select a from tbl
         where typdefault > 'a'
         limit 1) as subq_0
-    where (
-      select true as bool from pg_catalog.pg_am limit 1
-    )
   ) as subq_1
 ) as subq_2;
-DEBUG: generating subplan XXX_1 for subquery SELECT true AS bool FROM pg_am LIMIT 1
 DEBUG: Wrapping relation "custom_pg_type" to a subquery
-DEBUG: generating subplan XXX_2 for subquery SELECT typdefault FROM local_table_join.custom_pg_type WHERE true
-DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT typdefault FROM (SELECT subq_1.typdefault FROM (SELECT custom_pg_type.typdefault FROM (SELECT custom_pg_type_1.typdefault FROM (SELECT intermediate_result.typdefault FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(typdefault text)) custom_pg_type_1) custom_pg_type, LATERAL (SELECT tbl.a FROM local_table_join.tbl WHERE (custom_pg_type.typdefault OPERATOR(pg_catalog.>) 'a'::text) LIMIT 1) subq_0 WHERE (SELECT intermediate_result.bool FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(bool boolean))) subq_1) subq_2
+DEBUG: generating subplan XXX_1 for subquery SELECT typdefault FROM local_table_join.custom_pg_type WHERE true
+DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT typdefault FROM (SELECT subq_1.typdefault FROM (SELECT custom_pg_type.typdefault FROM (SELECT custom_pg_type_1.typdefault FROM (SELECT intermediate_result.typdefault FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(typdefault text)) custom_pg_type_1) custom_pg_type, LATERAL (SELECT tbl.a FROM local_table_join.tbl WHERE (custom_pg_type.typdefault OPERATOR(pg_catalog.>) 'a'::text) LIMIT 1) subq_0) subq_1) subq_2
 ERROR: cannot push down this subquery
 DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables
 -- Not supported because of 4470
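The local_table_join hunk above drops the one-row pg_catalog.pg_am filter from the test query, so recursive planning produces a single subplan and the intermediate-result names shift from XXX_2 to XXX_1; the pushdown error itself is unchanged. The shape still being exercised, as a sketch (this assumes a Citus setup where tbl is distributed and custom_pg_type is a local table, as in the test schema):

SELECT typdefault FROM (
  SELECT typdefault FROM (
    SELECT custom_pg_type.typdefault
    FROM custom_pg_type,
      LATERAL (SELECT a FROM tbl WHERE typdefault > 'a' LIMIT 1) AS subq_0
  ) AS subq_1
) AS subq_2;
-- expected to fail: a LIMIT in a lateral subquery that references a column
-- from a recursively planned local table cannot be pushed down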
diff --git a/src/test/regress/expected/multi_complex_count_distinct.out b/src/test/regress/expected/multi_complex_count_distinct.out
index d4e6ecfa3..baa9c829a 100644
--- a/src/test/regress/expected/multi_complex_count_distinct.out
+++ b/src/test/regress/expected/multi_complex_count_distinct.out
@@ -1,6 +1,18 @@
 --
 -- COMPLEX_COUNT_DISTINCT
 --
+-- This test file has an alternative output because of the following in PG16:
+-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
+-- https://github.com/postgres/postgres/commit/f4c7c410ee4a7baa06f51ebb8d5333c169691dd3
+-- The alternative output can be deleted when we drop support for PG15
+--
+SHOW server_version \gset
+SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
+ server_version_ge_16
+---------------------------------------------------------------------
+ t
+(1 row)
+
 SET citus.next_shard_id TO 240000;
 SET citus.shard_count TO 8;
 SET citus.shard_replication_factor TO 1;
@@ -65,7 +77,7 @@ SELECT
   GROUP BY l_orderkey
   ORDER BY 2 DESC, 1 DESC
   LIMIT 10;
-                          QUERY PLAN
+                          QUERY PLAN
---------------------------------------------------------------------
 Limit
   Output: remote_scan.l_orderkey, remote_scan.count
@@ -87,9 +99,12 @@ SELECT
               ->  GroupAggregate
                     Output: l_orderkey, count(DISTINCT l_partkey)
                     Group Key: lineitem_hash.l_orderkey
-                    ->  Index Scan Backward using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
-                          Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-(22 rows)
+                    ->  Sort
+                          Output: l_orderkey, l_partkey
+                          Sort Key: lineitem_hash.l_orderkey DESC, lineitem_hash.l_partkey
+                          ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                Output: l_orderkey, l_partkey
+(25 rows)
 
 -- it is also supported if there is no grouping or grouping is on non-partition field
 SELECT
@@ -108,7 +123,7 @@ SELECT
   FROM lineitem_hash
   ORDER BY 1 DESC
   LIMIT 10;
-                          QUERY PLAN
+                          QUERY PLAN
---------------------------------------------------------------------
 Limit
   Output: (count(DISTINCT remote_scan.count))
@@ -117,19 +132,22 @@ SELECT
     Sort Key: (count(DISTINCT remote_scan.count)) DESC
     ->  Aggregate
           Output: count(DISTINCT remote_scan.count)
-          ->  Custom Scan (Citus Adaptive)
+          ->  Sort
                 Output: remote_scan.count
-                Task Count: 8
-                Tasks Shown: One of 8
-                ->  Task
-                      Query: SELECT l_partkey AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_partkey
-                      Node: host=localhost port=xxxxx dbname=regression
-                      ->  HashAggregate
-                            Output: l_partkey
-                            Group Key: lineitem_hash.l_partkey
-                            ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
-                                  Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-(19 rows)
+                Sort Key: remote_scan.count
+                ->  Custom Scan (Citus Adaptive)
+                      Output: remote_scan.count
+                      Task Count: 8
+                      Tasks Shown: One of 8
+                      ->  Task
+                            Query: SELECT l_partkey AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_partkey
+                            Node: host=localhost port=xxxxx dbname=regression
+                            ->  HashAggregate
+                                  Output: l_partkey
+                                  Group Key: lineitem_hash.l_partkey
+                                  ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                        Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(22 rows)
 
 SELECT
   l_shipmode, count(DISTINCT l_partkey)
@@ -167,7 +185,7 @@ SELECT
           Group Key: remote_scan.l_shipmode
           ->  Sort
                 Output: remote_scan.l_shipmode, remote_scan.count
-                Sort Key: remote_scan.l_shipmode DESC
+                Sort Key: remote_scan.l_shipmode DESC, remote_scan.count
                 ->  Custom Scan (Citus Adaptive)
                       Output: remote_scan.l_shipmode, remote_scan.count
                       Task Count: 8
@@ -210,7 +228,7 @@ SELECT
   GROUP BY l_orderkey
   ORDER BY 3 DESC, 2 DESC, 1
   LIMIT 10;
-                          QUERY PLAN
+                          QUERY PLAN
---------------------------------------------------------------------
 Limit
   Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
@@ -232,9 +250,12 @@ SELECT
               ->  GroupAggregate
                     Output: l_orderkey, count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
                     Group Key: lineitem_hash.l_orderkey
-                    ->  Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
-                          Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-(22 rows)
+                    ->  Sort
+                          Output: l_orderkey, l_partkey, l_shipmode
+                          Sort Key: lineitem_hash.l_orderkey, lineitem_hash.l_partkey
+                          ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                Output: l_orderkey, l_partkey, l_shipmode
+(25 rows)
 
 -- partition/non-partition column count distinct no grouping
 SELECT
@@ -249,23 +270,26 @@ EXPLAIN (COSTS false, VERBOSE true)
 SELECT
   count(distinct l_orderkey), count(distinct l_partkey), count(distinct l_shipmode)
   FROM lineitem_hash;
-                          QUERY PLAN
+                          QUERY PLAN
---------------------------------------------------------------------
 Aggregate
   Output: count(DISTINCT remote_scan.count), count(DISTINCT remote_scan.count_1), count(DISTINCT remote_scan.count_2)
-  ->  Custom Scan (Citus Adaptive)
+  ->  Sort
         Output: remote_scan.count, remote_scan.count_1, remote_scan.count_2
-        Task Count: 8
-        Tasks Shown: One of 8
-        ->  Task
-              Query: SELECT l_orderkey AS count, l_partkey AS count, l_shipmode AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_orderkey, l_partkey, l_shipmode
-              Node: host=localhost port=xxxxx dbname=regression
-              ->  HashAggregate
-                    Output: l_orderkey, l_partkey, l_shipmode
-                    Group Key: lineitem_hash.l_orderkey, lineitem_hash.l_partkey, lineitem_hash.l_shipmode
-                    ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
-                          Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-(14 rows)
+        Sort Key: remote_scan.count
+        ->  Custom Scan (Citus Adaptive)
+              Output: remote_scan.count, remote_scan.count_1, remote_scan.count_2
+              Task Count: 8
+              Tasks Shown: One of 8
+              ->  Task
+                    Query: SELECT l_orderkey AS count, l_partkey AS count, l_shipmode AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_orderkey, l_partkey, l_shipmode
+                    Node: host=localhost port=xxxxx dbname=regression
+                    ->  HashAggregate
+                          Output: l_orderkey, l_partkey, l_shipmode
+                          Group Key: lineitem_hash.l_orderkey, lineitem_hash.l_partkey, lineitem_hash.l_shipmode
+                          ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(17 rows)
 
 -- distinct/non-distinct on partition and non-partition columns
 SELECT
@@ -433,7 +457,7 @@ SELECT *
                     Group Key: lineitem_hash.l_partkey
                     ->  Sort
                           Output: l_partkey, l_orderkey
-                          Sort Key: lineitem_hash.l_partkey
+                          Sort Key: lineitem_hash.l_partkey, lineitem_hash.l_orderkey
                           ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
                                 Output: l_partkey, l_orderkey
   Task Count: 1
@@ -483,7 +507,7 @@ SELECT
   GROUP BY l_orderkey
   ORDER BY 2 DESC, 3 DESC, 1
   LIMIT 10;
-                          QUERY PLAN
+                          QUERY PLAN
---------------------------------------------------------------------
 Limit
   Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
@@ -505,9 +529,12 @@ SELECT
               ->  GroupAggregate
                     Output: l_orderkey, count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode = 'AIR'::bpchar)), count(DISTINCT l_suppkey)
                     Group Key: lineitem_hash.l_orderkey
-                    ->  Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
-                          Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-(22 rows)
+                    ->  Sort
+                          Output: l_orderkey, l_suppkey, l_shipmode
+                          Sort Key: lineitem_hash.l_orderkey, lineitem_hash.l_suppkey
+                          ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                Output: l_orderkey, l_suppkey, l_shipmode
+(25 rows)
 
 -- group by on non-partition column
 SELECT
@@ -550,7 +577,7 @@ SELECT
           Group Key: remote_scan.l_suppkey
           ->  Sort
                 Output: remote_scan.l_suppkey, remote_scan.count, remote_scan.count_1
-                Sort Key: remote_scan.l_suppkey DESC
+                Sort Key: remote_scan.l_suppkey DESC, remote_scan.count
                 ->  Custom Scan (Citus Adaptive)
                       Output: remote_scan.l_suppkey, remote_scan.count, remote_scan.count_1
                       Task Count: 8
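The plan churn in this file traces back to PG16's presorted-aggregate work (the first commit linked above): the planner now feeds count(DISTINCT ...) through an explicit Sort node instead of de-duplicating inside the aggregate, and it may also extend an existing Sort's key list, which is why several Sort Key lines above gain trailing columns. A minimal way to observe this on stock PostgreSQL, with illustrative names:

CREATE TABLE t (k int, v int);
INSERT INTO t SELECT i % 10, i FROM generate_series(1, 1000) i;
ANALYZE t;
-- On PG16+ the plan typically shows a Sort on (k, v) feeding a GroupAggregate;
-- PG15 tends to sort on k alone and de-duplicate v inside the aggregate node.
EXPLAIN (COSTS OFF) SELECT k, count(DISTINCT v) FROM t GROUP BY k;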
diff --git a/src/test/regress/expected/multi_complex_count_distinct_0.out b/src/test/regress/expected/multi_complex_count_distinct_0.out
new file mode 100644
index 000000000..36af62e96
--- /dev/null
+++ b/src/test/regress/expected/multi_complex_count_distinct_0.out
@@ -0,0 +1,1139 @@
+--
+-- COMPLEX_COUNT_DISTINCT
+--
+-- This test file has an alternative output because of the following in PG16:
+-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
+-- https://github.com/postgres/postgres/commit/f4c7c410ee4a7baa06f51ebb8d5333c169691dd3
+-- The alternative output can be deleted when we drop support for PG15
+--
+SHOW server_version \gset
+SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
+ server_version_ge_16
+---------------------------------------------------------------------
+ f
+(1 row)
+
+SET citus.next_shard_id TO 240000;
+SET citus.shard_count TO 8;
+SET citus.shard_replication_factor TO 1;
+SET citus.coordinator_aggregation_strategy TO 'disabled';
+CREATE TABLE lineitem_hash (
+    l_orderkey bigint not null,
+    l_partkey integer not null,
+    l_suppkey integer not null,
+    l_linenumber integer not null,
+    l_quantity decimal(15, 2) not null,
+    l_extendedprice decimal(15, 2) not null,
+    l_discount decimal(15, 2) not null,
+    l_tax decimal(15, 2) not null,
+    l_returnflag char(1) not null,
+    l_linestatus char(1) not null,
+    l_shipdate date not null,
+    l_commitdate date not null,
+    l_receiptdate date not null,
+    l_shipinstruct char(25) not null,
+    l_shipmode char(10) not null,
+    l_comment varchar(44) not null,
+    PRIMARY KEY(l_orderkey, l_linenumber) );
+SELECT create_distributed_table('lineitem_hash', 'l_orderkey', 'hash');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+\set lineitem_1_data_file :abs_srcdir '/data/lineitem.1.data'
+\set lineitem_2_data_file :abs_srcdir '/data/lineitem.2.data'
+\set client_side_copy_command '\\copy lineitem_hash FROM ' :'lineitem_1_data_file' ' with delimiter '''|''';'
+:client_side_copy_command
+\set client_side_copy_command '\\copy lineitem_hash FROM ' :'lineitem_2_data_file' ' with delimiter '''|''';'
+:client_side_copy_command
+ANALYZE lineitem_hash;
+-- count(distinct) is supported on top level query if there
+-- is a grouping on the partition key
+SELECT
+    l_orderkey, count(DISTINCT l_partkey)
+    FROM lineitem_hash
+    GROUP BY l_orderkey
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_orderkey | count
+---------------------------------------------------------------------
+      14885 |     7
+      14884 |     7
+      14821 |     7
+      14790 |     7
+      14785 |     7
+      14755 |     7
+      14725 |     7
+      14694 |     7
+      14627 |     7
+      14624 |     7
+(10 rows)
+
+EXPLAIN (COSTS false, VERBOSE true)
+SELECT
+    l_orderkey, count(DISTINCT l_partkey)
+    FROM lineitem_hash
+    GROUP BY l_orderkey
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   Output: remote_scan.l_orderkey, remote_scan.count
+   ->  Sort
+         Output: remote_scan.l_orderkey, remote_scan.count
+         Sort Key: remote_scan.count DESC, remote_scan.l_orderkey DESC
+         ->  Custom Scan (Citus Adaptive)
+               Output: remote_scan.l_orderkey, remote_scan.count
+               Task Count: 8
+               Tasks Shown: One of 8
+               ->  Task
+                     Query: SELECT l_orderkey, count(DISTINCT l_partkey) AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_orderkey ORDER BY (count(DISTINCT l_partkey)) DESC, l_orderkey DESC LIMIT '10'::bigint
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit
+                           Output: l_orderkey, (count(DISTINCT l_partkey))
+                           ->  Sort
+                                 Output: l_orderkey, (count(DISTINCT l_partkey))
+                                 Sort Key: (count(DISTINCT lineitem_hash.l_partkey)) DESC, lineitem_hash.l_orderkey DESC
+                                 ->  GroupAggregate
+                                       Output: l_orderkey, count(DISTINCT l_partkey)
+                                       Group Key: lineitem_hash.l_orderkey
+                                       ->  Index Scan Backward using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
+                                             Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(22 rows)
+
+-- it is also supported if there is no grouping or grouping is on non-partition field
+SELECT
+    count(DISTINCT l_partkey)
+    FROM lineitem_hash
+    ORDER BY 1 DESC
+    LIMIT 10;
+ count
+---------------------------------------------------------------------
+ 11661
+(1 row)
+
+EXPLAIN (COSTS false, VERBOSE true)
+SELECT
+    count(DISTINCT l_partkey)
+    FROM lineitem_hash
+    ORDER BY 1 DESC
+    LIMIT 10;
+                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   Output: (count(DISTINCT remote_scan.count))
+   ->  Sort
+         Output: (count(DISTINCT remote_scan.count))
+         Sort Key: (count(DISTINCT remote_scan.count)) DESC
+         ->  Aggregate
+               Output: count(DISTINCT remote_scan.count)
+               ->  Custom Scan (Citus Adaptive)
+                     Output: remote_scan.count
+                     Task Count: 8
+                     Tasks Shown: One of 8
+                     ->  Task
+                           Query: SELECT l_partkey AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_partkey
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  HashAggregate
+                                 Output: l_partkey
+                                 Group Key: lineitem_hash.l_partkey
+                                 ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                       Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(19 rows)
+
+SELECT
+    l_shipmode, count(DISTINCT l_partkey)
+    FROM lineitem_hash
+    GROUP BY l_shipmode
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_shipmode | count
+---------------------------------------------------------------------
+ TRUCK      |  1757
+ MAIL       |  1730
+ AIR        |  1702
+ FOB        |  1700
+ RAIL       |  1696
+ SHIP       |  1684
+ REG AIR    |  1676
+(7 rows)
+
+EXPLAIN (COSTS false, VERBOSE true)
+SELECT
+    l_shipmode, count(DISTINCT l_partkey)
+    FROM lineitem_hash
+    GROUP BY l_shipmode
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   Output: remote_scan.l_shipmode, (count(DISTINCT remote_scan.count))
+   ->  Sort
+         Output: remote_scan.l_shipmode, (count(DISTINCT remote_scan.count))
+         Sort Key: (count(DISTINCT remote_scan.count)) DESC, remote_scan.l_shipmode DESC
+         ->  GroupAggregate
+               Output: remote_scan.l_shipmode, count(DISTINCT remote_scan.count)
+               Group Key: remote_scan.l_shipmode
+               ->  Sort
+                     Output: remote_scan.l_shipmode, remote_scan.count
+                     Sort Key: remote_scan.l_shipmode DESC
+                     ->  Custom Scan (Citus Adaptive)
+                           Output: remote_scan.l_shipmode, remote_scan.count
+                           Task Count: 8
+                           Tasks Shown: One of 8
+                           ->  Task
+                                 Query: SELECT l_shipmode, l_partkey AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_shipmode, l_partkey
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  HashAggregate
+                                       Output: l_shipmode, l_partkey
+                                       Group Key: lineitem_hash.l_shipmode, lineitem_hash.l_partkey
+                                       ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                             Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(23 rows)
+
+-- mixed mode count distinct, grouped by partition column
+SELECT
+    l_orderkey, count(distinct l_partkey), count(distinct l_shipmode)
+    FROM lineitem_hash
+    GROUP BY l_orderkey
+    ORDER BY 3 DESC, 2 DESC, 1
+    LIMIT 10;
+ l_orderkey | count | count
+---------------------------------------------------------------------
+        226 |     7 |     7
+       1316 |     7 |     7
+       1477 |     7 |     7
+       3555 |     7 |     7
+      12258 |     7 |     7
+      12835 |     7 |     7
+        768 |     7 |     6
+       1121 |     7 |     6
+       1153 |     7 |     6
+       1281 |     7 |     6
+(10 rows)
+
+EXPLAIN (COSTS false, VERBOSE true)
+SELECT
+    l_orderkey, count(distinct l_partkey), count(distinct l_shipmode)
+    FROM lineitem_hash
+    GROUP BY l_orderkey
+    ORDER BY 3 DESC, 2 DESC, 1
+    LIMIT 10;
+                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
+   ->  Sort
+         Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
+         Sort Key: remote_scan.count_1 DESC, remote_scan.count DESC, remote_scan.l_orderkey
+         ->  Custom Scan (Citus Adaptive)
+               Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
+               Task Count: 8
+               Tasks Shown: One of 8
+               ->  Task
+                     Query: SELECT l_orderkey, count(DISTINCT l_partkey) AS count, count(DISTINCT l_shipmode) AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_orderkey ORDER BY (count(DISTINCT l_shipmode)) DESC, (count(DISTINCT l_partkey)) DESC, l_orderkey LIMIT '10'::bigint
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit
+                           Output: l_orderkey, (count(DISTINCT l_partkey)), (count(DISTINCT l_shipmode))
+                           ->  Sort
+                                 Output: l_orderkey, (count(DISTINCT l_partkey)), (count(DISTINCT l_shipmode))
+                                 Sort Key: (count(DISTINCT lineitem_hash.l_shipmode)) DESC, (count(DISTINCT lineitem_hash.l_partkey)) DESC, lineitem_hash.l_orderkey
+                                 ->  GroupAggregate
+                                       Output: l_orderkey, count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+                                       Group Key: lineitem_hash.l_orderkey
+                                       ->  Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
+                                             Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(22 rows)
+
+-- partition/non-partition column count distinct no grouping
+SELECT
+    count(distinct l_orderkey), count(distinct l_partkey), count(distinct l_shipmode)
+    FROM lineitem_hash;
+ count | count | count
+---------------------------------------------------------------------
+  2985 | 11661 |     7
+(1 row)
+
+EXPLAIN (COSTS false, VERBOSE true)
+SELECT
+    count(distinct l_orderkey), count(distinct l_partkey), count(distinct l_shipmode)
+    FROM lineitem_hash;
+                          QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate
+   Output: count(DISTINCT remote_scan.count), count(DISTINCT remote_scan.count_1), count(DISTINCT remote_scan.count_2)
+   ->  Custom Scan (Citus Adaptive)
+         Output: remote_scan.count, remote_scan.count_1, remote_scan.count_2
+         Task Count: 8
+         Tasks Shown: One of 8
+         ->  Task
+               Query: SELECT l_orderkey AS count, l_partkey AS count, l_shipmode AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_orderkey, l_partkey, l_shipmode
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  HashAggregate
+                     Output: l_orderkey, l_partkey, l_shipmode
+                     Group Key: lineitem_hash.l_orderkey, lineitem_hash.l_partkey, lineitem_hash.l_shipmode
+                     ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                           Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(14 rows)
+
+-- distinct/non-distinct on partition and non-partition columns
+SELECT
+    count(distinct l_orderkey), count(l_orderkey),
+    count(distinct l_partkey), count(l_partkey),
+    count(distinct l_shipmode), count(l_shipmode)
+    FROM lineitem_hash;
+ count | count | count | count | count | count
+---------------------------------------------------------------------
+  2985 | 12000 | 11661 | 12000 |     7 | 12000
+(1 row)
+
+-- mixed mode count distinct, grouped by non-partition column
+SELECT
+    l_shipmode, count(distinct l_partkey), count(distinct l_orderkey)
+    FROM lineitem_hash
+    GROUP BY l_shipmode
+    ORDER BY 1, 2 DESC, 3 DESC;
+ l_shipmode | count | count
+---------------------------------------------------------------------
+ AIR        |  1702 |  1327
+ FOB        |  1700 |  1276
+ MAIL       |  1730 |  1299
+ RAIL       |  1696 |  1265
+ REG AIR    |  1676 |  1275
+ SHIP       |  1684 |  1289
+ TRUCK      |  1757 |  1333
+(7 rows)
+
+-- mixed mode count distinct, grouped by non-partition column
+-- having on partition column
+SELECT
+    l_shipmode, count(distinct l_partkey), count(distinct l_orderkey)
+    FROM lineitem_hash
+    GROUP BY l_shipmode
+    HAVING count(distinct l_orderkey) > 1300
+    ORDER BY 1, 2 DESC;
+ l_shipmode | count | count
+---------------------------------------------------------------------
+ AIR        |  1702 |  1327
+ TRUCK      |  1757 |  1333
+(2 rows)
+
+-- same but having clause is not on target list
+SELECT
+    l_shipmode, count(distinct l_partkey)
+    FROM lineitem_hash
+    GROUP BY l_shipmode
+    HAVING count(distinct l_orderkey) > 1300
+    ORDER BY 1, 2 DESC;
+ l_shipmode | count
+---------------------------------------------------------------------
+ AIR        |  1702
+ TRUCK      |  1757
+(2 rows)
+
+-- mixed mode count distinct, grouped by non-partition column
+-- having on non-partition column
+SELECT
+    l_shipmode, count(distinct l_partkey), count(distinct l_suppkey)
+    FROM lineitem_hash
+    GROUP BY l_shipmode
+    HAVING count(distinct l_suppkey) > 1550
+    ORDER BY 1, 2 DESC;
+ l_shipmode | count | count
+---------------------------------------------------------------------
+ AIR        |  1702 |  1564
+ FOB        |  1700 |  1571
+ MAIL       |  1730 |  1573
+ RAIL       |  1696 |  1581
+ REG AIR    |  1676 |  1557
+ SHIP       |  1684 |  1554
+ TRUCK      |  1757 |  1602
+(7 rows)
+
+-- same but having clause is not on target list
+SELECT
+    l_shipmode, count(distinct l_partkey)
+    FROM lineitem_hash
+    GROUP BY l_shipmode
+    HAVING count(distinct l_suppkey) > 1550
+    ORDER BY 1, 2 DESC;
+ l_shipmode | count
+---------------------------------------------------------------------
+ AIR        |  1702
+ FOB        |  1700
+ MAIL       |  1730
+ RAIL       |  1696
+ REG AIR    |  1676
+ SHIP       |  1684
+ TRUCK      |  1757
+(7 rows)
+
+-- count distinct is supported on single table subqueries
+SELECT *
+    FROM (
+        SELECT
+            l_orderkey, count(DISTINCT l_partkey)
+            FROM lineitem_hash
+            GROUP BY l_orderkey) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_orderkey | count
+---------------------------------------------------------------------
+      14885 |     7
+      14884 |     7
+      14821 |     7
+      14790 |     7
+      14785 |     7
+      14755 |     7
+      14725 |     7
+      14694 |     7
+      14627 |     7
+      14624 |     7
+(10 rows)
+
+SELECT *
+    FROM (
+        SELECT
+            l_partkey, count(DISTINCT l_orderkey)
+            FROM lineitem_hash
+            GROUP BY l_partkey) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_partkey | count
+---------------------------------------------------------------------
+    199146 |     3
+    188804 |     3
+    177771 |     3
+    160895 |     3
+    149926 |     3
+    136884 |     3
+     87761 |     3
+     15283 |     3
+      6983 |     3
+      1927 |     3
+(10 rows)
+
+EXPLAIN (COSTS false, VERBOSE true)
+SELECT *
+    FROM (
+        SELECT
+            l_partkey, count(DISTINCT l_orderkey)
+            FROM lineitem_hash
+            GROUP BY l_partkey) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+                          QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive)
+   Output: remote_scan.l_partkey, remote_scan.count
+   ->  Distributed Subplan XXX_1
+         ->  HashAggregate
+               Output: remote_scan.l_partkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+               Group Key: remote_scan.l_partkey
+               ->  Custom Scan (Citus Adaptive)
+                     Output: remote_scan.l_partkey, remote_scan.count
+                     Task Count: 8
+                     Tasks Shown: One of 8
+                     ->  Task
+                           Query: SELECT l_partkey, count(DISTINCT l_orderkey) AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_partkey
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  GroupAggregate
+                                 Output: l_partkey, count(DISTINCT l_orderkey)
+                                 Group Key: lineitem_hash.l_partkey
+                                 ->  Sort
+                                       Output: l_partkey, l_orderkey
+                                       Sort Key: lineitem_hash.l_partkey
+                                       ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                             Output: l_partkey, l_orderkey
+   Task Count: 1
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT l_partkey, count FROM (SELECT intermediate_result.l_partkey, intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(l_partkey integer, count bigint)) sub ORDER BY count DESC, l_partkey DESC LIMIT 10
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit
+               Output: intermediate_result.l_partkey, intermediate_result.count
+               ->  Sort
+                     Output: intermediate_result.l_partkey, intermediate_result.count
+                     Sort Key: intermediate_result.count DESC, intermediate_result.l_partkey DESC
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result
+                           Output: intermediate_result.l_partkey, intermediate_result.count
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+(34 rows)
+
+-- count distinct with filters
+SELECT
+    l_orderkey,
+    count(DISTINCT l_suppkey) FILTER (WHERE l_shipmode = 'AIR'),
+    count(DISTINCT l_suppkey)
+    FROM lineitem_hash
+    GROUP BY l_orderkey
+    ORDER BY 2 DESC, 3 DESC, 1
+    LIMIT 10;
+ l_orderkey | count | count
+---------------------------------------------------------------------
+       4964 |     4 |     7
+      12005 |     4 |     7
+       5409 |     4 |     6
+        164 |     3 |     7
+        322 |     3 |     7
+        871 |     3 |     7
+       1156 |     3 |     7
+       1574 |     3 |     7
+       2054 |     3 |     7
+       2309 |     3 |     7
+(10 rows)
+
+EXPLAIN (COSTS false, VERBOSE true)
+SELECT
+    l_orderkey,
+    count(DISTINCT l_suppkey) FILTER (WHERE l_shipmode = 'AIR'),
+    count(DISTINCT l_suppkey)
+    FROM lineitem_hash
+    GROUP BY l_orderkey
+    ORDER BY 2 DESC, 3 DESC, 1
+    LIMIT 10;
+                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
+   ->  Sort
+         Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
+         Sort Key: remote_scan.count DESC, remote_scan.count_1 DESC, remote_scan.l_orderkey
+         ->  Custom Scan (Citus Adaptive)
+               Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
+               Task Count: 8
+               Tasks Shown: One of 8
+               ->  Task
+                     Query: SELECT l_orderkey, count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode OPERATOR(pg_catalog.=) 'AIR'::bpchar)) AS count, count(DISTINCT l_suppkey) AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_orderkey ORDER BY (count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode OPERATOR(pg_catalog.=) 'AIR'::bpchar))) DESC, (count(DISTINCT l_suppkey)) DESC, l_orderkey LIMIT '10'::bigint
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit
+                           Output: l_orderkey, (count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode = 'AIR'::bpchar))), (count(DISTINCT l_suppkey))
+                           ->  Sort
+                                 Output: l_orderkey, (count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode = 'AIR'::bpchar))), (count(DISTINCT l_suppkey))
+                                 Sort Key: (count(DISTINCT lineitem_hash.l_suppkey) FILTER (WHERE (lineitem_hash.l_shipmode = 'AIR'::bpchar))) DESC, (count(DISTINCT lineitem_hash.l_suppkey)) DESC, lineitem_hash.l_orderkey
+                                 ->  GroupAggregate
+                                       Output: l_orderkey, count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode = 'AIR'::bpchar)), count(DISTINCT l_suppkey)
+                                       Group Key: lineitem_hash.l_orderkey
+                                       ->  Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
+                                             Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(22 rows)
+
+-- group by on non-partition column
+SELECT
+    l_suppkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
+    FROM lineitem_hash
+    GROUP BY l_suppkey
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_suppkey | count
+---------------------------------------------------------------------
+      7680 |     4
+      7703 |     3
+      7542 |     3
+      7072 |     3
+      6335 |     3
+      5873 |     3
+      1318 |     3
+      1042 |     3
+       160 |     3
+      9872 |     2
+(10 rows)
+
+-- explaining the same query fails
+EXPLAIN (COSTS false, VERBOSE true)
+SELECT
+    l_suppkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
+    FROM lineitem_hash
+    GROUP BY l_suppkey
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   Output: remote_scan.l_suppkey, (count(DISTINCT remote_scan.count) FILTER (WHERE (remote_scan.count_1 = 'AIR'::bpchar)))
+   ->  Sort
+         Output: remote_scan.l_suppkey, (count(DISTINCT remote_scan.count) FILTER (WHERE (remote_scan.count_1 = 'AIR'::bpchar)))
+         Sort Key: (count(DISTINCT remote_scan.count) FILTER (WHERE (remote_scan.count_1 = 'AIR'::bpchar))) DESC, remote_scan.l_suppkey DESC
+         ->  GroupAggregate
+               Output: remote_scan.l_suppkey, count(DISTINCT remote_scan.count) FILTER (WHERE (remote_scan.count_1 = 'AIR'::bpchar))
+               Group Key: remote_scan.l_suppkey
+               ->  Sort
+                     Output: remote_scan.l_suppkey, remote_scan.count, remote_scan.count_1
+                     Sort Key: remote_scan.l_suppkey DESC
+                     ->  Custom Scan (Citus Adaptive)
+                           Output: remote_scan.l_suppkey, remote_scan.count, remote_scan.count_1
+                           Task Count: 8
+                           Tasks Shown: One of 8
+                           ->  Task
+                                 Query: SELECT l_suppkey, l_partkey AS count, l_shipmode AS count FROM public.lineitem_hash_240000 lineitem_hash WHERE true GROUP BY l_suppkey, l_partkey, l_shipmode
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  HashAggregate
+                                       Output: l_suppkey, l_partkey, l_shipmode
+                                       Group Key: lineitem_hash.l_suppkey, lineitem_hash.l_partkey, lineitem_hash.l_shipmode
+                                       ->  Seq Scan on public.lineitem_hash_240000 lineitem_hash
+                                             Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+(23 rows)
+
+-- without group by, on partition column
+SELECT
+    count(DISTINCT l_orderkey) FILTER (WHERE l_shipmode = 'AIR')
+    FROM lineitem_hash;
+ count
+---------------------------------------------------------------------
+  1327
+(1 row)
+
+-- without group by, on non-partition column
+SELECT
+    count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
+    FROM lineitem_hash;
+ count
+---------------------------------------------------------------------
+  1702
+(1 row)
+
+SELECT
+    count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR'),
+    count(DISTINCT l_partkey),
+    count(DISTINCT l_shipdate)
+    FROM lineitem_hash;
+ count | count | count
+---------------------------------------------------------------------
+  1702 | 11661 |  2470
+(1 row)
+
+-- filter column already exists in target list
+SELECT *
+    FROM (
+        SELECT
+            l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_orderkey > 100)
+            FROM lineitem_hash
+            GROUP BY l_orderkey) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_orderkey | count
+---------------------------------------------------------------------
+      14885 |     7
+      14884 |     7
+      14821 |     7
+      14790 |     7
+      14785 |     7
+      14755 |     7
+      14725 |     7
+      14694 |     7
+      14627 |     7
+      14624 |     7
+(10 rows)
+
+-- filter column does not exist in target list
+SELECT *
+    FROM (
+        SELECT
+            l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
+            FROM lineitem_hash
+            GROUP BY l_orderkey) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_orderkey | count
+---------------------------------------------------------------------
+      12005 |     4
+       5409 |     4
+       4964 |     4
+      14848 |     3
+      14496 |     3
+      13473 |     3
+      13122 |     3
+      12929 |     3
+      12645 |     3
+      12417 |     3
+(10 rows)
+
+-- case expr in count distinct is supported.
+-- count orders partkeys if l_shipmode is air
+SELECT *
+    FROM (
+        SELECT
+            l_orderkey, count(DISTINCT CASE WHEN l_shipmode = 'AIR' THEN l_partkey ELSE NULL END) as count
+            FROM lineitem_hash
+            GROUP BY l_orderkey) sub
+    WHERE count > 0
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_orderkey | count
+---------------------------------------------------------------------
+      12005 |     4
+       5409 |     4
+       4964 |     4
+      14848 |     3
+      14496 |     3
+      13473 |     3
+      13122 |     3
+      12929 |     3
+      12645 |     3
+      12417 |     3
+(10 rows)
+
+-- text like operator is also supported
+SELECT *
+    FROM (
+        SELECT
+            l_orderkey, count(DISTINCT CASE WHEN l_shipmode like '%A%' THEN l_partkey ELSE NULL END) as count
+            FROM lineitem_hash
+            GROUP BY l_orderkey) sub
+    WHERE count > 0
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ l_orderkey | count
+---------------------------------------------------------------------
+      14275 |     7
+      14181 |     7
+      13605 |     7
+      12707 |     7
+      12384 |     7
+      11746 |     7
+      10727 |     7
+      10467 |     7
+       5636 |     7
+       4614 |     7
+(10 rows)
+
+-- count distinct is rejected if it does not reference any columns
+SELECT *
+    FROM (
+        SELECT
+            l_linenumber, count(DISTINCT 1)
+            FROM lineitem_hash
+            GROUP BY l_linenumber) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ERROR: cannot compute aggregate (distinct)
+DETAIL: aggregate (distinct) with no columns is unsupported
+HINT: You can load the hll extension from contrib packages and enable distinct approximations.
+-- count distinct is rejected if it does not reference any columns
+SELECT *
+    FROM (
+        SELECT
+            l_linenumber, count(DISTINCT (random() * 5)::int)
+            FROM lineitem_hash
+            GROUP BY l_linenumber) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ERROR: cannot compute aggregate (distinct)
+DETAIL: aggregate (distinct) with no columns is unsupported
+HINT: You can load the hll extension from contrib packages and enable distinct approximations.
+-- even non-const function calls are supported within count distinct
+SELECT *
+    FROM (
+        SELECT
+            l_orderkey, count(DISTINCT (random() * 5)::int = l_linenumber)
+            FROM lineitem_hash
+            GROUP BY l_orderkey) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 0;
+ l_orderkey | count
+---------------------------------------------------------------------
+(0 rows)
+
+-- multiple nested subquery
+SELECT
+    total,
+    avg(avg_count) as total_avg_count
+    FROM (
+        SELECT
+            number_sum,
+            count(DISTINCT l_suppkey) as total,
+            avg(total_count) avg_count
+            FROM (
+                SELECT
+                    l_suppkey,
+                    sum(l_linenumber) as number_sum,
+                    count(DISTINCT l_shipmode) as total_count
+                    FROM
+                        lineitem_hash
+                    WHERE
+                        l_partkey > 100 and
+                        l_quantity > 2 and
+                        l_orderkey < 10000
+                    GROUP BY
+                        l_suppkey) as distributed_table
+            WHERE
+                number_sum >= 10
+            GROUP BY
+                number_sum) as distributed_table_2
+    GROUP BY
+        total
+    ORDER BY
+        total_avg_count DESC;
+ total |  total_avg_count
+---------------------------------------------------------------------
+     1 | 3.6000000000000000
+     6 | 2.8333333333333333
+    10 | 2.6000000000000000
+    27 | 2.5555555555555556
+    32 | 2.4687500000000000
+    77 | 2.1948051948051948
+    57 | 2.1754385964912281
+(7 rows)
+
+-- multiple cases query
+SELECT *
+    FROM (
+        SELECT
+            count(DISTINCT
+                CASE
+                    WHEN l_shipmode = 'TRUCK' THEN l_partkey
+                    WHEN l_shipmode = 'AIR' THEN l_quantity
+                    WHEN l_shipmode = 'SHIP' THEN l_discount
+                    ELSE l_suppkey
+                END) as count,
+            l_shipdate
+            FROM
+                lineitem_hash
+            GROUP BY
+                l_shipdate) sub
+    WHERE
+        count > 0
+    ORDER BY
+        1 DESC, 2 DESC
+    LIMIT 10;
+ count | l_shipdate
+---------------------------------------------------------------------
+    14 | 07-30-1997
+    13 | 05-26-1998
+    13 | 08-08-1997
+    13 | 11-17-1995
+    13 | 01-09-1993
+    12 | 01-15-1998
+    12 | 10-15-1997
+    12 | 09-07-1997
+    12 | 06-02-1997
+    12 | 03-14-1997
+(10 rows)
+
+-- count DISTINCT expression
+SELECT *
+    FROM (
+        SELECT
+            l_quantity, count(DISTINCT ((l_orderkey / 1000) * 1000 )) as count
+            FROM
+                lineitem_hash
+            GROUP BY
+                l_quantity) sub
+    WHERE
+        count > 0
+    ORDER BY
+        2 DESC, 1 DESC
+    LIMIT 10;
+ l_quantity | count
+---------------------------------------------------------------------
+      48.00 |    13
+      47.00 |    13
+      37.00 |    13
+      33.00 |    13
+      26.00 |    13
+      25.00 |    13
+      23.00 |    13
+      21.00 |    13
+      15.00 |    13
+      12.00 |    13
+(10 rows)
+
+-- count DISTINCT is part of an expression which includes another aggregate
+SELECT *
+    FROM (
+        SELECT
+            sum(((l_partkey * l_tax) / 100)) /
+                count(DISTINCT
+                    CASE
+                        WHEN l_shipmode = 'TRUCK' THEN l_partkey
+                        ELSE l_suppkey
+                    END) as avg,
+            l_shipmode
+            FROM
+                lineitem_hash
+            GROUP BY
+                l_shipmode) sub
+    ORDER BY
+        1 DESC, 2 DESC
+    LIMIT 10;
+           avg           | l_shipmode
+---------------------------------------------------------------------
+ 44.82904609027336300064 | MAIL
+ 44.80704536679536679537 | SHIP
+ 44.68891732736572890026 | AIR
+ 44.34106724470134874759 | REG AIR
+ 43.12739987269255251432 | FOB
+ 43.07299253636938646426 | RAIL
+ 40.50298377916903813318 | TRUCK
+(7 rows)
+
+-- count DISTINCT CASE WHEN expression
+SELECT *
+    FROM (
+        SELECT
+            count(DISTINCT
+                CASE
+                    WHEN l_shipmode = 'TRUCK' THEN l_linenumber
+                    WHEN l_shipmode = 'AIR' THEN l_linenumber + 10
+                    ELSE 2
+                END) as avg
+            FROM
+                lineitem_hash
+            GROUP BY l_shipdate) sub
+    ORDER BY 1 DESC
+    LIMIT 10;
+ avg
+---------------------------------------------------------------------
+   7
+   6
+   6
+   6
+   6
+   6
+   6
+   6
+   5
+   5
+(10 rows)
+
+-- COUNT DISTINCT (c1, c2)
+SELECT *
+    FROM
+        (SELECT
+            l_shipmode,
+            count(DISTINCT (l_shipdate, l_tax))
+            FROM
+                lineitem_hash
+            GROUP BY
+                l_shipmode) t
+    ORDER BY
+        2 DESC,1 DESC
+    LIMIT 10;
+ l_shipmode | count
+---------------------------------------------------------------------
+ TRUCK      |  1689
+ MAIL       |  1683
+ FOB        |  1655
+ AIR        |  1650
+ SHIP       |  1644
+ RAIL       |  1636
+ REG AIR    |  1607
+(7 rows)
+
+-- distinct on non-var (type cast/field select) columns are also
+-- supported if grouped on distribution column
+-- random is added to prevent flattening by postgresql
+SELECT
+    l_orderkey, count(a::int), count(distinct a::int)
+    FROM (
+        SELECT l_orderkey, l_orderkey * 1.5 a, random() b
+        FROM lineitem_hash) sub
+    GROUP BY 1
+    ORDER BY 1 DESC
+    LIMIT 5;
+ l_orderkey | count | count
+---------------------------------------------------------------------
+      14947 |     2 |     1
+      14946 |     2 |     1
+      14945 |     6 |     1
+      14944 |     2 |     1
+      14919 |     1 |     1
+(5 rows)
+
+SELECT user_id,
+       count(sub.a::int),
+       count(DISTINCT sub.a::int),
+       count(DISTINCT (sub).a)
+FROM
+    (SELECT user_id,
+            unnest(ARRAY[user_id * 1.5])a,
+            random() b
+     FROM users_table
+    ) sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+ user_id | count | count | count
+---------------------------------------------------------------------
+       6 |    11 |     1 |     1
+       5 |    27 |     1 |     1
+       4 |    24 |     1 |     1
+       3 |    18 |     1 |     1
+       2 |    19 |     1 |     1
+(5 rows)
+
+CREATE TYPE test_item AS
+(
+    id INTEGER,
+    duration INTEGER
+);
+CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
+SELECT create_distributed_table('test_count_distinct_array', 'key');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
+SELECT
+    key,
+    count(DISTINCT value),
+    count(DISTINCT (item)."id"),
+    count(DISTINCT (item)."id" * 3)
+FROM
+    (
+        SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
+    ) as sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+ key  | count | count | count
+---------------------------------------------------------------------
+ 1000 |     1 |     1 |     1
+  999 |     1 |     1 |     1
+  998 |     1 |     1 |     1
+  997 |     1 |     1 |     1
+  996 |     1 |     1 |     1
+(5 rows)
+
+DROP TABLE test_count_distinct_array;
+DROP TYPE test_item;
+-- other distinct aggregate are not supported
+SELECT *
+    FROM (
+        SELECT
+            l_linenumber, sum(DISTINCT l_partkey)
+            FROM lineitem_hash
+            GROUP BY l_linenumber) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ERROR: cannot compute aggregate (distinct)
+DETAIL: table partitioning is unsuitable for aggregate (distinct)
+SELECT *
+    FROM (
+        SELECT
+            l_linenumber, avg(DISTINCT l_partkey)
+            FROM lineitem_hash
+            GROUP BY l_linenumber) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ERROR: cannot compute aggregate (distinct)
+DETAIL: table partitioning is unsuitable for aggregate (distinct)
+-- whole row references, oid, and ctid are not supported in count distinct
+-- test table does not have oid or ctid enabled, so tests for them are skipped
+SELECT *
+    FROM (
+        SELECT
+            l_linenumber, count(DISTINCT lineitem_hash)
+            FROM lineitem_hash
+            GROUP BY l_linenumber) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ERROR: cannot compute count (distinct)
+DETAIL: Non-column references are not supported yet
+SELECT *
+    FROM (
+        SELECT
+            l_linenumber, count(DISTINCT lineitem_hash.*)
+            FROM lineitem_hash
+            GROUP BY l_linenumber) sub
+    ORDER BY 2 DESC, 1 DESC
+    LIMIT 10;
+ERROR: cannot compute count (distinct)
+DETAIL: Non-column references are not supported yet
+-- count distinct pushdown is enabled
+SELECT *
+    FROM (
+        SELECT
+            l_shipdate,
+            count(DISTINCT
+                CASE
+                    WHEN l_shipmode = 'TRUCK' THEN l_partkey
+                    ELSE NULL
+                END) as distinct_part,
+            extract(year from l_shipdate) as year
+            FROM
+                lineitem_hash
+            GROUP BY l_shipdate, year) sub
+    WHERE year = 1995
+    ORDER BY 2 DESC, 1
+    LIMIT 10;
+ l_shipdate | distinct_part | year
+---------------------------------------------------------------------
+ 11-29-1995 |             5 | 1995
+ 03-24-1995 |             4 | 1995
+ 09-18-1995 |             4 | 1995
+ 01-17-1995 |             3 | 1995
+ 04-02-1995 |             3 | 1995
+ 05-23-1995 |             3 | 1995
+ 08-11-1995 |             3 | 1995
+ 09-27-1995 |             3 | 1995
+ 10-27-1995 |             3 | 1995
+ 10-30-1995 |             3 | 1995
+(10 rows)
+
+-- count distinct pushdown is enabled
+SELECT *
+    FROM (
+        SELECT
+            l_shipdate,
+            count(DISTINCT
+                CASE
+                    WHEN l_shipmode = 'TRUCK' THEN l_partkey
+                    ELSE NULL
+                END) as distinct_part,
+            extract(year from l_shipdate) as year
+            FROM
+                lineitem_hash
+            GROUP BY l_shipdate, year) sub
+    WHERE year = 1995
+    ORDER BY 2 DESC, 1
+    LIMIT 10;
+ l_shipdate | distinct_part | year
+---------------------------------------------------------------------
+ 11-29-1995 |             5 | 1995
+ 03-24-1995 |             4 | 1995
+ 09-18-1995 |             4 | 1995
+ 01-17-1995 |             3 | 1995
+ 04-02-1995 |             3 | 1995
+ 05-23-1995 |             3 | 1995
+ 08-11-1995 |             3 | 1995
+ 09-27-1995 |             3 | 1995
+ 10-27-1995 |             3 | 1995
+ 10-30-1995 |             3 | 1995
+(10 rows)
+
+SELECT *
+    FROM (
+        SELECT
+            l_shipdate,
+            count(DISTINCT
+                CASE
+                    WHEN l_shipmode = 'TRUCK' THEN l_partkey
+                    ELSE NULL
+                END) as distinct_part,
+            extract(year from l_shipdate) as year
+            FROM
+                lineitem_hash
+            GROUP BY l_shipdate) sub
+    WHERE year = 1995
+    ORDER BY 2 DESC, 1
+    LIMIT 10;
+ l_shipdate | distinct_part | year
+---------------------------------------------------------------------
+ 11-29-1995 |             5 | 1995
+ 03-24-1995 |             4 | 1995
+ 09-18-1995 |             4 | 1995
+ 01-17-1995 |             3 | 1995
+ 04-02-1995 |             3 | 1995
+ 05-23-1995 |             3 | 1995
+ 08-11-1995 |             3 | 1995
+ 09-27-1995 |             3 | 1995
+ 10-27-1995 |             3 | 1995
+ 10-30-1995 |             3 | 1995
+(10 rows)
+
+DROP TABLE lineitem_hash;
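A note on the mechanism shared by this file and multi_explain below: pg_regress accepts a test as passing if its output matches any of the expected files (here the .out and the _0.out variant), and the version probe pinned at the top of both variants guarantees that exactly one of them can match for a given server. The probe, verbatim from these files:

SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
-- each expected file hard-codes one result (t for PG16+, f for PG15), so a
-- version mismatch fails on the very first query of the file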
diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out
index b3e47474f..17b673607 100644
--- a/src/test/regress/expected/multi_explain.out
+++ b/src/test/regress/expected/multi_explain.out
@@ -1,6 +1,18 @@
 --
 -- MULTI_EXPLAIN
 --
+-- This test file has an alternative output because of the following in PG16:
+-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
+-- https://github.com/postgres/postgres/commit/f4c7c410ee4a7baa06f51ebb8d5333c169691dd3
+-- The alternative output can be deleted when we drop support for PG15
+--
+SHOW server_version \gset
+SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
+ server_version_ge_16
+---------------------------------------------------------------------
+ t
+(1 row)
+
 SET citus.next_shard_id TO 570000;
 \a\t
 SET citus.explain_distributed_queries TO on;
@@ -651,7 +663,7 @@ Aggregate
   ->  GroupAggregate
         Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id)
         ->  Sort
-              Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id)
+              Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), events.event_time
               ->  Hash Join
                     Hash Cond: (users.composite_id = events.composite_id)
                     ->  Seq Scan on users_1400289 users
@@ -737,7 +749,7 @@ HashAggregate
   ->  GroupAggregate
         Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone
         ->  Sort
-              Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone
+              Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone, events.event_time
               ->  Hash Left Join
                     Hash Cond: (users.composite_id = subquery_2.composite_id)
                     ->  HashAggregate
@@ -853,7 +865,7 @@ Sort
         Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.count_pay
         Filter: (array_ndims(array_agg(('action=>1'::text) ORDER BY events.event_time)) > 0)
         ->  Sort
-              Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.count_pay
+              Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.count_pay, events.event_time
               ->  Hash Left Join
                     Hash Cond: (users.composite_id = subquery_2.composite_id)
                     ->  HashAggregate
@@ -951,7 +963,7 @@ Limit
   ->  GroupAggregate
         Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id)
         ->  Sort
-              Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id)
+              Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), events.event_time
               ->  Nested Loop Left Join
                     ->  Limit
                           ->  Sort
@@ -2381,11 +2393,16 @@ Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
   Tuple data received from node: 8 bytes
   Node: host=localhost port=xxxxx dbname=regression
   ->  Aggregate (actual rows=1 loops=1)
-        ->  Hash Join (actual rows=10 loops=1)
-              Hash Cond: (ref_table.a = intermediate_result.a)
-              ->  Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1)
-              ->  Hash (actual rows=10 loops=1)
+        ->  Merge Join (actual rows=10 loops=1)
+              Merge Cond: (intermediate_result.a = ref_table.a)
+              ->  Sort (actual rows=10 loops=1)
+                    Sort Key: intermediate_result.a
+                    Sort Method: quicksort Memory: 25kB
                     ->  Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
+              ->  Sort (actual rows=10 loops=1)
+                    Sort Key: ref_table.a
+                    Sort Method: quicksort Memory: 25kB
+                    ->  Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1)
 EXPLAIN :default_analyze_flags SELECT count(distinct a) FROM (SELECT GREATEST(random(), 2) r, a FROM dist_table) t NATURAL JOIN ref_table;
 Aggregate (actual rows=1 loops=1)
@@ -2442,9 +2459,12 @@ Aggregate (actual rows=1 loops=1)
   ->  Aggregate (actual rows=1 loops=1)
         InitPlan 1 (returns $0)
           ->  Function Scan on read_intermediate_result intermediate_result (actual rows=1 loops=1)
-        ->  Result (actual rows=4 loops=1)
-              One-Time Filter: $0
-              ->  Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1)
+        ->  Sort (actual rows=4 loops=1)
+              Sort Key: dist_table.a
+              Sort Method: quicksort Memory: 25kB
+              ->  Result (actual rows=4 loops=1)
+                    One-Time Filter: $0
+                    ->  Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1)
 BEGIN;
 EXPLAIN :default_analyze_flags WITH r AS (
@@ -2486,7 +2506,10 @@ Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
   Tuple data received from node: 8 bytes
   Node: host=localhost port=xxxxx dbname=regression
   ->  Aggregate (actual rows=1 loops=1)
-        ->  Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
+        ->  Sort (actual rows=10 loops=1)
+              Sort Key: intermediate_result.a2
+              Sort Method: quicksort Memory: 25kB
+              ->  Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
 ROLLBACK;
 -- https://github.com/citusdata/citus/issues/4074
 prepare ref_select(int) AS select * from ref_table where 1 = $1;
diff --git a/src/test/regress/expected/multi_explain_0.out b/src/test/regress/expected/multi_explain_0.out
new file mode 100644
index 000000000..9534cefb8
--- /dev/null
+++ b/src/test/regress/expected/multi_explain_0.out
@@ -0,0 +1,3219 @@
+--
+-- MULTI_EXPLAIN
+--
+-- This test file has an alternative output because of the following in PG16:
+-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
+-- https://github.com/postgres/postgres/commit/f4c7c410ee4a7baa06f51ebb8d5333c169691dd3
+-- The alternative output can be deleted when we drop support for PG15
+--
+SHOW server_version \gset
+SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
+ server_version_ge_16
+---------------------------------------------------------------------
+ f
+(1 row)
+
+SET citus.next_shard_id TO 570000;
+\a\t
+SET citus.explain_distributed_queries TO on;
+SET citus.enable_repartition_joins to ON;
+-- Ensure tuple data in explain analyze output is the same on all PG versions
+SET citus.enable_binary_protocol = TRUE;
+-- Function that parses explain output as JSON
+CREATE OR REPLACE FUNCTION explain_json(query text)
+RETURNS jsonb
+AS $BODY$
+DECLARE
+  result jsonb;
+BEGIN
+  EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result;
+  RETURN result;
+END;
+$BODY$ LANGUAGE plpgsql;
+CREATE OR REPLACE FUNCTION explain_analyze_json(query text)
+RETURNS jsonb
+AS $BODY$
+DECLARE
+  result jsonb;
+BEGIN
+  EXECUTE format('EXPLAIN (ANALYZE TRUE, FORMAT JSON) %s', query) INTO result;
+  RETURN result;
+END;
+$BODY$ LANGUAGE plpgsql;
+-- Function that parses explain output as XML
+CREATE OR REPLACE FUNCTION explain_xml(query text)
+RETURNS xml
+AS $BODY$
+DECLARE
+  result xml;
+BEGIN
+  EXECUTE format('EXPLAIN (FORMAT XML) %s', query) INTO result;
+  RETURN result;
+END;
+$BODY$ LANGUAGE plpgsql;
+-- Function that parses explain output as XML
+CREATE OR REPLACE FUNCTION explain_analyze_xml(query text)
+RETURNS xml
+AS $BODY$
+DECLARE
+  result xml;
+BEGIN
+  EXECUTE format('EXPLAIN (ANALYZE true, FORMAT XML) %s', query) INTO result;
+  RETURN result;
+END;
+$BODY$ LANGUAGE plpgsql;
+-- VACUMM related tables to ensure test outputs are stable
+VACUUM ANALYZE lineitem;
+VACUUM ANALYZE orders;
+-- Test Text format
+EXPLAIN (COSTS FALSE, FORMAT TEXT)
+  SELECT l_quantity, count(*) count_quantity FROM lineitem
+  GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
+Sort
+  Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
+  ->  HashAggregate
+        Group Key: remote_scan.l_quantity
+        ->  Custom Scan (Citus Adaptive)
+              Task Count: 2
+              Tasks Shown: One of 2
+              ->  Task
+                    Node: host=localhost port=xxxxx dbname=regression
+                    ->  HashAggregate
+                          Group Key: l_quantity
+                          ->  Seq Scan on lineitem_360000 lineitem
+-- Test disable hash aggregate
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE, FORMAT TEXT)
+  SELECT l_quantity, count(*) count_quantity FROM lineitem
+  GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
+Sort
+  Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity
+  ->  GroupAggregate
+        Group Key: remote_scan.l_quantity
+        ->  Sort
+              Sort Key: remote_scan.l_quantity
+              ->  Custom Scan (Citus Adaptive)
+                    Task Count: 2
+                    Tasks Shown: One of 2
+                    ->  Task
+                          Node: host=localhost port=xxxxx dbname=regression
+                          ->  HashAggregate
+                                Group Key: l_quantity
+                                ->  Seq Scan on lineitem_360000 lineitem
+SET enable_hashagg TO on;
+-- Test JSON format
+EXPLAIN (COSTS FALSE, FORMAT JSON)
+  SELECT l_quantity, count(*) count_quantity FROM lineitem
+  GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
+[
+  {
+    "Plan": {
+      "Node Type": "Sort",
+      "Parallel Aware": false,
+      "Async Capable": false,
+      "Sort
Key": ["(COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))", "remote_scan.l_quantity"], + "Plans": [ + { + "Node Type": "Aggregate", + "Strategy": "Hashed", + "Partial Mode": "Simple", + "Parent Relationship": "Outer", + "Parallel Aware": false, + "Async Capable": false, + "Group Key": ["remote_scan.l_quantity"], + "Plans": [ + { + "Node Type": "Custom Scan", + "Parent Relationship": "Outer", + "Custom Plan Provider": "Citus Adaptive", + "Parallel Aware": false, + "Async Capable": false, + "Distributed Query": { + "Job": { + "Task Count": 2, + "Tasks Shown": "One of 2", + "Tasks": [ + { + "Node": "host=localhost port=xxxxx dbname=regression", + "Remote Plan": [ + [ + { + "Plan": { + "Node Type": "Aggregate", + "Strategy": "Hashed", + "Partial Mode": "Simple", + "Parallel Aware": false, + "Async Capable": false, + "Group Key": ["l_quantity"], + "Plans": [ + { + "Node Type": "Seq Scan", + "Parent Relationship": "Outer", + "Parallel Aware": false, + "Async Capable": false, + "Relation Name": "lineitem_360000", + "Alias": "lineitem" + } + ] + } + } + ] + + ] + } + ] + } + } + } + ] + } + ] + } + } +] +-- Validate JSON format +SELECT true AS valid FROM explain_json($$ + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$); +t +SELECT true AS valid FROM explain_analyze_json($$ + WITH a AS ( + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity LIMIT 10) + SELECT count(*) FROM a +$$); +t +-- Test XML format +EXPLAIN (COSTS FALSE, FORMAT XML) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; + + + + Sort + false + false + + (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)) + remote_scan.l_quantity + + + + Aggregate + Hashed + Simple + Outer + false + false + + remote_scan.l_quantity + + + + Custom Scan + Outer + Citus Adaptive + false + false + + + 2 + One of 2 + + + host=localhost port=xxxxx dbname=regression + + + + + Aggregate + Hashed + Simple + false + false + + l_quantity + + + + Seq Scan + Outer + false + false + lineitem_360000 + lineitem + + + + + + + + + + + + + + + + + +-- Validate XML format +SELECT true AS valid FROM explain_xml($$ + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$); +t +SELECT true AS valid FROM explain_analyze_xml($$ + WITH a AS ( + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity LIMIT 10) + SELECT count(*) FROM a +$$); +t +-- Test YAML format +EXPLAIN (COSTS FALSE, FORMAT YAML) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +- Plan: + Node Type: "Sort" + Parallel Aware: false + Async Capable: false + Sort Key: + - "(COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))" + - "remote_scan.l_quantity" + Plans: + - Node Type: "Aggregate" + Strategy: "Hashed" + Partial Mode: "Simple" + Parent Relationship: "Outer" + Parallel Aware: false + Async Capable: false + Group Key: + - "remote_scan.l_quantity" + Plans: + - Node Type: "Custom Scan" + Parent Relationship: "Outer" + Custom Plan Provider: "Citus Adaptive" + Parallel Aware: false + Async Capable: false + Distributed Query: + Job: + Task Count: 2 + Tasks Shown: "One of 2" + Tasks: + - Node: "host=localhost port=xxxxx dbname=regression" + Remote 
Plan: + - Plan: + Node Type: "Aggregate" + Strategy: "Hashed" + Partial Mode: "Simple" + Parallel Aware: false + Async Capable: false + Group Key: + - "l_quantity" + Plans: + - Node Type: "Seq Scan" + Parent Relationship: "Outer" + Parallel Aware: false + Async Capable: false + Relation Name: "lineitem_360000" + Alias: "lineitem" + +-- Test Text format +EXPLAIN (COSTS FALSE, FORMAT TEXT) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +Sort + Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity + -> HashAggregate + Group Key: remote_scan.l_quantity + -> Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_quantity + -> Seq Scan on lineitem_360000 lineitem +-- Test analyze (with TIMING FALSE and SUMMARY FALSE for consistent output) +SELECT public.plan_normalize_memory($Q$ +EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +$Q$); +Sort (actual rows=50 loops=1) + Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity + Sort Method: quicksort Memory: xxx + -> HashAggregate (actual rows=50 loops=1) + Group Key: remote_scan.l_quantity + -> Custom Scan (Citus Adaptive) (actual rows=100 loops=1) + Task Count: 2 + Tuple data received from nodes: 1800 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 900 bytes + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate (actual rows=50 loops=1) + Group Key: l_quantity + -> Seq Scan on lineitem_360000 lineitem (actual rows=5894 loops=1) +-- EXPLAIN ANALYZE doesn't show worker tasks for repartition joins yet +SET citus.shard_count TO 3; +CREATE TABLE t1(a int, b int); +CREATE TABLE t2(a int, b int); +SELECT create_distributed_table('t1', 'a'), create_distributed_table('t2', 'a'); +| +BEGIN; +SET LOCAL citus.enable_repartition_joins TO true; +EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off) SELECT count(*) FROM t1, t2 WHERE t1.a=t2.b; +Aggregate (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 6 + Tuple data received from nodes: 48 bytes + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 3 + Merge Task Count: 6 + -> MapMergeJob + Map Task Count: 3 + Merge Task Count: 6 +-- Confirm repartition join in distributed subplan works +EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off) +WITH repartition AS (SELECT count(*) FROM t1, t2 WHERE t1.a=t2.b) +SELECT count(*) from repartition; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + -> Distributed Subplan XXX_1 + Intermediate Data Size: 14 bytes + Result destination: Write locally + -> Aggregate (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 6 + Tuple data received from nodes: 48 bytes + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 3 + Merge Task Count: 6 + -> MapMergeJob + Map Task Count: 3 + Merge Task Count: 6 + Task Count: 1 + Tuple data received from nodes: 8 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 8 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate (actual rows=1 loops=1) + -> Function Scan on 
read_intermediate_result intermediate_result (actual rows=1 loops=1) +END; +DROP TABLE t1, t2; +-- Test query text output, with ANALYZE ON +SELECT public.plan_normalize_memory($Q$ +EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE, VERBOSE TRUE) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +$Q$); +Sort (actual rows=50 loops=1) + Output: remote_scan.l_quantity, (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)) + Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity + Sort Method: quicksort Memory: xxx + -> HashAggregate (actual rows=50 loops=1) + Output: remote_scan.l_quantity, COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint) + Group Key: remote_scan.l_quantity + -> Custom Scan (Citus Adaptive) (actual rows=100 loops=1) + Output: remote_scan.l_quantity, remote_scan.count_quantity + Task Count: 2 + Tuple data received from nodes: 1800 bytes + Tasks Shown: One of 2 + -> Task + Query: SELECT l_quantity, count(*) AS count_quantity FROM public.lineitem_360000 lineitem WHERE true GROUP BY l_quantity + Tuple data received from node: 900 bytes + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate (actual rows=50 loops=1) + Output: l_quantity, count(*) + Group Key: lineitem.l_quantity + -> Seq Scan on public.lineitem_360000 lineitem (actual rows=5894 loops=1) + Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment +-- Test query text output, with ANALYZE OFF +EXPLAIN (COSTS FALSE, ANALYZE FALSE, TIMING FALSE, SUMMARY FALSE, VERBOSE TRUE) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +Sort + Output: remote_scan.l_quantity, (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)) + Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity + -> HashAggregate + Output: remote_scan.l_quantity, COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint) + Group Key: remote_scan.l_quantity + -> Custom Scan (Citus Adaptive) + Output: remote_scan.l_quantity, remote_scan.count_quantity + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Query: SELECT l_quantity, count(*) AS count_quantity FROM public.lineitem_360000 lineitem WHERE true GROUP BY l_quantity + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: l_quantity, count(*) + Group Key: lineitem.l_quantity + -> Seq Scan on public.lineitem_360000 lineitem + Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment +-- Test verbose +EXPLAIN (COSTS FALSE, VERBOSE TRUE) + SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem; +Aggregate + Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / pg_catalog.sum(remote_scan."?column?_2"))) + -> Custom Scan (Citus Adaptive) + Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2" + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Query: SELECT sum(l_quantity), sum(l_quantity), count(l_quantity) FROM public.lineitem_360000 lineitem WHERE true + Node: host=localhost 
port=xxxxx dbname=regression + -> Aggregate + Output: sum(l_quantity), sum(l_quantity), count(l_quantity) + -> Seq Scan on public.lineitem_360000 lineitem + Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment +-- Test join +EXPLAIN (COSTS FALSE) + SELECT * FROM lineitem + JOIN orders ON l_orderkey = o_orderkey AND l_quantity < 5.0 + ORDER BY l_quantity LIMIT 10; +Limit + -> Sort + Sort Key: remote_scan.l_quantity + -> Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Sort + Sort Key: lineitem.l_quantity + -> Hash Join + Hash Cond: (lineitem.l_orderkey = orders.o_orderkey) + -> Seq Scan on lineitem_360000 lineitem + Filter: (l_quantity < 5.0) + -> Hash + -> Seq Scan on orders_360002 orders +-- Test insert +EXPLAIN (COSTS FALSE) + INSERT INTO lineitem VALUES (1,0), (2, 0), (3, 0), (4, 0); +Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on lineitem_360000 citus_table_alias + -> Values Scan on "*VALUES*" +-- Test update +EXPLAIN (COSTS FALSE) + UPDATE lineitem + SET l_suppkey = 12 + WHERE l_orderkey = 1 AND l_partkey = 0; +Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_360000 lineitem + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey = 1) + Filter: (l_partkey = 0) +-- Test analyze (with TIMING FALSE and SUMMARY FALSE for consistent output) +BEGIN; +EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) + UPDATE lineitem + SET l_suppkey = 12 + WHERE l_orderkey = 1 AND l_partkey = 0; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_360000 lineitem (actual rows=0 loops=1) + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem (actual rows=0 loops=1) + Index Cond: (l_orderkey = 1) + Filter: (l_partkey = 0) + Rows Removed by Filter: 6 +ROLLBACK; +-- Test delete +EXPLAIN (COSTS FALSE) + DELETE FROM lineitem + WHERE l_orderkey = 1 AND l_partkey = 0; +Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on lineitem_360000 lineitem + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey = 1) + Filter: (l_partkey = 0) +-- Test zero-shard update +EXPLAIN (COSTS FALSE) + UPDATE lineitem + SET l_suppkey = 12 + WHERE l_orderkey = 1 AND l_orderkey = 0; +Custom Scan (Citus Adaptive) + Task Count: 0 + Tasks Shown: All +-- Test zero-shard delete +EXPLAIN (COSTS FALSE) + DELETE FROM lineitem + WHERE l_orderkey = 1 AND l_orderkey = 0; +Custom Scan (Citus Adaptive) + Task Count: 0 + Tasks Shown: All +-- Test single-shard SELECT +EXPLAIN (COSTS FALSE) + SELECT l_quantity FROM lineitem WHERE l_orderkey = 5; +Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey = 5) +SELECT true AS valid FROM explain_xml($$ + SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$); +t +SELECT true AS 
valid FROM explain_json($$ + SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$); +t +-- Test CREATE TABLE ... AS +EXPLAIN (COSTS FALSE) + CREATE TABLE explain_result AS + SELECT * FROM lineitem; +Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on lineitem_360000 lineitem +-- Test having +EXPLAIN (COSTS FALSE, VERBOSE TRUE) + SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem + HAVING sum(l_quantity) > 100; +Aggregate + Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / pg_catalog.sum(remote_scan."?column?_2"))) + Filter: (sum(remote_scan.worker_column_4) > '100'::numeric) + -> Custom Scan (Citus Adaptive) + Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2", remote_scan.worker_column_4 + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Query: SELECT sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity) AS worker_column_4 FROM public.lineitem_360000 lineitem WHERE true + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + Output: sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity) + -> Seq Scan on public.lineitem_360000 lineitem + Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment +-- Test having without aggregate +EXPLAIN (COSTS FALSE, VERBOSE TRUE) + SELECT l_quantity FROM lineitem + GROUP BY l_quantity + HAVING l_quantity > (100 * random()); +HashAggregate + Output: remote_scan.l_quantity + Group Key: remote_scan.l_quantity + Filter: ((remote_scan.worker_column_2)::double precision > ('100'::double precision * random())) + -> Custom Scan (Citus Adaptive) + Output: remote_scan.l_quantity, remote_scan.worker_column_2 + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Query: SELECT l_quantity, l_quantity AS worker_column_2 FROM public.lineitem_360000 lineitem WHERE true GROUP BY l_quantity + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: l_quantity, l_quantity + Group Key: lineitem.l_quantity + -> Seq Scan on public.lineitem_360000 lineitem + Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment +-- Subquery pushdown tests with explain +EXPLAIN (COSTS OFF) +SELECT + avg(array_length(events, 1)) AS event_average +FROM + (SELECT + tenant_id, + user_id, + array_agg(event_type ORDER BY event_time) AS events + FROM + (SELECT + (users.composite_id).tenant_id, + (users.composite_id).user_id, + event_type, + events.event_time + FROM + users, + events + WHERE + (users.composite_id) = (events.composite_id) AND + users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND + users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND + event_type IN ('click', 'submit', 'pay')) AS subquery + GROUP BY + tenant_id, + user_id) AS subquery; +Aggregate + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> GroupAggregate + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) + -> Sort + Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) + -> Hash Join + Hash Cond: (users.composite_id = 
events.composite_id) + -> Seq Scan on users_1400289 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400285 events + Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[])) +-- Union and left join subquery pushdown +EXPLAIN (COSTS OFF) +SELECT + avg(array_length(events, 1)) AS event_average, + hasdone +FROM + (SELECT + subquery_1.tenant_id, + subquery_1.user_id, + array_agg(event ORDER BY event_time) AS events, + COALESCE(hasdone, 'Has not done paying') AS hasdone + FROM + ( + (SELECT + (users.composite_id).tenant_id, + (users.composite_id).user_id, + (users.composite_id) as composite_id, + 'action=>1'AS event, + events.event_time + FROM + users, + events + WHERE + (users.composite_id) = (events.composite_id) AND + users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND + users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND + event_type = 'click') + UNION + (SELECT + (users.composite_id).tenant_id, + (users.composite_id).user_id, + (users.composite_id) as composite_id, + 'action=>2'AS event, + events.event_time + FROM + users, + events + WHERE + (users.composite_id) = (events.composite_id) AND + users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND + users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND + event_type = 'submit') + ) AS subquery_1 + LEFT JOIN + (SELECT + DISTINCT ON ((composite_id).tenant_id, (composite_id).user_id) composite_id, + (composite_id).tenant_id, + (composite_id).user_id, + 'Has done paying'::TEXT AS hasdone + FROM + events + WHERE + events.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND + events.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND + event_type = 'pay') AS subquery_2 + ON + subquery_1.composite_id = subquery_2.composite_id + GROUP BY + subquery_1.tenant_id, + subquery_1.user_id, + hasdone) AS subquery_top +GROUP BY + hasdone; +HashAggregate + Group Key: remote_scan.hasdone + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> GroupAggregate + Group Key: subquery_top.hasdone + -> Sort + Sort Key: subquery_top.hasdone + -> Subquery Scan on subquery_top + -> GroupAggregate + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone + -> Sort + Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone + -> Hash Left Join + Hash Cond: (users.composite_id = subquery_2.composite_id) + -> HashAggregate + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), users.composite_id, ('action=>1'::text), events.event_time + -> Append + -> Hash Join + Hash Cond: (users.composite_id = events.composite_id) + -> Seq Scan on users_1400289 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400285 events + Filter: ((event_type)::text = 'click'::text) + -> Hash Join + Hash Cond: (users_1.composite_id = events_1.composite_id) + -> Seq Scan on users_1400289 users_1 + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400285 events_1 + Filter: ((event_type)::text = 
'submit'::text) + -> Hash + -> Subquery Scan on subquery_2 + -> Unique + -> Sort + Sort Key: ((events_2.composite_id).tenant_id), ((events_2.composite_id).user_id) + -> Seq Scan on events_1400285 events_2 + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text)) +-- Union, left join and having subquery pushdown +EXPLAIN (COSTS OFF) + SELECT + avg(array_length(events, 1)) AS event_average, + count_pay + FROM ( + SELECT + subquery_1.tenant_id, + subquery_1.user_id, + array_agg(event ORDER BY event_time) AS events, + COALESCE(count_pay, 0) AS count_pay + FROM + ( + (SELECT + (users.composite_id).tenant_id, + (users.composite_id).user_id, + (users.composite_id), + 'action=>1'AS event, + events.event_time + FROM + users, + events + WHERE + (users.composite_id) = (events.composite_id) AND + users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND + users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND + event_type = 'click') + UNION + (SELECT + (users.composite_id).tenant_id, + (users.composite_id).user_id, + (users.composite_id), + 'action=>2'AS event, + events.event_time + FROM + users, + events + WHERE + (users.composite_id) = (events.composite_id) AND + users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND + users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND + event_type = 'submit') + ) AS subquery_1 + LEFT JOIN + (SELECT + (composite_id).tenant_id, + (composite_id).user_id, + composite_id, + COUNT(*) AS count_pay + FROM + events + WHERE + events.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND + events.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND + event_type = 'pay' + GROUP BY + composite_id + HAVING + COUNT(*) > 2) AS subquery_2 + ON + subquery_1.composite_id = subquery_2.composite_id + GROUP BY + subquery_1.tenant_id, + subquery_1.user_id, + count_pay) AS subquery_top +WHERE + array_ndims(events) > 0 +GROUP BY + count_pay +ORDER BY + count_pay; +Sort + Sort Key: remote_scan.count_pay + -> HashAggregate + Group Key: remote_scan.count_pay + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> GroupAggregate + Group Key: subquery_top.count_pay + -> Sort + Sort Key: subquery_top.count_pay + -> Subquery Scan on subquery_top + -> GroupAggregate + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.count_pay + Filter: (array_ndims(array_agg(('action=>1'::text) ORDER BY events.event_time)) > 0) + -> Sort + Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.count_pay + -> Hash Left Join + Hash Cond: (users.composite_id = subquery_2.composite_id) + -> HashAggregate + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), users.composite_id, ('action=>1'::text), events.event_time + -> Append + -> Hash Join + Hash Cond: (users.composite_id = events.composite_id) + -> Seq Scan on users_1400289 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400285 events + Filter: ((event_type)::text = 'click'::text) + -> Hash Join + Hash Cond: (users_1.composite_id = events_1.composite_id) + -> Seq Scan on users_1400289 users_1 + Filter: ((composite_id 
>= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400285 events_1 + Filter: ((event_type)::text = 'submit'::text) + -> Hash + -> Subquery Scan on subquery_2 + -> GroupAggregate + Group Key: events_2.composite_id + Filter: (count(*) > 2) + -> Sort + Sort Key: events_2.composite_id + -> Seq Scan on events_1400285 events_2 + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text)) +-- Lateral join subquery pushdown +-- set subquery_pushdown due to limit in the query +SET citus.subquery_pushdown to ON; +NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks. +DETAIL: When enabled, the planner skips many correctness checks for subqueries and pushes down the queries to shards as-is. It means that the queries are likely to return wrong results unless the user is absolutely sure that pushing down the subquery is safe. This GUC is maintained only for backward compatibility, no new users are supposed to use it. The planner is capable of pushing down as much computation as possible to the shards depending on the query. +EXPLAIN (COSTS OFF) +SELECT + tenant_id, + user_id, + user_lastseen, + event_array +FROM + (SELECT + tenant_id, + user_id, + max(lastseen) as user_lastseen, + array_agg(event_type ORDER BY event_time) AS event_array + FROM + (SELECT + (composite_id).tenant_id, + (composite_id).user_id, + composite_id, + lastseen + FROM + users + WHERE + composite_id >= '(1, -9223372036854775808)'::user_composite_type AND + composite_id <= '(1, 9223372036854775807)'::user_composite_type + ORDER BY + lastseen DESC + LIMIT + 10 + ) AS subquery_top + LEFT JOIN LATERAL + (SELECT + event_type, + event_time + FROM + events + WHERE + (composite_id) = subquery_top.composite_id + ORDER BY + event_time DESC + LIMIT + 99) AS subquery_lateral + ON + true + GROUP BY + tenant_id, + user_id + ) AS shard_union +ORDER BY + user_lastseen DESC +LIMIT + 10; +Limit + -> Sort + Sort Key: remote_scan.user_lastseen DESC + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Sort + Sort Key: (max(users.lastseen)) DESC + -> GroupAggregate + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) + -> Sort + Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) + -> Nested Loop Left Join + -> Limit + -> Sort + Sort Key: users.lastseen DESC + -> Seq Scan on users_1400289 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Limit + -> Sort + Sort Key: events.event_time DESC + -> Seq Scan on events_1400285 events + Filter: (composite_id = users.composite_id) +RESET citus.subquery_pushdown; +-- Test all tasks output +SET citus.explain_all_tasks TO on; +EXPLAIN (COSTS FALSE) + SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; +Aggregate + -> Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Index Only Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey > 9030) + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> 
Aggregate + -> Index Only Scan using lineitem_pkey_360001 on lineitem_360001 lineitem + Index Cond: (l_orderkey > 9030) +SELECT true AS valid FROM explain_xml($$ + SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$); +t +SELECT true AS valid FROM explain_json($$ + SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$); +t +-- Test multi shard update +EXPLAIN (COSTS FALSE) + UPDATE lineitem_hash_part + SET l_suppkey = 12; +Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_hash_part_360041 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_hash_part_360042 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360042 lineitem_hash_part + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_hash_part_360043 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360043 lineitem_hash_part + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_hash_part_360044 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360044 lineitem_hash_part +EXPLAIN (COSTS FALSE) + UPDATE lineitem_hash_part + SET l_suppkey = 12 + WHERE l_orderkey = 1 OR l_orderkey = 3; +Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_hash_part_360041 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + Filter: ((l_orderkey = 1) OR (l_orderkey = 3)) + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_hash_part_360042 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360042 lineitem_hash_part + Filter: ((l_orderkey = 1) OR (l_orderkey = 3)) +-- Test multi shard delete +EXPLAIN (COSTS FALSE) + DELETE FROM lineitem_hash_part; +Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on lineitem_hash_part_360041 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on lineitem_hash_part_360042 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360042 lineitem_hash_part + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on lineitem_hash_part_360043 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360043 lineitem_hash_part + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on lineitem_hash_part_360044 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360044 lineitem_hash_part +-- Test analyze (with TIMING FALSE and SUMMARY FALSE for consistent output) +SELECT public.plan_normalize_memory($Q$ +EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +$Q$); +Sort (actual rows=50 loops=1) + Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint)), remote_scan.l_quantity + Sort Method: quicksort Memory: xxx + -> HashAggregate (actual rows=50 loops=1) + Group Key: remote_scan.l_quantity + -> Custom Scan (Citus Adaptive) (actual rows=100 loops=1) + Task Count: 2 + Tuple data received from nodes: 1800 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 900 bytes + Node: 
host=localhost port=xxxxx dbname=regression + -> HashAggregate (actual rows=50 loops=1) + Group Key: l_quantity + -> Seq Scan on lineitem_360000 lineitem (actual rows=5894 loops=1) + -> Task + Tuple data received from node: 900 bytes + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate (actual rows=50 loops=1) + Group Key: l_quantity + -> Seq Scan on lineitem_360001 lineitem (actual rows=6106 loops=1) +SET citus.explain_all_tasks TO off; +-- Test update with subquery +EXPLAIN (COSTS FALSE) + UPDATE lineitem_hash_part + SET l_suppkey = 12 + FROM orders_hash_part + WHERE orders_hash_part.o_orderkey = lineitem_hash_part.l_orderkey; +Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_hash_part_360041 lineitem_hash_part + -> Hash Join + Hash Cond: (lineitem_hash_part.l_orderkey = orders_hash_part.o_orderkey) + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + -> Hash + -> Seq Scan on orders_hash_part_360045 orders_hash_part +-- Test delete with subquery +EXPLAIN (COSTS FALSE) + DELETE FROM lineitem_hash_part + USING orders_hash_part + WHERE orders_hash_part.o_orderkey = lineitem_hash_part.l_orderkey; +Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on lineitem_hash_part_360041 lineitem_hash_part + -> Hash Join + Hash Cond: (lineitem_hash_part.l_orderkey = orders_hash_part.o_orderkey) + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + -> Hash + -> Seq Scan on orders_hash_part_360045 orders_hash_part +-- Test track tracker +EXPLAIN (COSTS FALSE) + SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; +Aggregate + -> Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Index Only Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey > 9030) +-- Test re-partition join +EXPLAIN (COSTS FALSE) + SELECT count(*) + FROM lineitem, orders, customer_append, supplier_single_shard + WHERE l_orderkey = o_orderkey + AND o_custkey = c_custkey + AND l_suppkey = s_suppkey; +Aggregate + -> Custom Scan (Citus Adaptive) + Task Count: 6 + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 6 + Merge Task Count: 6 + -> MapMergeJob + Map Task Count: 2 + Merge Task Count: 6 + -> MapMergeJob + Map Task Count: 1 + Merge Task Count: 6 + -> MapMergeJob + Map Task Count: 1 + Merge Task Count: 6 +EXPLAIN (COSTS FALSE, FORMAT JSON) + SELECT count(*) + FROM lineitem, orders, customer_append, supplier_single_shard + WHERE l_orderkey = o_orderkey + AND o_custkey = c_custkey + AND l_suppkey = s_suppkey; +[ + { + "Plan": { + "Node Type": "Aggregate", + "Strategy": "Plain", + "Partial Mode": "Simple", + "Parallel Aware": false, + "Async Capable": false, + "Plans": [ + { + "Node Type": "Custom Scan", + "Parent Relationship": "Outer", + "Custom Plan Provider": "Citus Adaptive", + "Parallel Aware": false, + "Async Capable": false, + "Distributed Query": { + "Job": { + "Task Count": 6, + "Tasks Shown": "None, not supported for re-partition queries", + "Dependent Jobs": [ + { + "Map Task Count": 6, + "Merge Task Count": 6, + "Dependent Jobs": [ + { + "Map Task Count": 2, + "Merge Task Count": 6 + }, + { + "Map Task Count": 1, + "Merge Task Count": 6 + } + ] + }, + { + "Map Task Count": 1, + "Merge Task Count": 6 + } + ] + 
} + } + } + ] + } + } +] +SELECT true AS valid FROM explain_json($$ + SELECT count(*) + FROM lineitem, orders, customer_append, supplier_single_shard + WHERE l_orderkey = o_orderkey + AND o_custkey = c_custkey + AND l_suppkey = s_suppkey$$); +t +EXPLAIN (COSTS FALSE, FORMAT XML) + SELECT count(*) + FROM lineitem, orders, customer_append, supplier_single_shard + WHERE l_orderkey = o_orderkey + AND o_custkey = c_custkey + AND l_suppkey = s_suppkey; + + + + Aggregate + Plain + Simple + false + false + + + Custom Scan + Outer + Citus Adaptive + false + false + + + 6 + None, not supported for re-partition queries + + + 6 + 6 + + + 2 + 6 + + + 1 + 6 + + + + + 1 + 6 + + + + + + + + + +SELECT true AS valid FROM explain_xml($$ + SELECT count(*) + FROM lineitem, orders, customer_append, supplier + WHERE l_orderkey = o_orderkey + AND o_custkey = c_custkey + AND l_suppkey = s_suppkey$$); +t +-- make sure that EXPLAIN works without +-- problems for queries that involve only +-- reference tables +SELECT true AS valid FROM explain_xml($$ + SELECT count(*) + FROM nation + WHERE n_name = 'CHINA'$$); +t +SELECT true AS valid FROM explain_xml($$ + SELECT count(*) + FROM nation, supplier + WHERE nation.n_nationkey = supplier.s_nationkey$$); +t +EXPLAIN (COSTS FALSE, FORMAT YAML) + SELECT count(*) + FROM lineitem, orders, customer, supplier_single_shard + WHERE l_orderkey = o_orderkey + AND o_custkey = c_custkey + AND l_suppkey = s_suppkey; +- Plan: + Node Type: "Aggregate" + Strategy: "Plain" + Partial Mode: "Simple" + Parallel Aware: false + Async Capable: false + Plans: + - Node Type: "Custom Scan" + Parent Relationship: "Outer" + Custom Plan Provider: "Citus Adaptive" + Parallel Aware: false + Async Capable: false + Distributed Query: + Job: + Task Count: 6 + Tasks Shown: "None, not supported for re-partition queries" + Dependent Jobs: + - Map Task Count: 2 + Merge Task Count: 6 + - Map Task Count: 1 + Merge Task Count: 6 +-- ensure local plans display correctly +CREATE TABLE lineitem_clone (LIKE lineitem); +EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem_clone; +Aggregate + -> Seq Scan on lineitem_clone +DROP TABLE lineitem_clone; +-- ensure distributed plans don't break +EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem; +Aggregate + -> Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Seq Scan on lineitem_360000 lineitem +-- ensure EXPLAIN EXECUTE doesn't crash +PREPARE task_tracker_query AS + SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; +EXPLAIN (COSTS FALSE) EXECUTE task_tracker_query; +Aggregate + -> Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Index Only Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey > 9030) +PREPARE router_executor_query AS SELECT l_quantity FROM lineitem WHERE l_orderkey = 5; +EXPLAIN EXECUTE router_executor_query; +Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem (cost=0.28..13.60 rows=4 width=5) + Index Cond: (l_orderkey = 5) +PREPARE real_time_executor_query AS + SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; +EXPLAIN (COSTS FALSE) EXECUTE real_time_executor_query; +Aggregate + -> Custom Scan 
(Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Index Only Scan using lineitem_pkey_360000 on lineitem_360000 lineitem + Index Cond: (l_orderkey > 9030) +-- EXPLAIN EXECUTE of parametrized prepared statements is broken, but +-- at least make sure to fail without crashing +PREPARE router_executor_query_param(int) AS SELECT l_quantity FROM lineitem WHERE l_orderkey = $1; +EXPLAIN EXECUTE router_executor_query_param(5); +Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem (cost=0.28..13.60 rows=4 width=5) + Index Cond: (l_orderkey = 5) +EXPLAIN (ANALYZE ON, COSTS OFF, TIMING OFF, SUMMARY OFF) EXECUTE router_executor_query_param(5); +Custom Scan (Citus Adaptive) (actual rows=3 loops=1) + Task Count: 1 + Tuple data received from nodes: 30 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 30 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Index Scan using lineitem_pkey_360000 on lineitem_360000 lineitem (actual rows=3 loops=1) + Index Cond: (l_orderkey = 5) +\set VERBOSITY TERSE +PREPARE multi_shard_query_param(int) AS UPDATE lineitem SET l_quantity = $1; +BEGIN; +EXPLAIN (COSTS OFF) EXECUTE multi_shard_query_param(5); +Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_360000 lineitem + -> Seq Scan on lineitem_360000 lineitem +ROLLBACK; +BEGIN; +EXPLAIN (ANALYZE ON, COSTS OFF, TIMING OFF, SUMMARY OFF) EXECUTE multi_shard_query_param(5); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on lineitem_360000 lineitem (actual rows=0 loops=1) + -> Seq Scan on lineitem_360000 lineitem (actual rows=5894 loops=1) +ROLLBACK; +\set VERBOSITY DEFAULT +-- test explain in a transaction with alter table to test we use right connections +BEGIN; +CREATE TABLE explain_table(id int); +SELECT create_distributed_table('explain_table', 'id'); + +ALTER TABLE explain_table ADD COLUMN value int; +ROLLBACK; +-- test explain with local INSERT ... SELECT +EXPLAIN (COSTS OFF) +INSERT INTO lineitem_hash_part +SELECT o_orderkey FROM orders_hash_part LIMIT 3; +Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Limit + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Seq Scan on orders_hash_part_360045 orders_hash_part +SELECT true AS valid FROM explain_json($$ + INSERT INTO lineitem_hash_part (l_orderkey) + SELECT o_orderkey FROM orders_hash_part LIMIT 3; +$$); +t +EXPLAIN (COSTS OFF) +INSERT INTO lineitem_hash_part (l_orderkey, l_quantity) +SELECT o_orderkey, 5 FROM orders_hash_part LIMIT 3; +Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Limit + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Seq Scan on orders_hash_part_360045 orders_hash_part +EXPLAIN (COSTS OFF) +INSERT INTO lineitem_hash_part (l_orderkey) +SELECT s FROM generate_series(1,5) s; +Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Function Scan on generate_series s +-- WHERE EXISTS forces pg12 to materialize cte +EXPLAIN (COSTS OFF) +WITH cte1 AS (SELECT s FROM generate_series(1,10) s) +INSERT INTO lineitem_hash_part +WITH cte1 AS (SELECT * FROM cte1 WHERE EXISTS (SELECT * FROM cte1) LIMIT 5) +SELECT s FROM cte1 WHERE EXISTS (SELECT * FROM cte1); +Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Result + One-Time Filter: $3 + CTE cte1 + -> Function Scan on generate_series s + CTE cte1 + -> Limit + InitPlan 2 (returns $1) + -> CTE Scan on cte1 cte1_1 + -> Result + One-Time Filter: $1 + -> CTE Scan on cte1 cte1_2 + InitPlan 4 (returns $3) + -> CTE Scan on cte1 cte1_3 + -> CTE Scan on cte1 +EXPLAIN (COSTS OFF) +INSERT INTO lineitem_hash_part +( SELECT s FROM generate_series(1,5) s) UNION +( SELECT s FROM generate_series(5,10) s); +Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Subquery Scan on citus_insert_select_subquery + -> HashAggregate + Group Key: s.s + -> Append + -> Function Scan on generate_series s + -> Function Scan on generate_series s_1 +-- explain with recursive planning +EXPLAIN (COSTS OFF, VERBOSE true) +WITH keys AS MATERIALIZED ( + SELECT DISTINCT l_orderkey FROM lineitem_hash_part +), +series AS MATERIALIZED ( + SELECT s FROM generate_series(1,10) s +) +SELECT l_orderkey FROM series JOIN keys ON (s = l_orderkey) +ORDER BY s; +Custom Scan (Citus Adaptive) + Output: remote_scan.l_orderkey + -> Distributed Subplan XXX_1 + -> HashAggregate + Output: remote_scan.l_orderkey + Group Key: remote_scan.l_orderkey + -> Custom Scan (Citus Adaptive) + Output: remote_scan.l_orderkey + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT DISTINCT l_orderkey FROM public.lineitem_hash_part_360041 lineitem_hash_part WHERE true + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: l_orderkey + Group Key: lineitem_hash_part.l_orderkey + -> Seq Scan on public.lineitem_hash_part_360041 lineitem_hash_part + Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment + -> Distributed Subplan XXX_2 + -> Function Scan on pg_catalog.generate_series s + Output: s + Function Call: generate_series(1, 10) + Task Count: 1 + Tasks Shown: All + -> Task + Query: SELECT keys.l_orderkey FROM ((SELECT intermediate_result.s FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(s integer)) series JOIN (SELECT intermediate_result.l_orderkey FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint)) keys ON ((series.s OPERATOR(pg_catalog.=) keys.l_orderkey))) ORDER BY series.s + Node: host=localhost port=xxxxx dbname=regression + -> Merge Join + Output: intermediate_result_1.l_orderkey, intermediate_result.s + Merge Cond: (intermediate_result.s = intermediate_result_1.l_orderkey) + -> Sort + Output: intermediate_result.s + Sort Key: intermediate_result.s + -> Function Scan on pg_catalog.read_intermediate_result intermediate_result + Output: intermediate_result.s + Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) + -> Sort + Output: intermediate_result_1.l_orderkey + Sort Key: intermediate_result_1.l_orderkey + -> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 + Output: 
intermediate_result_1.l_orderkey + Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) +SELECT true AS valid FROM explain_json($$ + WITH result AS ( + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity + ), + series AS ( + SELECT s FROM generate_series(1,10) s + ) + SELECT * FROM result JOIN series ON (s = count_quantity) JOIN orders_hash_part ON (s = o_orderkey) +$$); +t +SELECT true AS valid FROM explain_xml($$ + WITH result AS ( + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity + ), + series AS ( + SELECT s FROM generate_series(1,10) s + ) + SELECT * FROM result JOIN series ON (s = l_quantity) JOIN orders_hash_part ON (s = o_orderkey) +$$); +t +-- +-- Test EXPLAIN ANALYZE udfs +-- +\a\t +\set default_opts '''{"costs": false, "timing": false, "summary": false}'''::jsonb +CREATE TABLE explain_analyze_test(a int, b text); +INSERT INTO explain_analyze_test VALUES (1, 'value 1'), (2, 'value 2'), (3, 'value 3'), (4, 'value 4'); +-- simple select +BEGIN; +SELECT * FROM worker_save_query_explain_analyze('SELECT 1', :default_opts) as (a int); + a +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + Result (actual rows=1 loops=1)+ + +(1 row) + +END; +-- insert into select +BEGIN; +SELECT * FROM worker_save_query_explain_analyze($Q$ + INSERT INTO explain_analyze_test SELECT i, i::text FROM generate_series(1, 5) i $Q$, + :default_opts) as (a int); + a +--------------------------------------------------------------------- +(0 rows) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + Insert on explain_analyze_test (actual rows=0 loops=1) + + -> Function Scan on generate_series i (actual rows=5 loops=1)+ + +(1 row) + +ROLLBACK; +-- select from table +BEGIN; +SELECT * FROM worker_save_query_explain_analyze($Q$SELECT * FROM explain_analyze_test$Q$, + :default_opts) as (a int, b text); + a | b +--------------------------------------------------------------------- + 1 | value 1 + 2 | value 2 + 3 | value 3 + 4 | value 4 +(4 rows) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + Seq Scan on explain_analyze_test (actual rows=4 loops=1)+ + +(1 row) + +ROLLBACK; +-- insert into with returning +BEGIN; +SELECT * FROM worker_save_query_explain_analyze($Q$ + INSERT INTO explain_analyze_test SELECT i, i::text FROM generate_series(1, 5) i + RETURNING a, b$Q$, + :default_opts) as (a int, b text); + a | b +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + Insert on explain_analyze_test (actual rows=5 loops=1) + + -> Function Scan on generate_series i (actual rows=5 loops=1)+ + +(1 row) + +ROLLBACK; +-- delete with returning +BEGIN; +SELECT * FROM worker_save_query_explain_analyze($Q$ + DELETE FROM explain_analyze_test WHERE a % 2 = 0 + RETURNING a, b$Q$, + :default_opts) as (a 
int, b text); + a | b +--------------------------------------------------------------------- + 2 | value 2 + 4 | value 4 +(2 rows) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + Delete on explain_analyze_test (actual rows=2 loops=1) + + -> Seq Scan on explain_analyze_test (actual rows=2 loops=1)+ + Filter: ((a % 2) = 0) + + Rows Removed by Filter: 2 + + +(1 row) + +ROLLBACK; +-- delete without returning +BEGIN; +SELECT * FROM worker_save_query_explain_analyze($Q$ + DELETE FROM explain_analyze_test WHERE a % 2 = 0$Q$, + :default_opts) as (a int); + a +--------------------------------------------------------------------- +(0 rows) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + Delete on explain_analyze_test (actual rows=0 loops=1) + + -> Seq Scan on explain_analyze_test (actual rows=2 loops=1)+ + Filter: ((a % 2) = 0) + + Rows Removed by Filter: 2 + + +(1 row) + +ROLLBACK; +-- multiple queries (should ERROR) +SELECT * FROM worker_save_query_explain_analyze('SELECT 1; SELECT 2', :default_opts) as (a int); +ERROR: cannot EXPLAIN ANALYZE multiple queries +-- error in query +SELECT * FROM worker_save_query_explain_analyze('SELECT x', :default_opts) as (a int); +ERROR: column "x" does not exist +-- error in format string +SELECT * FROM worker_save_query_explain_analyze('SELECT 1', '{"format": "invlaid_format"}') as (a int); +ERROR: Invalid explain analyze format: "invlaid_format" +-- test formats +BEGIN; +SELECT * FROM worker_save_query_explain_analyze('SELECT 1', '{"format": "text", "costs": false}') as (a int); + a +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + Result (actual rows=1 loops=1)+ + +(1 row) + +SELECT * FROM worker_save_query_explain_analyze('SELECT 1', '{"format": "json", "costs": false}') as (a int); + a +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "Result", + + "Parallel Aware": false,+ + "Async Capable": false, + + "Actual Rows": 1, + + "Actual Loops": 1 + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +SELECT * FROM worker_save_query_explain_analyze('SELECT 1', '{"format": "xml", "costs": false}') as (a int); + a +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + + + + + + + Result + + false + + false + + 1 + + 1 + + + + + + + + + + +(1 row) + +SELECT * FROM worker_save_query_explain_analyze('SELECT 1', '{"format": "yaml", "costs": false}') as (a int); + a +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT explain_analyze_output FROM worker_last_saved_explain_analyze(); + explain_analyze_output +--------------------------------------------------------------------- + - Plan: + + Node Type: "Result" + + Parallel Aware: false+ + 
Async Capable: false + + Actual Rows: 1 + + Actual Loops: 1 + + Triggers: +(1 row) + +END; +-- costs on, timing off +BEGIN; +SELECT * FROM worker_save_query_explain_analyze('SELECT * FROM explain_analyze_test', '{"timing": false, "costs": true}') as (a int); + a +--------------------------------------------------------------------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT explain_analyze_output ~ 'Seq Scan.*\(cost=0.00.*\) \(actual rows.*\)' FROM worker_last_saved_explain_analyze(); + ?column? +--------------------------------------------------------------------- + t +(1 row) + +END; +-- costs off, timing on +BEGIN; +SELECT * FROM worker_save_query_explain_analyze('SELECT * FROM explain_analyze_test', '{"timing": true, "costs": false}') as (a int); + a +--------------------------------------------------------------------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT explain_analyze_output ~ 'Seq Scan on explain_analyze_test \(actual time=.* rows=.* loops=1\)' FROM worker_last_saved_explain_analyze(); + ?column? +--------------------------------------------------------------------- + t +(1 row) + +END; +-- summary on +BEGIN; +SELECT * FROM worker_save_query_explain_analyze('SELECT 1', '{"timing": false, "costs": false, "summary": true}') as (a int); + a +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT explain_analyze_output ~ 'Planning Time:.*Execution Time:.*' FROM worker_last_saved_explain_analyze(); + ?column? +--------------------------------------------------------------------- + t +(1 row) + +END; +-- buffers on +BEGIN; +SELECT * FROM worker_save_query_explain_analyze('SELECT * FROM explain_analyze_test', '{"timing": false, "costs": false, "buffers": true}') as (a int); + a +--------------------------------------------------------------------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT explain_analyze_output ~ 'Buffers:' FROM worker_last_saved_explain_analyze(); + ?column? +--------------------------------------------------------------------- + t +(1 row) + +END; +-- verbose on +BEGIN; +SELECT * FROM worker_save_query_explain_analyze('SELECT * FROM explain_analyze_test', '{"timing": false, "costs": false, "verbose": true}') as (a int); + a +--------------------------------------------------------------------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT explain_analyze_output ~ 'Output: a, b' FROM worker_last_saved_explain_analyze(); + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +END; +-- make sure deleted at transaction end +SELECT * FROM worker_save_query_explain_analyze('SELECT 1', '{}') as (a int); + a +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM worker_last_saved_explain_analyze(); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- should be deleted at the end of prepare commit +BEGIN; +SELECT * FROM worker_save_query_explain_analyze('UPDATE explain_analyze_test SET a=6 WHERE a=4', '{}') as (a int); + a +--------------------------------------------------------------------- +(0 rows) + +SELECT count(*) FROM worker_last_saved_explain_analyze(); + count +--------------------------------------------------------------------- + 1 +(1 row) + +PREPARE TRANSACTION 'citus_0_1496350_7_0'; +SELECT count(*) FROM worker_last_saved_explain_analyze(); + count +--------------------------------------------------------------------- + 0 +(1 row) + +COMMIT PREPARED 'citus_0_1496350_7_0'; +-- verify execution time makes sense +BEGIN; +SELECT count(*) FROM worker_save_query_explain_analyze('SELECT pg_sleep(0.05)', :default_opts) as (a int); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT execution_duration BETWEEN 30 AND 200 FROM worker_last_saved_explain_analyze(); + ?column? +--------------------------------------------------------------------- + t +(1 row) + +END; +-- +-- verify we handle parametrized queries properly +-- +CREATE TABLE t(a int); +INSERT INTO t VALUES (1), (2), (3); +-- simple case +PREPARE save_explain AS +SELECT $1, * FROM worker_save_query_explain_analyze('SELECT $1::int', :default_opts) as (a int); +EXECUTE save_explain(1); + ?column? | a +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +deallocate save_explain; +-- Call a UDF first to make sure that we handle stacks of executorBoundParams properly. +-- +-- The prepared statement will first call f() which will force new executor run with new +-- set of parameters. Then it will call worker_save_query_explain_analyze with a +-- parametrized query. If we don't have the correct set of parameters here, it will fail. +CREATE FUNCTION f() RETURNS INT +AS $$ +PREPARE pp1 AS SELECT $1 WHERE $2 = $3; +EXECUTE pp1(4, 5, 5); +deallocate pp1; +SELECT 1$$ LANGUAGE sql volatile; +PREPARE save_explain AS + SELECT $1, CASE WHEN i < 2 THEN + f() = 1 + ELSE + EXISTS(SELECT * FROM worker_save_query_explain_analyze('SELECT $1::int', :default_opts) as (a int) + WHERE a = 1) + END + FROM generate_series(1, 4) i; +EXECUTE save_explain(1); + ?column? 
| exists +--------------------------------------------------------------------- + 1 | t + 1 | t + 1 | t + 1 | t +(4 rows) + +deallocate save_explain; +DROP FUNCTION f(); +DROP TABLE t; +SELECT * FROM explain_analyze_test ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | value 1 + 2 | value 2 + 3 | value 3 + 6 | value 4 +(4 rows) + +\a\t +-- +-- Test different cases of EXPLAIN ANALYZE +-- +SET citus.shard_count TO 4; +SET client_min_messages TO WARNING; +SELECT create_distributed_table('explain_analyze_test', 'a'); + +\set default_analyze_flags '(ANALYZE on, COSTS off, TIMING off, SUMMARY off)' +\set default_explain_flags '(ANALYZE off, COSTS off, TIMING off, SUMMARY off)' +-- router SELECT +EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 1; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 11 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 11 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1) + Filter: (a = 1) +-- multi-shard SELECT +EXPLAIN :default_analyze_flags SELECT count(*) FROM explain_analyze_test; +Aggregate (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=4 loops=1) + Task Count: 4 + Tuple data received from nodes: 32 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 8 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate (actual rows=1 loops=1) + -> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1) +-- empty router SELECT +EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 10000; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1) + Filter: (a = 10000) + Rows Removed by Filter: 1 +-- empty multi-shard SELECT +EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE b = 'does not exist'; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 4 + Tuple data received from nodes: 0 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1) + Filter: (b = 'does not exist'::text) + Rows Removed by Filter: 1 +-- router DML +BEGIN; +EXPLAIN :default_analyze_flags DELETE FROM explain_analyze_test WHERE a = 1; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1) + -> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1) + Filter: (a = 1) +EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'b' WHERE a = 2; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1) + -> Seq Scan on explain_analyze_test_570012 explain_analyze_test (actual rows=1 loops=1) + Filter: (a = 
2) +SELECT * FROM explain_analyze_test ORDER BY a; +2|b +3|value 3 +6|value 4 +ROLLBACK; +-- multi-shard DML +BEGIN; +EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'b' WHERE a IN (1, 2); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1) + -> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1) + Filter: (a = ANY ('{1,2}'::integer[])) +EXPLAIN :default_analyze_flags DELETE FROM explain_analyze_test; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1) + -> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1) +SELECT * FROM explain_analyze_test ORDER BY a; +ROLLBACK; +-- router DML with RETURNING with empty result +EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE a = 10000 RETURNING *; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Update on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1) + -> Seq Scan on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1) + Filter: (a = 10000) + Rows Removed by Filter: 1 +-- multi-shard DML with RETURNING with empty result +EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE b = 'does not exist' RETURNING *; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 4 + Tuple data received from nodes: 0 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Update on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1) + -> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1) + Filter: (b = 'does not exist'::text) + Rows Removed by Filter: 1 +-- single-row insert +BEGIN; +EXPLAIN :default_analyze_flags INSERT INTO explain_analyze_test VALUES (5, 'value 5'); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on explain_analyze_test_570009 (actual rows=0 loops=1) + -> Result (actual rows=1 loops=1) +ROLLBACK; +-- multi-row insert +BEGIN; +EXPLAIN :default_analyze_flags INSERT INTO explain_analyze_test VALUES (5, 'value 5'), (6, 'value 6'); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on explain_analyze_test_570009 citus_table_alias (actual rows=0 loops=1) + -> Result (actual rows=1 loops=1) +ROLLBACK; +-- distributed insert/select +BEGIN; +EXPLAIN :default_analyze_flags INSERT INTO explain_analyze_test SELECT * FROM explain_analyze_test; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on explain_analyze_test_570009 citus_table_alias (actual rows=0 loops=1) + -> Seq Scan on explain_analyze_test_570009 
explain_analyze_test (actual rows=1 loops=1) + Filter: (a IS NOT NULL) +ROLLBACK; +DROP TABLE explain_analyze_test; +-- test EXPLAIN ANALYZE works fine with primary keys +CREATE TABLE explain_pk(a int primary key, b int); +SELECT create_distributed_table('explain_pk', 'a'); + +BEGIN; +EXPLAIN :default_analyze_flags INSERT INTO explain_pk VALUES (1, 2), (2, 3); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on explain_pk_570013 citus_table_alias (actual rows=0 loops=1) + -> Result (actual rows=1 loops=1) +SELECT * FROM explain_pk ORDER BY 1; +1|2 +2|3 +ROLLBACK; +-- test EXPLAIN ANALYZE with non-text output formats +BEGIN; +EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) INSERT INTO explain_pk VALUES (1, 2), (2, 3); +[ + { + "Plan": { + "Node Type": "Custom Scan", + "Custom Plan Provider": "Citus Adaptive", + "Parallel Aware": false, + "Async Capable": false, + "Actual Rows": 0, + "Actual Loops": 1, + "Distributed Query": { + "Job": { + "Task Count": 2, + "Tasks Shown": "One of 2", + "Tasks": [ + { + "Node": "host=localhost port=xxxxx dbname=regression", + "Remote Plan": [ + [ + { + "Plan": { + "Node Type": "ModifyTable", + "Operation": "Insert", + "Parallel Aware": false, + "Async Capable": false, + "Relation Name": "explain_pk_570013", + "Alias": "citus_table_alias", + "Actual Rows": 0, + "Actual Loops": 1, + "Plans": [ + { + "Node Type": "Result", + "Parent Relationship": "Outer", + "Parallel Aware": false, + "Async Capable": false, + "Actual Rows": 1, + "Actual Loops": 1 + } + ] + }, + "Triggers": [ + ] + } + ] + + ] + } + ] + } + } + }, + "Triggers": [ + ] + } +] +ROLLBACK; +EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) SELECT * FROM explain_pk; +[ + { + "Plan": { + "Node Type": "Custom Scan", + "Custom Plan Provider": "Citus Adaptive", + "Parallel Aware": false, + "Async Capable": false, + "Actual Rows": 0, + "Actual Loops": 1, + "Distributed Query": { + "Job": { + "Task Count": 4, + "Tuple data received from nodes": "0 bytes", + "Tasks Shown": "One of 4", + "Tasks": [ + { + "Tuple data received from node": "0 bytes", + "Node": "host=localhost port=xxxxx dbname=regression", + "Remote Plan": [ + [ + { + "Plan": { + "Node Type": "Seq Scan", + "Parallel Aware": false, + "Async Capable": false, + "Relation Name": "explain_pk_570013", + "Alias": "explain_pk", + "Actual Rows": 0, + "Actual Loops": 1 + }, + "Triggers": [ + ] + } + ] + + ] + } + ] + } + } + }, + "Triggers": [ + ] + } +] +BEGIN; +EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) INSERT INTO explain_pk VALUES (1, 2), (2, 3); +<explain xmlns="http://www.postgresql.org/2009/explain"> + <Query> + <Plan> + <Node-Type>Custom Scan</Node-Type> + <Custom-Plan-Provider>Citus Adaptive</Custom-Plan-Provider> + <Parallel-Aware>false</Parallel-Aware> + <Async-Capable>false</Async-Capable> + <Actual-Rows>0</Actual-Rows> + <Actual-Loops>1</Actual-Loops> + <Distributed-Query> + <Job> + <Task-Count>2</Task-Count> + <Tasks-Shown>One of 2</Tasks-Shown> + <Tasks> + <Task> + <Node>host=localhost port=xxxxx dbname=regression</Node> + <Remote-Plan> + <explain xmlns="http://www.postgresql.org/2009/explain"> + <Query> + <Plan> + <Node-Type>ModifyTable</Node-Type> + <Operation>Insert</Operation> + <Parallel-Aware>false</Parallel-Aware> + <Async-Capable>false</Async-Capable> + <Relation-Name>explain_pk_570013</Relation-Name> + <Alias>citus_table_alias</Alias> + <Actual-Rows>0</Actual-Rows> + <Actual-Loops>1</Actual-Loops> + <Plans> + <Plan> + <Node-Type>Result</Node-Type> + <Parent-Relationship>Outer</Parent-Relationship> + <Parallel-Aware>false</Parallel-Aware> + <Async-Capable>false</Async-Capable> + <Actual-Rows>1</Actual-Rows> + <Actual-Loops>1</Actual-Loops> + </Plan> + </Plans> + </Plan> + <Triggers> + </Triggers> + </Query> + </explain> + </Remote-Plan> + </Task> + </Tasks> + </Job> + </Distributed-Query> + </Plan> + <Triggers> + </Triggers> + </Query> +</explain> +ROLLBACK; +EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) SELECT * FROM explain_pk; +<explain xmlns="http://www.postgresql.org/2009/explain"> + <Query> + <Plan> + <Node-Type>Custom Scan</Node-Type> + <Custom-Plan-Provider>Citus Adaptive</Custom-Plan-Provider> + <Parallel-Aware>false</Parallel-Aware> + <Async-Capable>false</Async-Capable> + <Actual-Rows>0</Actual-Rows> + <Actual-Loops>1</Actual-Loops> + <Distributed-Query> + <Job> + <Task-Count>4</Task-Count> + <Tuple-data-received-from-nodes>0 bytes</Tuple-data-received-from-nodes> + <Tasks-Shown>One of 4</Tasks-Shown> + <Tasks> + <Task> + <Tuple-data-received-from-node>0 bytes</Tuple-data-received-from-node> + <Node>host=localhost port=xxxxx dbname=regression</Node> + <Remote-Plan> + <explain xmlns="http://www.postgresql.org/2009/explain"> + <Query> + <Plan> + <Node-Type>Seq Scan</Node-Type> + <Parallel-Aware>false</Parallel-Aware> + <Async-Capable>false</Async-Capable> + <Relation-Name>explain_pk_570013</Relation-Name> + <Alias>explain_pk</Alias> + <Actual-Rows>0</Actual-Rows> + <Actual-Loops>1</Actual-Loops> + </Plan> + <Triggers> + </Triggers> + </Query> + </explain> + </Remote-Plan> + </Task> + </Tasks> + </Job> + </Distributed-Query> + </Plan> + <Triggers> + </Triggers> + </Query> +</explain> +DROP TABLE explain_pk; +-- test EXPLAIN ANALYZE with CTEs and subqueries +CREATE TABLE dist_table(a int, b int); +SELECT create_distributed_table('dist_table', 'a'); + 
+CREATE TABLE ref_table(a int); +SELECT create_reference_table('ref_table'); + +INSERT INTO dist_table SELECT i, i*i FROM generate_series(1, 10) i; +INSERT INTO ref_table SELECT i FROM generate_series(1, 10) i; +EXPLAIN :default_analyze_flags +WITH r AS ( + SELECT GREATEST(random(), 2) r, a FROM dist_table +) +SELECT count(distinct a) from r NATURAL JOIN ref_table; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + -> Distributed Subplan XXX_1 + Intermediate Data Size: 220 bytes + Result destination: Send to 3 nodes + -> Custom Scan (Citus Adaptive) (actual rows=10 loops=1) + Task Count: 4 + Tuple data received from nodes: 120 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 48 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1) + Task Count: 1 + Tuple data received from nodes: 8 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 8 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate (actual rows=1 loops=1) + -> Hash Join (actual rows=10 loops=1) + Hash Cond: (ref_table.a = intermediate_result.a) + -> Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1) + -> Hash (actual rows=10 loops=1) + -> Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1) +EXPLAIN :default_analyze_flags +SELECT count(distinct a) FROM (SELECT GREATEST(random(), 2) r, a FROM dist_table) t NATURAL JOIN ref_table; +Aggregate (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=4 loops=1) + Task Count: 4 + Tuple data received from nodes: 32 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 8 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate (actual rows=1 loops=1) + -> Merge Join (actual rows=4 loops=1) + Merge Cond: (t.a = ref_table.a) + -> Sort (actual rows=4 loops=1) + Sort Key: t.a + Sort Method: quicksort Memory: 25kB + -> Subquery Scan on t (actual rows=4 loops=1) + -> Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1) + -> Sort (actual rows=10 loops=1) + Sort Key: ref_table.a + Sort Method: quicksort Memory: 25kB + -> Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1) +EXPLAIN :default_analyze_flags +SELECT count(distinct a) FROM dist_table +WHERE EXISTS(SELECT random() < 2 FROM dist_table NATURAL JOIN ref_table); +Aggregate (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=4 loops=1) + -> Distributed Subplan XXX_1 + Intermediate Data Size: 70 bytes + Result destination: Send to 2 nodes + -> Custom Scan (Citus Adaptive) (actual rows=10 loops=1) + Task Count: 4 + Tuple data received from nodes: 10 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Merge Join (actual rows=4 loops=1) + Merge Cond: (dist_table.a = ref_table.a) + -> Sort (actual rows=4 loops=1) + Sort Key: dist_table.a + Sort Method: quicksort Memory: 25kB + -> Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1) + -> Sort (actual rows=10 loops=1) + Sort Key: ref_table.a + Sort Method: quicksort Memory: 25kB + -> Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1) + Task Count: 4 + Tuple data received from nodes: 32 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 8 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate (actual rows=1 loops=1) + InitPlan 1 (returns $0) + -> Function Scan on 
read_intermediate_result intermediate_result (actual rows=1 loops=1) + -> Result (actual rows=4 loops=1) + One-Time Filter: $0 + -> Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1) +BEGIN; +EXPLAIN :default_analyze_flags +WITH r AS ( + INSERT INTO dist_table SELECT a, a * a FROM dist_table + RETURNING a +), s AS ( + SELECT random() < 2, a * a a2 FROM r +) +SELECT count(distinct a2) FROM s; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + -> Distributed Subplan XXX_1 + Intermediate Data Size: 100 bytes + Result destination: Write locally + -> Custom Scan (Citus Adaptive) (actual rows=20 loops=1) + Task Count: 4 + Tuple data received from nodes: 160 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 64 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Insert on dist_table_570017 citus_table_alias (actual rows=8 loops=1) + -> Seq Scan on dist_table_570017 dist_table (actual rows=8 loops=1) + Filter: (a IS NOT NULL) + -> Distributed Subplan XXX_2 + Intermediate Data Size: 150 bytes + Result destination: Write locally + -> Custom Scan (Citus Adaptive) (actual rows=10 loops=1) + Task Count: 1 + Tuple data received from nodes: 50 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 50 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1) + Task Count: 1 + Tuple data received from nodes: 8 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 8 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate (actual rows=1 loops=1) + -> Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1) +ROLLBACK; +-- https://github.com/citusdata/citus/issues/4074 +prepare ref_select(int) AS select * from ref_table where 1 = $1; +explain :default_analyze_flags execute ref_select(1); +Custom Scan (Citus Adaptive) (actual rows=10 loops=1) + Task Count: 1 + Tuple data received from nodes: 40 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 40 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Result (actual rows=10 loops=1) + One-Time Filter: (1 = $1) + -> Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1) +deallocate ref_select; +DROP TABLE ref_table, dist_table; +-- test EXPLAIN ANALYZE with different replication factors +SET citus.shard_count = 2; +SET citus.shard_replication_factor = 1; +CREATE TABLE dist_table_rep1(a int); +SELECT create_distributed_table('dist_table_rep1', 'a'); + +SET citus.shard_replication_factor = 2; +CREATE TABLE dist_table_rep2(a int); +SELECT create_distributed_table('dist_table_rep2', 'a'); + +EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep1 VALUES(1), (2), (3), (4), (10), (100) RETURNING *; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Insert on dist_table_rep1_570022 citus_table_alias (actual rows=4 loops=1) + -> Values Scan on "*VALUES*" (actual rows=4 loops=1) +EXPLAIN :default_analyze_flags SELECT * from dist_table_rep1; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 
dist_table_rep1 (actual rows=4 loops=1) +EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep2 VALUES(1), (2), (3), (4), (10), (100) RETURNING *; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 48 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 32 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Insert on dist_table_rep2_570024 citus_table_alias (actual rows=4 loops=1) + -> Values Scan on "*VALUES*" (actual rows=4 loops=1) +EXPLAIN :default_analyze_flags SELECT * from dist_table_rep2; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep2_570024 dist_table_rep2 (actual rows=4 loops=1) +prepare p1 as SELECT * FROM dist_table_rep1; +EXPLAIN :default_analyze_flags EXECUTE p1; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=4 loops=1) +EXPLAIN :default_analyze_flags EXECUTE p1; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=4 loops=1) +EXPLAIN :default_analyze_flags EXECUTE p1; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=4 loops=1) +EXPLAIN :default_analyze_flags EXECUTE p1; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=4 loops=1) +EXPLAIN :default_analyze_flags EXECUTE p1; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=4 loops=1) +EXPLAIN :default_analyze_flags EXECUTE p1; +Custom Scan (Citus Adaptive) (actual rows=6 loops=1) + Task Count: 2 + Tuple data received from nodes: 24 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=4 loops=1) +prepare p2 AS SELECT * FROM dist_table_rep1 WHERE a = $1; +EXPLAIN :default_analyze_flags EXECUTE p2(1); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual 
rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p2(1); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p2(1); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p2(1); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p2(1); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p2(1); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p2(10); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 10) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p2(100); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570023 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 100) + Rows Removed by Filter: 1 +prepare p3 AS SELECT * FROM dist_table_rep1 WHERE a = 1; +EXPLAIN :default_analyze_flags EXECUTE p3; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p3; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All 
+ -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p3; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p3; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p3; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +EXPLAIN :default_analyze_flags EXECUTE p3; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tuple data received from nodes: 4 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=1 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 3 +DROP TABLE dist_table_rep1, dist_table_rep2; +-- https://github.com/citusdata/citus/issues/2009 +CREATE TABLE simple (id integer, name text); +SELECT create_distributed_table('simple', 'id'); + +PREPARE simple_router AS SELECT *, $1 FROM simple WHERE id = 1; +EXPLAIN :default_explain_flags EXECUTE simple_router(1); +Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on simple_570026 simple + Filter: (id = 1) +EXPLAIN :default_analyze_flags EXECUTE simple_router(1); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: (id = 1) +EXPLAIN :default_analyze_flags EXECUTE simple_router(1); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: (id = 1) +EXPLAIN :default_analyze_flags EXECUTE simple_router(1); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: (id = 1) +EXPLAIN :default_analyze_flags EXECUTE simple_router(1); +Custom Scan (Citus Adaptive) 
(actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: (id = 1) +EXPLAIN :default_analyze_flags EXECUTE simple_router(1); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: (id = 1) +EXPLAIN :default_analyze_flags EXECUTE simple_router(1); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: (id = 1) +EXPLAIN :default_analyze_flags EXECUTE simple_router(1); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: (id = 1) +deallocate simple_router; +-- prepared multi-row insert +PREPARE insert_query AS INSERT INTO simple VALUES ($1, 2), (2, $2); +EXPLAIN :default_explain_flags EXECUTE insert_query(3, 4); +Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on simple_570026 citus_table_alias + -> Result +EXPLAIN :default_analyze_flags EXECUTE insert_query(3, 4); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on simple_570026 citus_table_alias (actual rows=0 loops=1) + -> Result (actual rows=1 loops=1) +deallocate insert_query; +-- prepared updates +PREPARE update_query AS UPDATE simple SET name=$1 WHERE name=$2; +EXPLAIN :default_explain_flags EXECUTE update_query('x', 'y'); +Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on simple_570026 simple + -> Seq Scan on simple_570026 simple + Filter: (name = 'y'::text) +EXPLAIN :default_analyze_flags EXECUTE update_query('x', 'y'); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Update on simple_570026 simple (actual rows=0 loops=1) + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: (name = $2) + Rows Removed by Filter: 1 +deallocate update_query; +-- prepared deletes +PREPARE delete_query AS DELETE FROM simple WHERE name=$1 OR name=$2; +EXPLAIN (COSTS OFF) EXECUTE delete_query('x', 'y'); +Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on simple_570026 simple + -> Seq Scan on simple_570026 simple + Filter: ((name = 'x'::text) OR (name = 'y'::text)) +EXPLAIN :default_analyze_flags EXECUTE delete_query('x', 'y'); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx 
dbname=regression + -> Delete on simple_570026 simple (actual rows=0 loops=1) + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: ((name = $1) OR (name = $2)) + Rows Removed by Filter: 1 +deallocate delete_query; +-- prepared distributed insert/select +-- we don't support EXPLAIN for prepared insert/selects of other types. +PREPARE distributed_insert_select AS INSERT INTO simple SELECT * FROM simple WHERE name IN ($1, $2); +EXPLAIN :default_explain_flags EXECUTE distributed_insert_select('x', 'y'); +Custom Scan (Citus Adaptive) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on simple_570026 citus_table_alias + -> Seq Scan on simple_570026 simple + Filter: ((id IS NOT NULL) AND (name = ANY ('{x,y}'::text[]))) +EXPLAIN :default_analyze_flags EXECUTE distributed_insert_select('x', 'y'); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on simple_570026 citus_table_alias (actual rows=0 loops=1) + -> Seq Scan on simple_570026 simple (actual rows=0 loops=1) + Filter: ((id IS NOT NULL) AND (name = ANY (ARRAY[$1, $2]))) + Rows Removed by Filter: 1 +deallocate distributed_insert_select; +DROP TABLE simple; +-- prepared cte +BEGIN; +PREPARE cte_query AS +WITH keys AS ( + SELECT count(*) FROM + (SELECT DISTINCT l_orderkey, GREATEST(random(), 2) FROM lineitem_hash_part WHERE l_quantity > $1) t +), +series AS ( + SELECT s FROM generate_series(1, $2) s +), +delete_result AS ( + DELETE FROM lineitem_hash_part WHERE l_quantity < $3 RETURNING * +) +SELECT s FROM series; +EXPLAIN :default_explain_flags EXECUTE cte_query(2, 10, -1); +Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Delete on lineitem_hash_part_360041 lineitem_hash_part + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + Filter: (l_quantity < '-1'::numeric) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Function Scan on generate_series s +EXPLAIN :default_analyze_flags EXECUTE cte_query(2, 10, -1); +Custom Scan (Citus Adaptive) (actual rows=10 loops=1) + -> Distributed Subplan XXX_1 + Intermediate Data Size: 0 bytes + Result destination: Send to 0 nodes + -> Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 4 + Tuple data received from nodes: 0 bytes + Tasks Shown: One of 4 + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Delete on lineitem_hash_part_360041 lineitem_hash_part (actual rows=0 loops=1) + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part (actual rows=0 loops=1) + Filter: (l_quantity < '-1'::numeric) + Rows Removed by Filter: 2885 + Task Count: 1 + Tuple data received from nodes: 40 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 40 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Function Scan on generate_series s (actual rows=10 loops=1) +ROLLBACK; +-- https://github.com/citusdata/citus/issues/2009#issuecomment-653036502 +CREATE TABLE users_table_2 (user_id int primary key, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint); +SELECT create_reference_table('users_table_2'); + +PREPARE p4 (int, int) AS insert into users_table_2 ( value_1, user_id) select value_1, user_id + $2 
FROM users_table_2 ON CONFLICT (user_id) DO UPDATE SET value_2 = EXCLUDED.value_1 + $1; +EXPLAIN :default_explain_flags execute p4(20,20); +Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on users_table_2_570028 citus_table_alias + Conflict Resolution: UPDATE + Conflict Arbiter Indexes: users_table_2_pkey_570028 + -> Seq Scan on users_table_2_570028 users_table_2 +EXPLAIN :default_analyze_flags execute p4(20,20); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Insert on users_table_2_570028 citus_table_alias (actual rows=0 loops=1) + Conflict Resolution: UPDATE + Conflict Arbiter Indexes: users_table_2_pkey_570028 + Tuples Inserted: 0 + Conflicting Tuples: 0 + -> Seq Scan on users_table_2_570028 users_table_2 (actual rows=0 loops=1) +-- simple test to confirm we can fetch long (>4KB) plans +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM users_table_2 WHERE value_1::text = '00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000X'; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on users_table_2_570028 users_table_2 (actual rows=0 loops=1) + Filter: ((value_1)::text = 
'00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000X'::text) +DROP TABLE users_table_2; +-- sorted explain analyze output +CREATE TABLE explain_analyze_execution_time (a int); +INSERT INTO explain_analyze_execution_time VALUES (2); +SELECT create_distributed_table('explain_analyze_execution_time', 'a'); + +-- show that we can sort the output wrt execution time +-- we do the following hack to make the test outputs +-- be consistent. First, ingest a single row then add +-- pg_sleep() call on the query. Postgres will only +-- sleep for the shard that has the single row, so that +-- will definitely be slower +set citus.explain_analyze_sort_method to "taskId"; +EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) select a, CASE WHEN pg_sleep(0.4) IS NULL THEN 'x' END from explain_analyze_execution_time; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 2 + Tuple data received from nodes: 4 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on explain_analyze_execution_time_570029 explain_analyze_execution_time (actual rows=0 loops=1) +set citus.explain_analyze_sort_method to "execution-time"; +EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) select a, CASE WHEN pg_sleep(0.4) IS NULL THEN 'x' END from explain_analyze_execution_time; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 2 + Tuple data received from nodes: 4 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 4 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on explain_analyze_execution_time_570030 explain_analyze_execution_time (actual rows=1 loops=1) +-- reset back +reset citus.explain_analyze_sort_method; +DROP TABLE explain_analyze_execution_time; +CREATE SCHEMA multi_explain; +SET search_path TO multi_explain; +-- test EXPLAIN ANALYZE when original query returns no columns +CREATE TABLE reference_table(a int); +SELECT create_reference_table('reference_table'); + +INSERT INTO reference_table VALUES (1); +EXPLAIN :default_analyze_flags SELECT FROM reference_table; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on reference_table_570031 reference_table (actual rows=1 loops=1) +CREATE TABLE distributed_table_1(a int, b int); +SELECT create_distributed_table('distributed_table_1','a'); + +INSERT INTO distributed_table_1 values (1,1); +EXPLAIN :default_analyze_flags SELECT row_number() OVER() AS r FROM distributed_table_1; +WindowAgg (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on distributed_table_1_570032 distributed_table_1 (actual rows=1 loops=1) +CREATE TABLE distributed_table_2(a int, b int); +SELECT create_distributed_table('distributed_table_2','a'); + +INSERT INTO 
distributed_table_2 VALUES (1,1); +EXPLAIN :default_analyze_flags +WITH r AS (SELECT row_number() OVER () AS r FROM distributed_table_1) +SELECT * FROM distributed_table_2 +JOIN r ON (r = distributed_table_2.b) +LIMIT 3; +Limit (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + -> Distributed Subplan XXX_1 + Intermediate Data Size: 14 bytes + Result destination: Send to 2 nodes + -> WindowAgg (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on distributed_table_1_570032 distributed_table_1 (actual rows=1 loops=1) + Task Count: 2 + Tuple data received from nodes: 16 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 16 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Limit (actual rows=1 loops=1) + -> Nested Loop (actual rows=1 loops=1) + Join Filter: (distributed_table_2.b = intermediate_result.r) + -> Function Scan on read_intermediate_result intermediate_result (actual rows=1 loops=1) + -> Seq Scan on distributed_table_2_570034 distributed_table_2 (actual rows=1 loops=1) +EXPLAIN :default_analyze_flags SELECT FROM (SELECT * FROM reference_table) subquery; +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on reference_table_570031 reference_table (actual rows=1 loops=1) +PREPARE dummy_prep_stmt(int) AS SELECT FROM distributed_table_1; +EXPLAIN :default_analyze_flags EXECUTE dummy_prep_stmt(50); +Custom Scan (Citus Adaptive) (actual rows=1 loops=1) + Task Count: 2 + Tasks Shown: One of 2 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on distributed_table_1_570032 distributed_table_1 (actual rows=1 loops=1) +CREATE TYPE multi_explain.int_wrapper_type AS (int_field int); +CREATE TABLE tbl (a int, b multi_explain.int_wrapper_type); +SELECT create_distributed_table('tbl', 'a'); + +EXPLAIN :default_analyze_flags SELECT * FROM tbl; +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 2 + Tuple data received from nodes: 0 bytes + Tasks Shown: One of 2 + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on tbl_570036 tbl (actual rows=0 loops=1) +PREPARE q1(int_wrapper_type) AS WITH a AS (SELECT * FROM tbl WHERE b = $1 AND a = 1 OFFSET 0) SELECT * FROM a; +EXPLAIN (COSTS false) EXECUTE q1('(1)'); +Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on tbl_570036 tbl + Filter: ((b = '(1)'::multi_explain.int_wrapper_type) AND (a = 1)) +EXPLAIN :default_analyze_flags EXECUTE q1('(1)'); +Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tuple data received from nodes: 0 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 0 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on tbl_570036 tbl (actual rows=0 loops=1) + Filter: ((b = $1) AND (a = 1)) +PREPARE q2(int_wrapper_type) AS WITH a AS (UPDATE tbl SET b = $1 WHERE a = 1 RETURNING *) SELECT * FROM a; +EXPLAIN (COSTS false) EXECUTE q2('(1)'); +Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> CTE Scan on a + CTE a + -> Update on tbl_570036 tbl + -> Seq Scan on tbl_570036 tbl + Filter: (a = 1) 
+EXPLAIN :default_analyze_flags EXECUTE q2('(1)');
+Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
+  Task Count: 1
+  Tuple data received from nodes: 0 bytes
+  Tasks Shown: All
+  ->  Task
+        Tuple data received from node: 0 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  CTE Scan on a (actual rows=0 loops=1)
+              CTE a
+                ->  Update on tbl_570036 tbl (actual rows=0 loops=1)
+                      ->  Seq Scan on tbl_570036 tbl (actual rows=0 loops=1)
+                            Filter: (a = 1)
+-- check when auto explain + analyze is enabled, we do not allow local execution.
+CREATE SCHEMA test_auto_explain;
+SET search_path TO 'test_auto_explain';
+CREATE TABLE test_ref_table (key int PRIMARY KEY);
+SELECT create_reference_table('test_ref_table');
+
+LOAD 'auto_explain';
+SET auto_explain.log_min_duration = 0;
+set auto_explain.log_analyze to true;
+-- the following should not be locally executed since explain analyze is on
+select * from test_ref_table;
+DROP SCHEMA test_auto_explain CASCADE;
+SET client_min_messages TO ERROR;
+DROP SCHEMA multi_explain CASCADE;
diff --git a/src/test/regress/expected/multi_orderby_limit_pushdown.out b/src/test/regress/expected/multi_orderby_limit_pushdown.out
index 9d67c9810..0a625e47f 100644
--- a/src/test/regress/expected/multi_orderby_limit_pushdown.out
+++ b/src/test/regress/expected/multi_orderby_limit_pushdown.out
@@ -374,17 +374,17 @@ LIMIT 2;
 (2 rows)
 
 EXPLAIN (COSTS OFF)
-SELECT ut.user_id, count(DISTINCT ut.value_2)
+SELECT ut.user_id, avg(ut.value_2)
 FROM users_table ut, events_table et
 WHERE ut.user_id = et.user_id and et.value_2 < 5
 GROUP BY ut.user_id
 ORDER BY 2, AVG(ut.value_1), 1 DESC
 LIMIT 5;
-                                        QUERY PLAN
+                                       QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Sort
-        Sort Key: remote_scan.count, remote_scan.worker_column_3, remote_scan.user_id DESC
+        Sort Key: remote_scan.avg, remote_scan.worker_column_3, remote_scan.user_id DESC
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
               Tasks Shown: One of 4
@@ -392,16 +392,14 @@ LIMIT 5;
               Node: host=localhost port=xxxxx dbname=regression
               ->  Limit
                     ->  Sort
-                          Sort Key: (count(DISTINCT ut.value_2)), (avg(ut.value_1)), ut.user_id DESC
-                          ->  GroupAggregate
+                          Sort Key: (avg(ut.value_2)), (avg(ut.value_1)), ut.user_id DESC
+                          ->  HashAggregate
                                 Group Key: ut.user_id
-                                ->  Sort
-                                      Sort Key: ut.user_id DESC
-                                      ->  Hash Join
-                                            Hash Cond: (ut.user_id = et.user_id)
-                                            ->  Seq Scan on users_table_1400256 ut
-                                            ->  Hash
-                                                  ->  Seq Scan on events_table_1400260 et
-                                                        Filter: (value_2 < 5)
-(21 rows)
+                                ->  Hash Join
+                                      Hash Cond: (ut.user_id = et.user_id)
+                                      ->  Seq Scan on users_table_1400256 ut
+                                      ->  Hash
+                                            ->  Seq Scan on events_table_1400260 et
+                                                  Filter: (value_2 < 5)
+(19 rows)
diff --git a/src/test/regress/expected/multi_select_distinct.out b/src/test/regress/expected/multi_select_distinct.out
index 75d47026b..689adcc8a 100644
--- a/src/test/regress/expected/multi_select_distinct.out
+++ b/src/test/regress/expected/multi_select_distinct.out
@@ -813,7 +813,7 @@ SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
 EXPLAIN (COSTS FALSE)
   SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
   FROM lineitem_hash_part
-  GROUP BY l_orderkey
+  GROUP BY l_orderkey, l_partkey, l_shipmode
   ORDER BY 1,2;
                             QUERY PLAN
---------------------------------------------------------------------
@@ -827,9 +827,9 @@ EXPLAIN (COSTS FALSE)
     ->  Task
           Node: host=localhost port=xxxxx dbname=regression
           ->  GroupAggregate
-                Group Key: l_orderkey
+                Group Key: l_orderkey, l_partkey, l_shipmode
                 ->  Sort
-                      Sort Key: l_orderkey
+                      Sort Key: l_orderkey, l_partkey, l_shipmode
                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
 (14 rows)
@@ -839,7 +839,7 @@ SET enable_hashagg TO off;
 EXPLAIN (COSTS FALSE)
   SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
   FROM lineitem_hash_part
-  GROUP BY l_orderkey
+  GROUP BY l_orderkey, l_partkey, l_shipmode
   ORDER BY 1,2;
                             QUERY PLAN
---------------------------------------------------------------------
@@ -852,9 +852,9 @@ EXPLAIN (COSTS FALSE)
     ->  Task
           Node: host=localhost port=xxxxx dbname=regression
           ->  GroupAggregate
-                Group Key: l_orderkey
+                Group Key: l_orderkey, l_partkey, l_shipmode
                 ->  Sort
-                      Sort Key: l_orderkey
+                      Sort Key: l_orderkey, l_partkey, l_shipmode
                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
 (13 rows)
diff --git a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out
index 52cbe3917..0f656ee0b 100644
--- a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out
+++ b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out
@@ -152,7 +152,7 @@ SELECT
 FROM
   users_table RIGHT JOIN users_reference_table USING (user_id)
 WHERE
-  users_table.value_2 IN
+  users_reference_table.value_2 IN
   (SELECT
     value_2
   FROM
diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out
index 11f78ea34..3445f442a 100644
--- a/src/test/regress/expected/multi_view.out
+++ b/src/test/regress/expected/multi_view.out
@@ -92,7 +92,7 @@ SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1
 326 | 7
 (5 rows)
 
-CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR';
+CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part table_name_for_view WHERE l_shipmode = 'AIR';
 -- join between view and table
 SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey);
  count
@@ -179,7 +179,7 @@ SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_linei
 -- it passes planning, fails at execution stage
 SET client_min_messages TO DEBUG1;
 SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey) ORDER BY o_orderkey DESC, o_custkey DESC, o_orderpriority DESC LIMIT 5;
-DEBUG: generating subplan XXX_1 for subquery SELECT lineitem_hash_part.l_orderkey, lineitem_hash_part.l_partkey, lineitem_hash_part.l_suppkey, lineitem_hash_part.l_linenumber, lineitem_hash_part.l_quantity, lineitem_hash_part.l_extendedprice, lineitem_hash_part.l_discount, lineitem_hash_part.l_tax, lineitem_hash_part.l_returnflag, lineitem_hash_part.l_linestatus, lineitem_hash_part.l_shipdate, lineitem_hash_part.l_commitdate, lineitem_hash_part.l_receiptdate, lineitem_hash_part.l_shipinstruct, lineitem_hash_part.l_shipmode, lineitem_hash_part.l_comment FROM public.lineitem_hash_part WHERE (lineitem_hash_part.l_shipmode OPERATOR(pg_catalog.=) 'AIR'::bpchar)
+DEBUG: generating subplan XXX_1 for subquery SELECT l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment FROM public.lineitem_hash_part table_name_for_view WHERE (l_shipmode OPERATOR(pg_catalog.=) 'AIR'::bpchar)
 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT priority_orders.o_orderkey, priority_orders.o_custkey, priority_orders.o_orderstatus, priority_orders.o_totalprice, priority_orders.o_orderdate, priority_orders.o_orderpriority, priority_orders.o_clerk, priority_orders.o_shippriority, priority_orders.o_comment, air_shipped_lineitems.l_orderkey, air_shipped_lineitems.l_partkey, air_shipped_lineitems.l_suppkey, air_shipped_lineitems.l_linenumber, air_shipped_lineitems.l_quantity, air_shipped_lineitems.l_extendedprice, air_shipped_lineitems.l_discount, air_shipped_lineitems.l_tax, air_shipped_lineitems.l_returnflag, air_shipped_lineitems.l_linestatus, air_shipped_lineitems.l_shipdate, air_shipped_lineitems.l_commitdate, air_shipped_lineitems.l_receiptdate, air_shipped_lineitems.l_shipinstruct, air_shipped_lineitems.l_shipmode, air_shipped_lineitems.l_comment FROM ((SELECT orders_hash_part.o_orderkey, orders_hash_part.o_custkey, orders_hash_part.o_orderstatus, orders_hash_part.o_totalprice, orders_hash_part.o_orderdate, orders_hash_part.o_orderpriority, orders_hash_part.o_clerk, orders_hash_part.o_shippriority, orders_hash_part.o_comment FROM public.orders_hash_part WHERE (orders_hash_part.o_orderpriority OPERATOR(pg_catalog.<) '3-MEDIUM'::bpchar)) priority_orders JOIN (SELECT intermediate_result.l_orderkey, intermediate_result.l_partkey, intermediate_result.l_suppkey, intermediate_result.l_linenumber, intermediate_result.l_quantity, intermediate_result.l_extendedprice, intermediate_result.l_discount, intermediate_result.l_tax, intermediate_result.l_returnflag, intermediate_result.l_linestatus, intermediate_result.l_shipdate, intermediate_result.l_commitdate, intermediate_result.l_receiptdate, intermediate_result.l_shipinstruct, intermediate_result.l_shipmode, intermediate_result.l_comment FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint, l_partkey integer, l_suppkey integer, l_linenumber integer, l_quantity numeric(15,2), l_extendedprice numeric(15,2), l_discount numeric(15,2), l_tax numeric(15,2), l_returnflag character(1), l_linestatus character(1), l_shipdate date, l_commitdate date, l_receiptdate date, l_shipinstruct character(25), l_shipmode character(10), l_comment character varying(44))) air_shipped_lineitems ON ((priority_orders.o_custkey OPERATOR(pg_catalog.=) air_shipped_lineitems.l_suppkey))) ORDER BY priority_orders.o_orderkey DESC, priority_orders.o_custkey DESC, priority_orders.o_orderpriority DESC LIMIT 5
 DEBUG: push down of limit count: 5
  o_orderkey | o_custkey | o_orderstatus | o_totalprice | o_orderdate | o_orderpriority | o_clerk | o_shippriority | o_comment | l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment
diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out
index 1c1a7d935..bcfe06fba 100644
--- a/src/test/regress/expected/non_colocated_subquery_joins.out
+++ b/src/test/regress/expected/non_colocated_subquery_joins.out
@@ -1079,19 +1079,19 @@ SELECT create_distributed_table('table1','tenant_id');
 
 (1 row)
 
-CREATE VIEW table1_view AS SELECT * from table1 where id < 100;
+CREATE VIEW table1_view AS SELECT * from table1 table_name_for_view where id < 100;
 -- all of the above queries are non-colocated subquery joins
 -- because the views are replaced with subqueries
 UPDATE table2 SET id=20 FROM table1_view WHERE table1_view.id=table2.id;
 DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
 DEBUG: Router planner cannot handle multi-shard select queries
-DEBUG: generating subplan XXX_1 for subquery SELECT table1.id, table1.tenant_id FROM non_colocated_subquery.table1 WHERE (table1.id OPERATOR(pg_catalog.<) 100)
+DEBUG: generating subplan XXX_1 for subquery SELECT id, tenant_id FROM non_colocated_subquery.table1 table_name_for_view WHERE (id OPERATOR(pg_catalog.<) 100)
 DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2.id)
 DEBUG: Creating router plan
 UPDATE table2_p1 SET id=20 FROM table1_view WHERE table1_view.id=table2_p1.id;
 DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
 DEBUG: Router planner cannot handle multi-shard select queries
-DEBUG: generating subplan XXX_1 for subquery SELECT table1.id, table1.tenant_id FROM non_colocated_subquery.table1 WHERE (table1.id OPERATOR(pg_catalog.<) 100)
+DEBUG: generating subplan XXX_1 for subquery SELECT id, tenant_id FROM non_colocated_subquery.table1 table_name_for_view WHERE (id OPERATOR(pg_catalog.<) 100)
 DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2_p1 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2_p1.id)
 DEBUG: Creating router plan
 RESET client_min_messages;
diff --git a/src/test/regress/expected/recurring_outer_join.out b/src/test/regress/expected/recurring_outer_join.out
index 4ff353838..0764f05dc 100644
--- a/src/test/regress/expected/recurring_outer_join.out
+++ b/src/test/regress/expected/recurring_outer_join.out
@@ -1187,17 +1187,16 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
 -- same test using a view, can be recursively planned
 CREATE VIEW my_view_1 AS
-SELECT * FROM dist_1 t2 WHERE EXISTS (
+SELECT * FROM dist_1 table_name_for_view WHERE EXISTS (
     SELECT * FROM dist_1 t4
-    WHERE t4.a = t2.a
-);
+    WHERE t4.a = table_name_for_view.a);
 SELECT COUNT(*) FROM
 ref_1 t1
 LEFT JOIN
 my_view_1 t3
 USING (a);
 DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
 DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
-DEBUG: generating subplan XXX_1 for subquery SELECT t2.a, t2.b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) t2.a)))
+DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 table_name_for_view WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) table_name_for_view.a)))
 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3 USING (a))
  count
---------------------------------------------------------------------
diff --git a/src/test/regress/expected/replicate_reference_tables_to_coordinator.out b/src/test/regress/expected/replicate_reference_tables_to_coordinator.out
index 4dbeda307..975f501ef 100644
--- a/src/test/regress/expected/replicate_reference_tables_to_coordinator.out
+++ b/src/test/regress/expected/replicate_reference_tables_to_coordinator.out
@@ -396,10 +396,10 @@ DROP VIEW numbers_v, local_table_v;
 -- Joins between reference tables and materialized views are allowed to
 -- be planned to be executed locally.
 --
-CREATE MATERIALIZED VIEW numbers_v AS SELECT * FROM numbers WHERE a BETWEEN 1 AND 10;
-NOTICE: executing the command locally: SELECT a FROM replicate_ref_to_coordinator.numbers_8000001 numbers WHERE ((a OPERATOR(pg_catalog.>=) 1) AND (a OPERATOR(pg_catalog.<=) 10))
+CREATE MATERIALIZED VIEW numbers_v AS SELECT * FROM numbers table_name_for_view WHERE a BETWEEN 1 AND 10;
+NOTICE: executing the command locally: SELECT a FROM replicate_ref_to_coordinator.numbers_8000001 table_name_for_view WHERE ((a OPERATOR(pg_catalog.>=) 1) AND (a OPERATOR(pg_catalog.<=) 10))
 REFRESH MATERIALIZED VIEW numbers_v;
-NOTICE: executing the command locally: SELECT numbers.a FROM replicate_ref_to_coordinator.numbers_8000001 numbers WHERE ((numbers.a OPERATOR(pg_catalog.>=) 1) AND (numbers.a OPERATOR(pg_catalog.<=) 10))
+NOTICE: executing the command locally: SELECT a FROM replicate_ref_to_coordinator.numbers_8000001 table_name_for_view WHERE ((a OPERATOR(pg_catalog.>=) 1) AND (a OPERATOR(pg_catalog.<=) 10))
 SELECT * FROM squares JOIN numbers_v ON squares.a = numbers_v.a ORDER BY 1;
 NOTICE: executing the command locally: SELECT squares.a, squares.b, numbers_v.a FROM (replicate_ref_to_coordinator.squares_8000000 squares JOIN replicate_ref_to_coordinator.numbers_v ON ((squares.a OPERATOR(pg_catalog.=) numbers_v.a))) ORDER BY squares.a
  a | b | a
diff --git a/src/test/regress/expected/undistribute_table.out b/src/test/regress/expected/undistribute_table.out
index 6c77af4fb..15d0e2695 100644
--- a/src/test/regress/expected/undistribute_table.out
+++ b/src/test/regress/expected/undistribute_table.out
@@ -304,18 +304,18 @@ SELECT create_distributed_table('view_table', 'a');
 
 (1 row)
 
 INSERT INTO view_table VALUES (1, 2, 3), (2, 4, 6), (3, 6, 9);
 CREATE SCHEMA another_schema;
-CREATE VIEW undis_view1 AS SELECT a, b FROM view_table;
-CREATE VIEW undis_view2 AS SELECT a, c FROM view_table;
+CREATE VIEW undis_view1 AS SELECT a, b FROM view_table table_name_for_view;
+CREATE VIEW undis_view2 AS SELECT a, c FROM view_table table_name_for_view;
 CREATE VIEW another_schema.undis_view3 AS SELECT b, c FROM undis_view1 JOIN undis_view2 ON undis_view1.a = undis_view2.a;
 SELECT schemaname, viewname, viewowner, definition FROM pg_views WHERE viewname LIKE 'undis\_view%' ORDER BY viewname;
      schemaname     |  viewname   | viewowner |              definition
---------------------------------------------------------------------
- undistribute_table | undis_view1 | postgres |  SELECT view_table.a,                   +
-                    |             |           |     view_table.b                        +
-                    |             |           |    FROM view_table;
- undistribute_table | undis_view2 | postgres |  SELECT view_table.a,                   +
-                    |             |           |     view_table.c                        +
-                    |             |           |    FROM view_table;
+ undistribute_table | undis_view1 | postgres |  SELECT a,                              +
+                    |             |           |     b                                   +
+                    |             |           |    FROM view_table table_name_for_view;
+ undistribute_table | undis_view2 | postgres |  SELECT a,                              +
+                    |             |           |     c                                   +
+                    |             |           |    FROM view_table table_name_for_view;
  another_schema     | undis_view3 | postgres |  SELECT undis_view1.b,                  +
                     |             |           |     undis_view2.c                       +
                     |             |           |    FROM (undis_view1                    +
@@ -348,12 +348,12 @@ NOTICE: renaming the new table to undistribute_table.view_table
 SELECT schemaname, viewname, viewowner, definition FROM pg_views WHERE viewname LIKE 'undis\_view%' ORDER BY viewname;
      schemaname     |  viewname   | viewowner |              definition
---------------------------------------------------------------------
- undistribute_table | undis_view1 | postgres |  SELECT view_table.a,                   +
-                    |             |           |     view_table.b                        +
-                    |             |           |    FROM view_table;
- undistribute_table | undis_view2 | postgres |  SELECT view_table.a,                   +
-                    |             |           |     view_table.c                        +
-                    |             |           |    FROM view_table;
+ undistribute_table | undis_view1 | postgres |  SELECT a,                              +
+                    |             |           |     b                                   +
+                    |             |           |    FROM view_table table_name_for_view;
+ undistribute_table | undis_view2 | postgres |  SELECT a,                              +
+                    |             |           |     c                                   +
+                    |             |           |    FROM view_table table_name_for_view;
  another_schema     | undis_view3 | postgres |  SELECT undis_view1.b,                  +
                     |             |           |     undis_view2.c                       +
                     |             |           |    FROM (undis_view1                    +
diff --git a/src/test/regress/expected/view_propagation.out b/src/test/regress/expected/view_propagation.out
index d3d5bdb7b..5591a962f 100644
--- a/src/test/regress/expected/view_propagation.out
+++ b/src/test/regress/expected/view_propagation.out
@@ -316,13 +316,13 @@ UNION ALL
 employees e
 INNER JOIN reporting_line rl ON e.manager_id = rl.employee_id;
 -- Aliases are supported
-CREATE VIEW aliased_opt_prop_view(alias_1, alias_2) AS SELECT * FROM view_table_6;
+CREATE VIEW aliased_opt_prop_view(alias_1, alias_2) AS SELECT * FROM view_table_6 table_name_for_view;
 -- View options are supported
 CREATE VIEW opt_prop_view
   WITH(check_option=CASCADED, security_barrier=true)
-  AS SELECT * FROM view_table_6;
+  AS SELECT * FROM view_table_6 table_name_for_view;
 CREATE VIEW sep_opt_prop_view
-  AS SELECT * FROM view_table_6
+  AS SELECT * FROM view_table_6 table_name_for_view
   WITH LOCAL CHECK OPTION;
 SELECT * FROM (SELECT pg_identify_object_as_address(classid, objid, objsubid) as obj_identifier from pg_catalog.pg_dist_object) as obj_identifiers where obj_identifier::text like '%opt_prop_view%' ORDER BY 1;
  obj_identifier
@@ -335,27 +335,27 @@ SELECT * FROM (SELECT pg_identify_object_as_address(classid, objid, objsubid) as
 -- Check definitions and reltoptions of views are correct on workers
 \c - - - :worker_1_port
 SELECT definition FROM pg_views WHERE viewname = 'aliased_opt_prop_view';
-                  definition
+             definition
---------------------------------------------------------------------
- SELECT view_table_6.id AS alias_1,                        +
-    view_table_6.val_1 AS alias_2                          +
-   FROM view_prop_schema.view_table_6;
+ SELECT id AS alias_1,                                     +
+    val_1 AS alias_2                                       +
+   FROM view_prop_schema.view_table_6 table_name_for_view;
 (1 row)
 
 SELECT definition FROM pg_views WHERE viewname = 'opt_prop_view';
-                  definition
+             definition
---------------------------------------------------------------------
- SELECT view_table_6.id,                                   +
-    view_table_6.val_1                                     +
-   FROM view_prop_schema.view_table_6;
+ SELECT id,                                                +
+    val_1                                                  +
+   FROM view_prop_schema.view_table_6 table_name_for_view;
 (1 row)
 
 SELECT definition FROM pg_views WHERE viewname = 'sep_opt_prop_view';
-                  definition
+             definition
---------------------------------------------------------------------
- SELECT view_table_6.id,                                   +
-    view_table_6.val_1                                     +
-   FROM view_prop_schema.view_table_6;
+ SELECT id,                                                +
+    val_1                                                  +
+   FROM view_prop_schema.view_table_6 table_name_for_view;
 (1 row)
 
 SELECT relname, reloptions
@@ -444,7 +444,7 @@ SELECT create_distributed_table('alter_view_table','id');
 
 (1 row)
 
-CREATE VIEW alter_view_1 AS SELECT * FROM alter_view_table;
+CREATE VIEW alter_view_1 AS SELECT * FROM alter_view_table table_name_for_view;
 -- Set/drop default value is not supported by Citus
 ALTER VIEW alter_view_1 ALTER COLUMN val1 SET DEFAULT random()::text;
 ERROR: Citus doesn't support setting or resetting default values for a column of view
@@ -465,11 +465,11 @@ ALTER TABLE alter_view_1 SET (check_option=cascaded, security_barrier);
 ALTER TABLE alter_view_1 SET (check_option=cascaded, security_barrier = true);
 -- Check the definition on both coordinator and worker node
 SELECT definition FROM pg_views WHERE viewname = 'alter_view_1';
-             definition
+        definition
---------------------------------------------------------------------
- SELECT alter_view_table.id,                    +
-    alter_view_table.val1                       +
-   FROM alter_view_table;
+ SELECT id,                                     +
+    val1                                        +
+   FROM alter_view_table table_name_for_view;
 (1 row)
 
 SELECT relname, reloptions
@@ -482,11 +482,11 @@ WHERE oid = 'view_prop_schema.alter_view_1'::regclass::oid;
 \c - - - :worker_1_port
 SELECT definition FROM pg_views WHERE viewname = 'alter_view_1';
-             definition
+        definition
---------------------------------------------------------------------
- SELECT alter_view_table.id,                               +
-    alter_view_table.val1                                  +
-   FROM view_prop_schema.alter_view_table;
+ SELECT id,                                                +
+    val1                                                   +
+   FROM view_prop_schema.alter_view_table table_name_for_view;
 (1 row)
 
 SELECT relname, reloptions
diff --git a/src/test/regress/sql/citus_local_tables_mx.sql b/src/test/regress/sql/citus_local_tables_mx.sql
index 2bb79a802..2f7c76d6e 100644
--- a/src/test/regress/sql/citus_local_tables_mx.sql
+++ b/src/test/regress/sql/citus_local_tables_mx.sql
@@ -473,7 +473,7 @@ select run_command_on_workers($$SELECT count(*)=0 from citus_local_tables_mx.v10
 select run_command_on_workers($$SELECT count(*)=0 from citus_local_tables_mx.v102$$);
 
 CREATE TABLE loc_tb_2 (a int);
-CREATE VIEW v104 AS SELECT * from loc_tb_2;
+CREATE VIEW v104 AS SELECT * from loc_tb_2 table_name_for_view;
 
 SET client_min_messages TO DEBUG1;
 -- verify the CREATE command for the view is generated correctly
diff --git a/src/test/regress/sql/columnar_paths.sql b/src/test/regress/sql/columnar_paths.sql
index 92ffa7d66..3c92d4a21 100644
--- a/src/test/regress/sql/columnar_paths.sql
+++ b/src/test/regress/sql/columnar_paths.sql
@@ -193,7 +193,7 @@ WHERE w2.a = 123;
 EXPLAIN (COSTS OFF)
 SELECT sub_1.b, sub_2.a, sub_3.avg
 FROM
-  (SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1,
+  (SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b ORDER BY 1 DESC LIMIT 5) AS sub_1,
   (SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
   (SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
 WHERE sub_2.a < sub_1.b::integer
diff --git a/src/test/regress/sql/global_cancel.sql b/src/test/regress/sql/global_cancel.sql
index 4a6157489..848c3b01a 100644
--- a/src/test/regress/sql/global_cancel.sql
+++ b/src/test/regress/sql/global_cancel.sql
@@ -47,9 +47,12 @@ RESET client_min_messages;
 
 SELECT pg_typeof(:maintenance_daemon_gpid);
 
+\set VERBOSITY terse
+
 SELECT pg_cancel_backend(:maintenance_daemon_gpid);
 SELECT pg_terminate_backend(:maintenance_daemon_gpid);
 
+\set VERBOSITY default
 -- we can cancel our own backend
 SELECT pg_cancel_backend(citus_backend_gpid());
diff --git a/src/test/regress/sql/local_dist_join_mixed.sql b/src/test/regress/sql/local_dist_join_mixed.sql
index b07da2fc8..c6eb53d4e 100644
--- a/src/test/regress/sql/local_dist_join_mixed.sql
+++ b/src/test/regress/sql/local_dist_join_mixed.sql
@@ -78,14 +78,13 @@ SELECT count(*) FROM distributed JOIN unlogged_local USING (id);
 CREATE MATERIALIZED VIEW mat_view AS SELECT * FROM local;
 SELECT count(*) FROM distributed JOIN mat_view USING (id);
 
-CREATE VIEW local_regular_view AS SELECT * FROM local;
+CREATE VIEW local_regular_view AS SELECT * FROM local table_name_for_view;
 CREATE VIEW dist_regular_view AS SELECT * FROM distributed;
 
 SELECT count(*) FROM distributed JOIN local_regular_view USING (id);
 SELECT count(*) FROM local JOIN dist_regular_view USING (id);
 SELECT count(*) FROM dist_regular_view JOIN local_regular_view USING (id);
-
 -- join alias/table alias
 SELECT COUNT(*) FROM (distributed JOIN local USING (id)) AS t(a,b,c,d) ORDER BY d,c,a,b LIMIT 3;
 SELECT COUNT(*) FROM (distributed d1(x,y,y1) JOIN local l1(x,t) USING (x)) AS t(a,b,c,d) ORDER BY d,c,a,b LIMIT 3;
diff --git a/src/test/regress/sql/local_table_join.sql b/src/test/regress/sql/local_table_join.sql
index 8d0d7d332..393b15378 100644
--- a/src/test/regress/sql/local_table_join.sql
+++ b/src/test/regress/sql/local_table_join.sql
@@ -362,9 +362,6 @@ select typdefault from (
       select typdefault from (
         select a from tbl
           where typdefault > 'a'
           limit 1) as subq_0
-      where (
-        select true as bool from pg_catalog.pg_am limit 1
-      )
   ) as subq_1
 ) as subq_2;
@@ -379,9 +376,6 @@ select typdefault from (
       select typdefault from (
         select a from tbl
           where typdefault > 'a'
          limit 1) as subq_0
-      where (
-        select true as bool from pg_catalog.pg_am limit 1
-      )
   ) as subq_1
 ) as subq_2;
diff --git a/src/test/regress/sql/multi_complex_count_distinct.sql b/src/test/regress/sql/multi_complex_count_distinct.sql
index 9957d0959..0e06fc0c8 100644
--- a/src/test/regress/sql/multi_complex_count_distinct.sql
+++ b/src/test/regress/sql/multi_complex_count_distinct.sql
@@ -1,7 +1,13 @@
 --
 -- COMPLEX_COUNT_DISTINCT
 --
-
+-- This test file has an alternative output because of the following in PG16:
+-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
+-- https://github.com/postgres/postgres/commit/f4c7c410ee4a7baa06f51ebb8d5333c169691dd3
+-- The alternative output can be deleted when we drop support for PG15
+--
+SHOW server_version \gset
+SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
 SET citus.next_shard_id TO 240000;
 SET citus.shard_count TO 8;
diff --git a/src/test/regress/sql/multi_explain.sql b/src/test/regress/sql/multi_explain.sql
index dd4615434..7fa75c8be 100644
--- a/src/test/regress/sql/multi_explain.sql
+++ b/src/test/regress/sql/multi_explain.sql
@@ -1,6 +1,13 @@
 --
 -- MULTI_EXPLAIN
 --
+-- This test file has an alternative output because of the following in PG16:
+-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
+-- https://github.com/postgres/postgres/commit/f4c7c410ee4a7baa06f51ebb8d5333c169691dd3
+-- The alternative output can be deleted when we drop support for PG15
+--
+SHOW server_version \gset
+SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
 
 SET citus.next_shard_id TO 570000;
diff --git a/src/test/regress/sql/multi_orderby_limit_pushdown.sql b/src/test/regress/sql/multi_orderby_limit_pushdown.sql
index 821c0130a..7b35d82eb 100644
--- a/src/test/regress/sql/multi_orderby_limit_pushdown.sql
+++ b/src/test/regress/sql/multi_orderby_limit_pushdown.sql
@@ -177,7 +177,7 @@ ORDER BY 2, AVG(ut.value_1), 1 DESC
 LIMIT 2;
 
 EXPLAIN (COSTS OFF)
-SELECT ut.user_id, count(DISTINCT ut.value_2)
+SELECT ut.user_id, avg(ut.value_2)
 FROM users_table ut, events_table et
 WHERE ut.user_id = et.user_id and et.value_2 < 5
 GROUP BY ut.user_id
diff --git a/src/test/regress/sql/multi_select_distinct.sql b/src/test/regress/sql/multi_select_distinct.sql
index c3ba20cf1..597076199 100644
--- a/src/test/regress/sql/multi_select_distinct.sql
+++ b/src/test/regress/sql/multi_select_distinct.sql
@@ -303,7 +303,7 @@ SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
 EXPLAIN (COSTS FALSE)
   SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
   FROM lineitem_hash_part
-  GROUP BY l_orderkey
+  GROUP BY l_orderkey, l_partkey, l_shipmode
   ORDER BY 1,2;
 
 -- check the plan if the hash aggreate is disabled. We expect to see sort + unique
@@ -312,7 +312,7 @@ SET enable_hashagg TO off;
 EXPLAIN (COSTS FALSE)
   SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
   FROM lineitem_hash_part
-  GROUP BY l_orderkey
+  GROUP BY l_orderkey, l_partkey, l_shipmode
   ORDER BY 1,2;
 
 SET enable_hashagg TO on;
diff --git a/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql b/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql
index a3dd9c06e..fc1bb5c17 100644
--- a/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql
+++ b/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql
@@ -132,7 +132,7 @@ SELECT
 FROM
   users_table RIGHT JOIN users_reference_table USING (user_id)
 WHERE
-  users_table.value_2 IN
+  users_reference_table.value_2 IN
   (SELECT
     value_2
   FROM
diff --git a/src/test/regress/sql/multi_view.sql b/src/test/regress/sql/multi_view.sql
index d80ed5c97..889dde818 100644
--- a/src/test/regress/sql/multi_view.sql
+++ b/src/test/regress/sql/multi_view.sql
@@ -37,7 +37,7 @@ CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN pri
 SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5;
 
-CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR';
+CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part table_name_for_view WHERE l_shipmode = 'AIR';
 
 -- join between view and table
 SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey);
diff --git a/src/test/regress/sql/non_colocated_subquery_joins.sql b/src/test/regress/sql/non_colocated_subquery_joins.sql
index bde8f5b0a..a74b8e16e 100644
--- a/src/test/regress/sql/non_colocated_subquery_joins.sql
+++ b/src/test/regress/sql/non_colocated_subquery_joins.sql
@@ -792,7 +792,7 @@ SET citus.shard_replication_factor TO 1;
 SELECT create_distributed_table('table2','tenant_id');
 SELECT create_distributed_table('table1','tenant_id');
 
-CREATE VIEW table1_view AS SELECT * from table1 where id < 100;
+CREATE VIEW table1_view AS SELECT * from table1 table_name_for_view where id < 100;
 
 -- all of the above queries are non-colocated subquery joins
 -- because the views are replaced with subqueries
diff --git a/src/test/regress/sql/recurring_outer_join.sql b/src/test/regress/sql/recurring_outer_join.sql
index 595d734ec..d33309817 100644
--- a/src/test/regress/sql/recurring_outer_join.sql
+++ b/src/test/regress/sql/recurring_outer_join.sql
@@ -612,10 +612,9 @@ USING (a);
 -- same test using a view, can be recursively planned
 CREATE VIEW my_view_1 AS
-SELECT * FROM dist_1 t2 WHERE EXISTS (
+SELECT * FROM dist_1 table_name_for_view WHERE EXISTS (
     SELECT * FROM dist_1 t4
-    WHERE t4.a = t2.a
-);
+    WHERE t4.a = table_name_for_view.a);
 
 SELECT COUNT(*) FROM
 ref_1 t1
 LEFT JOIN
diff --git a/src/test/regress/sql/replicate_reference_tables_to_coordinator.sql b/src/test/regress/sql/replicate_reference_tables_to_coordinator.sql
index cc5e74cd9..284fdfc66 100644
--- a/src/test/regress/sql/replicate_reference_tables_to_coordinator.sql
+++ b/src/test/regress/sql/replicate_reference_tables_to_coordinator.sql
@@ -192,7 +192,7 @@ DROP VIEW numbers_v, local_table_v;
 -- Joins between reference tables and materialized views are allowed to
 -- be planned to be executed locally.
 --
-CREATE MATERIALIZED VIEW numbers_v AS SELECT * FROM numbers WHERE a BETWEEN 1 AND 10;
+CREATE MATERIALIZED VIEW numbers_v AS SELECT * FROM numbers table_name_for_view WHERE a BETWEEN 1 AND 10;
 REFRESH MATERIALIZED VIEW numbers_v;
 
 SELECT * FROM squares JOIN numbers_v ON squares.a = numbers_v.a ORDER BY 1;
diff --git a/src/test/regress/sql/undistribute_table.sql b/src/test/regress/sql/undistribute_table.sql
index 22c14696b..737f5a0f9 100644
--- a/src/test/regress/sql/undistribute_table.sql
+++ b/src/test/regress/sql/undistribute_table.sql
@@ -105,8 +105,8 @@ INSERT INTO view_table VALUES (1, 2, 3), (2, 4, 6), (3, 6, 9);
 
 CREATE SCHEMA another_schema;
-CREATE VIEW undis_view1 AS SELECT a, b FROM view_table;
-CREATE VIEW undis_view2 AS SELECT a, c FROM view_table;
+CREATE VIEW undis_view1 AS SELECT a, b FROM view_table table_name_for_view;
+CREATE VIEW undis_view2 AS SELECT a, c FROM view_table table_name_for_view;
 CREATE VIEW another_schema.undis_view3 AS SELECT b, c FROM undis_view1 JOIN undis_view2 ON undis_view1.a = undis_view2.a;
 
 SELECT schemaname, viewname, viewowner, definition FROM pg_views WHERE viewname LIKE 'undis\_view%' ORDER BY viewname;
diff --git a/src/test/regress/sql/view_propagation.sql b/src/test/regress/sql/view_propagation.sql
index 44bbbf7b0..f0d63da85 100644
--- a/src/test/regress/sql/view_propagation.sql
+++ b/src/test/regress/sql/view_propagation.sql
@@ -207,15 +207,15 @@ UNION ALL
 INNER JOIN reporting_line rl ON e.manager_id = rl.employee_id;
 
 -- Aliases are supported
-CREATE VIEW aliased_opt_prop_view(alias_1, alias_2) AS SELECT * FROM view_table_6;
+CREATE VIEW aliased_opt_prop_view(alias_1, alias_2) AS SELECT * FROM view_table_6 table_name_for_view;
 
 -- View options are supported
 CREATE VIEW opt_prop_view
   WITH(check_option=CASCADED, security_barrier=true)
-  AS SELECT * FROM view_table_6;
+  AS SELECT * FROM view_table_6 table_name_for_view;
 
 CREATE VIEW sep_opt_prop_view
-  AS SELECT * FROM view_table_6
+  AS SELECT * FROM view_table_6 table_name_for_view
   WITH LOCAL CHECK OPTION;
 
 SELECT * FROM (SELECT pg_identify_object_as_address(classid, objid, objsubid) as obj_identifier from pg_catalog.pg_dist_object) as obj_identifiers where obj_identifier::text like '%opt_prop_view%' ORDER BY 1;
@@ -273,7 +273,7 @@ CREATE OR REPLACE VIEW view_for_unsup_commands AS SELECT id FROM table_to_test_u
 CREATE TABLE alter_view_table(id int, val1 text);
 SELECT create_distributed_table('alter_view_table','id');
 
-CREATE VIEW alter_view_1 AS SELECT * FROM alter_view_table;
+CREATE VIEW alter_view_1 AS SELECT * FROM alter_view_table table_name_for_view;
 
 -- Set/drop default value is not supported by Citus
 ALTER VIEW alter_view_1 ALTER COLUMN val1 SET DEFAULT random()::text;
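
Aside (illustrative, not part of the patch): the recurring table_name_for_view alias gives the
tests a stable FROM clause across PostgreSQL versions. PG16's view deparser stops qualifying
columns with the relation name when the view reads a single relation, and the fixed alias leaves
the regression harness one predictable token to normalize away on older servers. A rough sketch
of the behavior difference, using the hypothetical names base_tb and v1; the exact PG15/PG16
output shapes below are inferred from the expected-output changes in this patch:

    -- illustrative sketch only, not part of the patch
    CREATE TABLE base_tb (a int, b text);
    CREATE VIEW v1 AS SELECT * FROM base_tb table_name_for_view;
    SELECT pg_get_viewdef('v1'::regclass, true);
    -- PG16 prints roughly:  SELECT a, b FROM base_tb table_name_for_view;
    -- PG15 prints roughly:  SELECT table_name_for_view.a, table_name_for_view.b
    --                       FROM base_tb table_name_for_view;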