-- -- MULTI_EXPLAIN -- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 570000; -- print major version to make version-specific tests clear SELECT substring(version(), '\d+(?:\.\d+)?') AS major_version; major_version --------------- 9.5 (1 row) \a\t SET citus.task_executor_type TO 'real-time'; SET citus.explain_distributed_queries TO on; -- Function that parses explain output as JSON CREATE FUNCTION explain_json(query text) RETURNS jsonb AS $BODY$ DECLARE result jsonb; BEGIN EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result; RETURN result; END; $BODY$ LANGUAGE plpgsql; -- Function that parses explain output as XML CREATE FUNCTION explain_xml(query text) RETURNS xml AS $BODY$ DECLARE result xml; BEGIN EXECUTE format('EXPLAIN (FORMAT XML) %s', query) INTO result; RETURN result; END; $BODY$ LANGUAGE plpgsql; -- VACUMM related tables to ensure test outputs are stable VACUUM ANALYZE lineitem; VACUUM ANALYZE orders; -- Test Text format EXPLAIN (COSTS FALSE, FORMAT TEXT) SELECT l_quantity, count(*) count_quantity FROM lineitem GROUP BY l_quantity ORDER BY count_quantity, l_quantity; Sort Sort Key: COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity -> HashAggregate Group Key: remote_scan.l_quantity -> Custom Scan (Citus Real-Time) Task Count: 8 Tasks Shown: One of 8 -> Task Node: host=localhost port=57637 dbname=regression -> HashAggregate Group Key: l_quantity -> Seq Scan on lineitem_290001 lineitem -- Test JSON format EXPLAIN (COSTS FALSE, FORMAT JSON) SELECT l_quantity, count(*) count_quantity FROM lineitem GROUP BY l_quantity ORDER BY count_quantity, l_quantity; [ { "Plan": { "Node Type": "Sort", "Sort Key": ["COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)", "remote_scan.l_quantity"], "Plans": [ { "Node Type": "Aggregate", "Strategy": "Hashed", "Parent Relationship": "Outer", "Group Key": ["remote_scan.l_quantity"], "Plans": [ { "Node Type": "Custom Scan", "Parent Relationship": "Outer", "Custom Plan Provider": "Citus Real-Time", "Distributed Query": { "Job": { "Task Count": 8, "Tasks Shown": "One of 8", "Tasks": [ { "Node": "host=localhost port=57637 dbname=regression", "Remote Plan": [ [ { "Plan": { "Node Type": "Aggregate", "Strategy": "Hashed", "Group Key": ["l_quantity"], "Plans": [ { "Node Type": "Seq Scan", "Parent Relationship": "Outer", "Relation Name": "lineitem_290001", "Alias": "lineitem" } ] } } ] ] } ] } } } ] } ] } } ] -- Validate JSON format SELECT true AS valid FROM explain_json($$ SELECT l_quantity, count(*) count_quantity FROM lineitem GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$); t -- Test XML format EXPLAIN (COSTS FALSE, FORMAT XML) SELECT l_quantity, count(*) count_quantity FROM lineitem GROUP BY l_quantity ORDER BY count_quantity, l_quantity; Sort COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint) remote_scan.l_quantity Aggregate Hashed Outer remote_scan.l_quantity Custom Scan Outer Citus Real-Time 8 One of 8 host=localhost port=57637 dbname=regression Aggregate Hashed l_quantity Seq Scan Outer lineitem_290001 lineitem -- Validate XML format SELECT true AS valid FROM explain_xml($$ SELECT l_quantity, count(*) count_quantity FROM lineitem GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$); t -- Test YAML format EXPLAIN (COSTS FALSE, FORMAT YAML) SELECT l_quantity, count(*) count_quantity FROM lineitem GROUP BY l_quantity ORDER BY count_quantity, l_quantity; - Plan: Node Type: "Sort" Sort Key: - "COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)" - "remote_scan.l_quantity" Plans: - Node Type: "Aggregate" Strategy: "Hashed" Parent Relationship: "Outer" Group Key: - "remote_scan.l_quantity" Plans: - Node Type: "Custom Scan" Parent Relationship: "Outer" Custom Plan Provider: "Citus Real-Time" Distributed Query: Job: Task Count: 8 Tasks Shown: "One of 8" Tasks: - Node: "host=localhost port=57637 dbname=regression" Remote Plan: - Plan: Node Type: "Aggregate" Strategy: "Hashed" Group Key: - "l_quantity" Plans: - Node Type: "Seq Scan" Parent Relationship: "Outer" Relation Name: "lineitem_290001" Alias: "lineitem" -- Test Text format EXPLAIN (COSTS FALSE, FORMAT TEXT) SELECT l_quantity, count(*) count_quantity FROM lineitem GROUP BY l_quantity ORDER BY count_quantity, l_quantity; Sort Sort Key: COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity -> HashAggregate Group Key: remote_scan.l_quantity -> Custom Scan (Citus Real-Time) Task Count: 8 Tasks Shown: One of 8 -> Task Node: host=localhost port=57637 dbname=regression -> HashAggregate Group Key: l_quantity -> Seq Scan on lineitem_290001 lineitem -- Test verbose EXPLAIN (COSTS FALSE, VERBOSE TRUE) SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem; Aggregate Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / sum(remote_scan."?column?_2"))) -> Custom Scan (Citus Real-Time) Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2" Task Count: 8 Tasks Shown: One of 8 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate Output: sum(l_quantity), sum(l_quantity), count(l_quantity) -> Seq Scan on public.lineitem_290001 lineitem Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Test join EXPLAIN (COSTS FALSE) SELECT * FROM lineitem JOIN orders ON l_orderkey = o_orderkey AND l_quantity < 5.0 ORDER BY l_quantity LIMIT 10; Limit -> Sort Sort Key: remote_scan.l_quantity -> Custom Scan (Citus Real-Time) Task Count: 8 Tasks Shown: One of 8 -> Task Node: host=localhost port=57637 dbname=regression -> Limit -> Sort Sort Key: lineitem.l_quantity -> Merge Join Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) -> Index Scan using orders_pkey_290008 on orders_290008 orders -> Sort Sort Key: lineitem.l_orderkey -> Seq Scan on lineitem_290001 lineitem Filter: (l_quantity < 5.0) -- Test insert EXPLAIN (COSTS FALSE) INSERT INTO lineitem VALUES(1,0); Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57638 dbname=regression -> Insert on lineitem_290000 -> Result -- Test update EXPLAIN (COSTS FALSE) UPDATE lineitem SET l_suppkey = 12 WHERE l_orderkey = 1 AND l_partkey = 0; Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57638 dbname=regression -> Update on lineitem_290000 -> Index Scan using lineitem_pkey_290000 on lineitem_290000 Index Cond: (l_orderkey = 1) Filter: (l_partkey = 0) -- Test delete EXPLAIN (COSTS FALSE) DELETE FROM lineitem WHERE l_orderkey = 1 AND l_partkey = 0; Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57638 dbname=regression -> Delete on lineitem_290000 -> Index Scan using lineitem_pkey_290000 on lineitem_290000 Index Cond: (l_orderkey = 1) Filter: (l_partkey = 0) -- Test single-shard SELECT EXPLAIN (COSTS FALSE) SELECT l_quantity FROM lineitem WHERE l_orderkey = 5; Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem Index Cond: (l_orderkey = 5) SELECT true AS valid FROM explain_xml($$ SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$); t SELECT true AS valid FROM explain_json($$ SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$); t -- Test CREATE TABLE ... AS EXPLAIN (COSTS FALSE) CREATE TABLE explain_result AS SELECT * FROM lineitem; Custom Scan (Citus Real-Time) Task Count: 8 Tasks Shown: One of 8 -> Task Node: host=localhost port=57637 dbname=regression -> Seq Scan on lineitem_290001 lineitem -- Test having EXPLAIN (COSTS FALSE, VERBOSE TRUE) SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem HAVING sum(l_quantity) > 100; Aggregate Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / sum(remote_scan."?column?_2"))) Filter: (sum(remote_scan.worker_column_4) > '100'::numeric) -> Custom Scan (Citus Real-Time) Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2", remote_scan.worker_column_4 Task Count: 8 Tasks Shown: One of 8 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate Output: sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity) -> Seq Scan on public.lineitem_290001 lineitem Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Test having without aggregate EXPLAIN (COSTS FALSE, VERBOSE TRUE) SELECT l_quantity FROM lineitem GROUP BY l_quantity HAVING l_quantity > (100 * random()); HashAggregate Output: remote_scan.l_quantity Group Key: remote_scan.l_quantity Filter: ((remote_scan.worker_column_2)::double precision > ('100'::double precision * random())) -> Custom Scan (Citus Real-Time) Output: remote_scan.l_quantity, remote_scan.worker_column_2 Task Count: 8 Tasks Shown: One of 8 -> Task Node: host=localhost port=57637 dbname=regression -> HashAggregate Output: l_quantity, l_quantity Group Key: lineitem.l_quantity -> Seq Scan on public.lineitem_290001 lineitem Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Test all tasks output SET citus.explain_all_tasks TO on; EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; Aggregate -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Seq Scan on lineitem_290005 lineitem Filter: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Seq Scan on lineitem_290004 lineitem Filter: (l_orderkey > 9030) -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Seq Scan on lineitem_290007 lineitem Filter: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Seq Scan on lineitem_290006 lineitem Filter: (l_orderkey > 9030) SELECT true AS valid FROM explain_xml($$ SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$); t SELECT true AS valid FROM explain_json($$ SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$); t -- Test track tracker SET citus.task_executor_type TO 'task-tracker'; SET citus.explain_all_tasks TO off; EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Seq Scan on lineitem_290005 lineitem Filter: (l_orderkey > 9030) -- Test re-partition join SET citus.large_table_shard_count TO 1; EXPLAIN (COSTS FALSE) SELECT count(*) FROM lineitem, orders, customer, supplier_single_shard WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey; Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 1 Tasks Shown: None, not supported for re-partition queries -> MapMergeJob Map Task Count: 1 Merge Task Count: 1 -> MapMergeJob Map Task Count: 8 Merge Task Count: 1 EXPLAIN (COSTS FALSE, FORMAT JSON) SELECT count(*) FROM lineitem, orders, customer, supplier_single_shard WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey; [ { "Plan": { "Node Type": "Aggregate", "Strategy": "Plain", "Plans": [ { "Node Type": "Custom Scan", "Parent Relationship": "Outer", "Custom Plan Provider": "Citus Task-Tracker", "Distributed Query": { "Job": { "Task Count": 1, "Tasks Shown": "None, not supported for re-partition queries", "Depended Jobs": [ { "Map Task Count": 1, "Merge Task Count": 1, "Depended Jobs": [ { "Map Task Count": 8, "Merge Task Count": 1 } ] } ] } } } ] } } ] SELECT true AS valid FROM explain_json($$ SELECT count(*) FROM lineitem, orders, customer, supplier_single_shard WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey$$); t EXPLAIN (COSTS FALSE, FORMAT XML) SELECT count(*) FROM lineitem, orders, customer, supplier_single_shard WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey; Aggregate Plain Custom Scan Outer Citus Task-Tracker 1 None, not supported for re-partition queries 1 1 8 1 SELECT true AS valid FROM explain_xml($$ SELECT count(*) FROM lineitem, orders, customer, supplier WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey$$); t -- make sure that EXPLAIN works without -- problems for queries that inlvolves only -- reference tables SELECT true AS valid FROM explain_xml($$ SELECT count(*) FROM nation WHERE n_name = 'CHINA'$$); t SELECT true AS valid FROM explain_xml($$ SELECT count(*) FROM nation, supplier WHERE nation.n_nationkey = supplier.s_nationkey$$); t EXPLAIN (COSTS FALSE, FORMAT YAML) SELECT count(*) FROM lineitem, orders, customer, supplier_single_shard WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey; - Plan: Node Type: "Aggregate" Strategy: "Plain" Plans: - Node Type: "Custom Scan" Parent Relationship: "Outer" Custom Plan Provider: "Citus Task-Tracker" Distributed Query: Job: Task Count: 1 Tasks Shown: "None, not supported for re-partition queries" Depended Jobs: - Map Task Count: 1 Merge Task Count: 1 Depended Jobs: - Map Task Count: 8 Merge Task Count: 1 -- test parallel aggregates SET parallel_setup_cost=0; ERROR: unrecognized configuration parameter "parallel_setup_cost" SET parallel_tuple_cost=0; ERROR: unrecognized configuration parameter "parallel_tuple_cost" SET min_parallel_relation_size=0; ERROR: unrecognized configuration parameter "min_parallel_relation_size" SET max_parallel_workers_per_gather=4; ERROR: unrecognized configuration parameter "max_parallel_workers_per_gather" -- ensure local plans display correctly CREATE TABLE lineitem_clone (LIKE lineitem); EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem_clone; Aggregate -> Seq Scan on lineitem_clone -- ensure distributed plans don't break EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem; Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 8 Tasks Shown: One of 8 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Seq Scan on lineitem_290001 lineitem -- ensure EXPLAIN EXECUTE doesn't crash PREPARE task_tracker_query AS SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; EXPLAIN (COSTS FALSE) EXECUTE task_tracker_query; Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Seq Scan on lineitem_290005 lineitem Filter: (l_orderkey > 9030) SET citus.task_executor_type TO 'real-time'; PREPARE router_executor_query AS SELECT l_quantity FROM lineitem WHERE l_orderkey = 5; EXPLAIN EXECUTE router_executor_query; Custom Scan (Citus Router) (cost=0.00..0.00 rows=0 width=0) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (cost=0.28..11.83 rows=3 width=5) Index Cond: (l_orderkey = 5) PREPARE real_time_executor_query AS SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; EXPLAIN (COSTS FALSE) EXECUTE real_time_executor_query; Aggregate -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Seq Scan on lineitem_290005 lineitem Filter: (l_orderkey > 9030) -- EXPLAIN EXECUTE of parametrized prepared statements is broken, but -- at least make sure to fail without crashing PREPARE router_executor_query_param(int) AS SELECT l_quantity FROM lineitem WHERE l_orderkey = $1; EXPLAIN EXECUTE router_executor_query_param(5); Custom Scan (Citus Router) (cost=0.00..0.00 rows=0 width=0) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (cost=0.28..11.83 rows=3 width=5) Index Cond: (l_orderkey = 5) -- test explain in a transaction with alter table to test we use right connections BEGIN; CREATE TABLE explain_table(id int); SELECT create_distributed_table('explain_table', 'id'); ALTER TABLE explain_table ADD COLUMN value int; NOTICE: using one-phase commit for distributed DDL commands HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc' EXPLAIN (COSTS FALSE) SELECT value FROM explain_table WHERE id = 1; Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Seq Scan on explain_table_570001 explain_table Filter: (id = 1) ROLLBACK;