-- -- MULTI_MX_EXPLAIN -- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1320000; \c - - - :worker_1_port \c - - - :worker_2_port \c - - - :master_port \a\t SET citus.task_executor_type TO 'real-time'; SET citus.explain_distributed_queries TO on; VACUUM ANALYZE lineitem_mx; VACUUM ANALYZE orders_mx; VACUUM ANALYZE customer_mx; VACUUM ANALYZE supplier_mx; \c - - - :worker_1_port -- Function that parses explain output as JSON CREATE FUNCTION explain_json(query text) RETURNS jsonb AS $BODY$ DECLARE result jsonb; BEGIN EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result; RETURN result; END; $BODY$ LANGUAGE plpgsql; -- Function that parses explain output as XML CREATE FUNCTION explain_xml(query text) RETURNS xml AS $BODY$ DECLARE result xml; BEGIN EXECUTE format('EXPLAIN (FORMAT XML) %s', query) INTO result; RETURN result; END; $BODY$ LANGUAGE plpgsql; \c - - - :worker_2_port -- Function that parses explain output as JSON CREATE FUNCTION explain_json(query text) RETURNS jsonb AS $BODY$ DECLARE result jsonb; BEGIN EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result; RETURN result; END; $BODY$ LANGUAGE plpgsql; -- Function that parses explain output as XML CREATE FUNCTION explain_xml(query text) RETURNS xml AS $BODY$ DECLARE result xml; BEGIN EXECUTE format('EXPLAIN (FORMAT XML) %s', query) INTO result; RETURN result; END; $BODY$ LANGUAGE plpgsql; -- Test Text format EXPLAIN (COSTS FALSE, FORMAT TEXT) SELECT l_quantity, count(*) count_quantity FROM lineitem_mx GROUP BY l_quantity ORDER BY count_quantity, l_quantity; Sort Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity -> HashAggregate Group Key: remote_scan.l_quantity -> Custom Scan (Citus Real-Time) Task Count: 16 Tasks Shown: One of 16 -> Task Node: host=localhost port=57637 dbname=regression -> HashAggregate Group Key: l_quantity -> Seq Scan on lineitem_mx_1220052 lineitem_mx -- Test JSON format EXPLAIN (COSTS FALSE, FORMAT JSON) SELECT l_quantity, count(*) count_quantity FROM lineitem_mx GROUP BY l_quantity ORDER BY count_quantity, l_quantity; [ { "Plan": { "Node Type": "Sort", "Parallel Aware": false, "Sort Key": ["COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)", "remote_scan.l_quantity"], "Plans": [ { "Node Type": "Aggregate", "Strategy": "Hashed", "Partial Mode": "Simple", "Parent Relationship": "Outer", "Parallel Aware": false, "Group Key": ["remote_scan.l_quantity"], "Plans": [ { "Node Type": "Custom Scan", "Parent Relationship": "Outer", "Custom Plan Provider": "Citus Real-Time", "Parallel Aware": false, "Distributed Query": { "Job": { "Task Count": 16, "Tasks Shown": "One of 16", "Tasks": [ { "Node": "host=localhost port=57637 dbname=regression", "Remote Plan": [ [ { "Plan": { "Node Type": "Aggregate", "Strategy": "Hashed", "Partial Mode": "Simple", "Parallel Aware": false, "Group Key": ["l_quantity"], "Plans": [ { "Node Type": "Seq Scan", "Parent Relationship": "Outer", "Parallel Aware": false, "Relation Name": "lineitem_mx_1220052", "Alias": "lineitem_mx" } ] } } ] ] } ] } } } ] } ] } } ] -- Validate JSON format SELECT true AS valid FROM explain_json($$ SELECT l_quantity, count(*) count_quantity FROM lineitem_mx GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$); t \c - - - :worker_1_port -- Test XML format EXPLAIN (COSTS FALSE, FORMAT XML) SELECT l_quantity, count(*) count_quantity FROM lineitem_mx GROUP BY l_quantity ORDER BY count_quantity, l_quantity; Sort false COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint) remote_scan.l_quantity Aggregate Hashed Simple Outer false remote_scan.l_quantity Custom Scan Outer Citus Real-Time false 16 One of 16 host=localhost port=57637 dbname=regression Aggregate Hashed Simple false l_quantity Seq Scan Outer false lineitem_mx_1220052 lineitem_mx -- Validate XML format SELECT true AS valid FROM explain_xml($$ SELECT l_quantity, count(*) count_quantity FROM lineitem_mx GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$); t -- Test YAML format EXPLAIN (COSTS FALSE, FORMAT YAML) SELECT l_quantity, count(*) count_quantity FROM lineitem_mx GROUP BY l_quantity ORDER BY count_quantity, l_quantity; - Plan: Node Type: "Sort" Parallel Aware: false Sort Key: - "COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)" - "remote_scan.l_quantity" Plans: - Node Type: "Aggregate" Strategy: "Hashed" Partial Mode: "Simple" Parent Relationship: "Outer" Parallel Aware: false Group Key: - "remote_scan.l_quantity" Plans: - Node Type: "Custom Scan" Parent Relationship: "Outer" Custom Plan Provider: "Citus Real-Time" Parallel Aware: false Distributed Query: Job: Task Count: 16 Tasks Shown: "One of 16" Tasks: - Node: "host=localhost port=57637 dbname=regression" Remote Plan: - Plan: Node Type: "Aggregate" Strategy: "Hashed" Partial Mode: "Simple" Parallel Aware: false Group Key: - "l_quantity" Plans: - Node Type: "Seq Scan" Parent Relationship: "Outer" Parallel Aware: false Relation Name: "lineitem_mx_1220052" Alias: "lineitem_mx" -- Test Text format EXPLAIN (COSTS FALSE, FORMAT TEXT) SELECT l_quantity, count(*) count_quantity FROM lineitem_mx GROUP BY l_quantity ORDER BY count_quantity, l_quantity; Sort Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity -> HashAggregate Group Key: remote_scan.l_quantity -> Custom Scan (Citus Real-Time) Task Count: 16 Tasks Shown: One of 16 -> Task Node: host=localhost port=57637 dbname=regression -> HashAggregate Group Key: l_quantity -> Seq Scan on lineitem_mx_1220052 lineitem_mx \c - - - :worker_2_port -- Test verbose EXPLAIN (COSTS FALSE, VERBOSE TRUE) SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem_mx; Aggregate Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / pg_catalog.sum(remote_scan."?column?_2"))) -> Custom Scan (Citus Real-Time) Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2" Task Count: 16 Tasks Shown: One of 16 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate Output: sum(l_quantity), sum(l_quantity), count(l_quantity) -> Seq Scan on public.lineitem_mx_1220052 lineitem_mx Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Test join EXPLAIN (COSTS FALSE) SELECT * FROM lineitem_mx JOIN orders_mx ON l_orderkey = o_orderkey AND l_quantity < 5.0 ORDER BY l_quantity LIMIT 10; Limit -> Sort Sort Key: remote_scan.l_quantity -> Custom Scan (Citus Real-Time) Task Count: 16 Tasks Shown: One of 16 -> Task Node: host=localhost port=57637 dbname=regression -> Limit -> Sort Sort Key: lineitem_mx.l_quantity -> Hash Join Hash Cond: (lineitem_mx.l_orderkey = orders_mx.o_orderkey) -> Seq Scan on lineitem_mx_1220052 lineitem_mx Filter: (l_quantity < 5.0) -> Hash -> Seq Scan on orders_mx_1220068 orders_mx -- Test insert EXPLAIN (COSTS FALSE) INSERT INTO lineitem_mx VALUES(1,0); Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Insert on lineitem_mx_1220052 -> Result -- Test update EXPLAIN (COSTS FALSE) UPDATE lineitem_mx SET l_suppkey = 12 WHERE l_orderkey = 1 AND l_partkey = 0; Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Update on lineitem_mx_1220052 lineitem_mx -> Index Scan using lineitem_mx_pkey_1220052 on lineitem_mx_1220052 lineitem_mx Index Cond: (l_orderkey = 1) Filter: (l_partkey = 0) -- Test delete EXPLAIN (COSTS FALSE) DELETE FROM lineitem_mx WHERE l_orderkey = 1 AND l_partkey = 0; Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Delete on lineitem_mx_1220052 lineitem_mx -> Index Scan using lineitem_mx_pkey_1220052 on lineitem_mx_1220052 lineitem_mx Index Cond: (l_orderkey = 1) Filter: (l_partkey = 0) -- make the outputs more consistent VACUUM ANALYZE lineitem_mx; VACUUM ANALYZE orders_mx; VACUUM ANALYZE customer_mx; VACUUM ANALYZE supplier_mx; -- Test single-shard SELECT EXPLAIN (COSTS FALSE) SELECT l_quantity FROM lineitem_mx WHERE l_orderkey = 5; Custom Scan (Citus Router) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=57638 dbname=regression -> Index Scan using lineitem_mx_pkey_1220055 on lineitem_mx_1220055 lineitem_mx Index Cond: (l_orderkey = 5) SELECT true AS valid FROM explain_xml($$ SELECT l_quantity FROM lineitem_mx WHERE l_orderkey = 5$$); t SELECT true AS valid FROM explain_json($$ SELECT l_quantity FROM lineitem_mx WHERE l_orderkey = 5$$); t -- Test CREATE TABLE ... AS EXPLAIN (COSTS FALSE) CREATE TABLE explain_result AS SELECT * FROM lineitem_mx; Custom Scan (Citus Real-Time) Task Count: 16 Tasks Shown: One of 16 -> Task Node: host=localhost port=57637 dbname=regression -> Seq Scan on lineitem_mx_1220052 lineitem_mx -- Test all tasks output SET citus.explain_all_tasks TO on; EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem_mx WHERE l_orderkey > 9030; Aggregate -> Custom Scan (Citus Real-Time) Task Count: 16 Tasks Shown: All -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220052 on lineitem_mx_1220052 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220053 on lineitem_mx_1220053 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220054 on lineitem_mx_1220054 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220055 on lineitem_mx_1220055 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220056 on lineitem_mx_1220056 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220057 on lineitem_mx_1220057 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220058 on lineitem_mx_1220058 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220059 on lineitem_mx_1220059 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220060 on lineitem_mx_1220060 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220061 on lineitem_mx_1220061 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220062 on lineitem_mx_1220062 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220063 on lineitem_mx_1220063 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220064 on lineitem_mx_1220064 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Seq Scan on lineitem_mx_1220065 lineitem_mx Filter: (l_orderkey > 9030) -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220066 on lineitem_mx_1220066 lineitem_mx Index Cond: (l_orderkey > 9030) -> Task Node: host=localhost port=57638 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220067 on lineitem_mx_1220067 lineitem_mx Index Cond: (l_orderkey > 9030) SELECT true AS valid FROM explain_xml($$ SELECT avg(l_linenumber) FROM lineitem_mx WHERE l_orderkey > 9030$$); t SELECT true AS valid FROM explain_json($$ SELECT avg(l_linenumber) FROM lineitem_mx WHERE l_orderkey > 9030$$); t -- Test track tracker SET citus.task_executor_type TO 'task-tracker'; SET citus.explain_all_tasks TO off; EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem_mx WHERE l_orderkey > 9030; Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 16 Tasks Shown: One of 16 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Index Only Scan using lineitem_mx_pkey_1220052 on lineitem_mx_1220052 lineitem_mx Index Cond: (l_orderkey > 9030) -- Test re-partition join SET citus.large_table_shard_count TO 1; EXPLAIN (COSTS FALSE) SELECT count(*) FROM lineitem_mx, orders_mx, customer_mx, supplier_mx WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey; Aggregate -> Custom Scan (Citus Task-Tracker) Task Count: 16 Tasks Shown: One of 16 -> Task Node: host=localhost port=57637 dbname=regression -> Aggregate -> Hash Join Hash Cond: (lineitem_mx.l_orderkey = orders_mx.o_orderkey) -> Hash Join Hash Cond: (supplier_mx.s_suppkey = lineitem_mx.l_suppkey) -> Seq Scan on supplier_mx_1220087 supplier_mx -> Hash -> Seq Scan on lineitem_mx_1220052 lineitem_mx -> Hash -> Hash Join Hash Cond: (customer_mx.c_custkey = orders_mx.o_custkey) -> Seq Scan on customer_mx_1220084 customer_mx -> Hash -> Seq Scan on orders_mx_1220068 orders_mx EXPLAIN (COSTS FALSE, FORMAT JSON) SELECT count(*) FROM lineitem_mx, orders_mx, customer_mx, supplier_mx WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey; [ { "Plan": { "Node Type": "Aggregate", "Strategy": "Plain", "Partial Mode": "Simple", "Parallel Aware": false, "Plans": [ { "Node Type": "Custom Scan", "Parent Relationship": "Outer", "Custom Plan Provider": "Citus Task-Tracker", "Parallel Aware": false, "Distributed Query": { "Job": { "Task Count": 16, "Tasks Shown": "One of 16", "Tasks": [ { "Node": "host=localhost port=57637 dbname=regression", "Remote Plan": [ [ { "Plan": { "Node Type": "Aggregate", "Strategy": "Plain", "Partial Mode": "Simple", "Parallel Aware": false, "Plans": [ { "Node Type": "Hash Join", "Parent Relationship": "Outer", "Parallel Aware": false, "Join Type": "Inner", "Hash Cond": "(lineitem_mx.l_orderkey = orders_mx.o_orderkey)", "Plans": [ { "Node Type": "Hash Join", "Parent Relationship": "Outer", "Parallel Aware": false, "Join Type": "Inner", "Hash Cond": "(supplier_mx.s_suppkey = lineitem_mx.l_suppkey)", "Plans": [ { "Node Type": "Seq Scan", "Parent Relationship": "Outer", "Parallel Aware": false, "Relation Name": "supplier_mx_1220087", "Alias": "supplier_mx" }, { "Node Type": "Hash", "Parent Relationship": "Inner", "Parallel Aware": false, "Plans": [ { "Node Type": "Seq Scan", "Parent Relationship": "Outer", "Parallel Aware": false, "Relation Name": "lineitem_mx_1220052", "Alias": "lineitem_mx" } ] } ] }, { "Node Type": "Hash", "Parent Relationship": "Inner", "Parallel Aware": false, "Plans": [ { "Node Type": "Hash Join", "Parent Relationship": "Outer", "Parallel Aware": false, "Join Type": "Inner", "Hash Cond": "(customer_mx.c_custkey = orders_mx.o_custkey)", "Plans": [ { "Node Type": "Seq Scan", "Parent Relationship": "Outer", "Parallel Aware": false, "Relation Name": "customer_mx_1220084", "Alias": "customer_mx" }, { "Node Type": "Hash", "Parent Relationship": "Inner", "Parallel Aware": false, "Plans": [ { "Node Type": "Seq Scan", "Parent Relationship": "Outer", "Parallel Aware": false, "Relation Name": "orders_mx_1220068", "Alias": "orders_mx" } ] } ] } ] } ] } ] } } ] ] } ] } } } ] } } ] SELECT true AS valid FROM explain_json($$ SELECT count(*) FROM lineitem_mx, orders_mx, customer_mx, supplier_mx WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey$$); t EXPLAIN (COSTS FALSE, FORMAT XML) SELECT count(*) FROM lineitem_mx, orders_mx, customer_mx, supplier_mx WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey; Aggregate Plain Simple false Custom Scan Outer Citus Task-Tracker false 16 One of 16 host=localhost port=57637 dbname=regression Aggregate Plain Simple false Hash Join Outer false Inner (lineitem_mx.l_orderkey = orders_mx.o_orderkey) Hash Join Outer false Inner (supplier_mx.s_suppkey = lineitem_mx.l_suppkey) Seq Scan Outer false supplier_mx_1220087 supplier_mx Hash Inner false Seq Scan Outer false lineitem_mx_1220052 lineitem_mx Hash Inner false Hash Join Outer false Inner (customer_mx.c_custkey = orders_mx.o_custkey) Seq Scan Outer false customer_mx_1220084 customer_mx Hash Inner false Seq Scan Outer false orders_mx_1220068 orders_mx SELECT true AS valid FROM explain_xml($$ SELECT count(*) FROM lineitem_mx, orders_mx, customer_mx, supplier_mx WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey$$); t EXPLAIN (COSTS FALSE, FORMAT YAML) SELECT count(*) FROM lineitem_mx, orders_mx, customer_mx, supplier_mx WHERE l_orderkey = o_orderkey AND o_custkey = c_custkey AND l_suppkey = s_suppkey; - Plan: Node Type: "Aggregate" Strategy: "Plain" Partial Mode: "Simple" Parallel Aware: false Plans: - Node Type: "Custom Scan" Parent Relationship: "Outer" Custom Plan Provider: "Citus Task-Tracker" Parallel Aware: false Distributed Query: Job: Task Count: 16 Tasks Shown: "One of 16" Tasks: - Node: "host=localhost port=57637 dbname=regression" Remote Plan: - Plan: Node Type: "Aggregate" Strategy: "Plain" Partial Mode: "Simple" Parallel Aware: false Plans: - Node Type: "Hash Join" Parent Relationship: "Outer" Parallel Aware: false Join Type: "Inner" Hash Cond: "(lineitem_mx.l_orderkey = orders_mx.o_orderkey)" Plans: - Node Type: "Hash Join" Parent Relationship: "Outer" Parallel Aware: false Join Type: "Inner" Hash Cond: "(supplier_mx.s_suppkey = lineitem_mx.l_suppkey)" Plans: - Node Type: "Seq Scan" Parent Relationship: "Outer" Parallel Aware: false Relation Name: "supplier_mx_1220087" Alias: "supplier_mx" - Node Type: "Hash" Parent Relationship: "Inner" Parallel Aware: false Plans: - Node Type: "Seq Scan" Parent Relationship: "Outer" Parallel Aware: false Relation Name: "lineitem_mx_1220052" Alias: "lineitem_mx" - Node Type: "Hash" Parent Relationship: "Inner" Parallel Aware: false Plans: - Node Type: "Hash Join" Parent Relationship: "Outer" Parallel Aware: false Join Type: "Inner" Hash Cond: "(customer_mx.c_custkey = orders_mx.o_custkey)" Plans: - Node Type: "Seq Scan" Parent Relationship: "Outer" Parallel Aware: false Relation Name: "customer_mx_1220084" Alias: "customer_mx" - Node Type: "Hash" Parent Relationship: "Inner" Parallel Aware: false Plans: - Node Type: "Seq Scan" Parent Relationship: "Outer" Parallel Aware: false Relation Name: "orders_mx_1220068" Alias: "orders_mx"