--
-- MULTI_EXPLAIN
--
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 570000;
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 570000;
\a\t
SET citus.task_executor_type TO 'real-time';
SET citus.explain_distributed_queries TO on;
-- Function that parses explain output as JSON
CREATE FUNCTION explain_json(query text)
RETURNS jsonb
AS $BODY$
DECLARE
result jsonb;
BEGIN
EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result;
RETURN result;
END;
$BODY$ LANGUAGE plpgsql;
-- Function that parses explain output as XML
CREATE FUNCTION explain_xml(query text)
RETURNS xml
AS $BODY$
DECLARE
result xml;
BEGIN
EXECUTE format('EXPLAIN (FORMAT XML) %s', query) INTO result;
RETURN result;
END;
$BODY$ LANGUAGE plpgsql;
-- Test Text format
EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Distributed Query into pg_merge_job_570000
Executor: Real-Time
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_quantity
-> Seq Scan on lineitem_290001 lineitem
Master Query
-> Sort
Sort Key: COALESCE((sum((COALESCE((sum(intermediate_column_570000_1))::bigint, '0'::bigint))))::bigint, '0'::bigint), intermediate_column_570000_0
-> HashAggregate
Group Key: intermediate_column_570000_0
-> Seq Scan on pg_merge_job_570000
-- Test JSON format
EXPLAIN (COSTS FALSE, FORMAT JSON)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
[
{
"Executor": "Real-Time",
"Job": {
"Task Count": 8,
"Tasks Shown": "One of 8",
"Tasks": [
{
"Node": "host=localhost port=57637 dbname=regression",
"Remote Plan": [
[
{
"Plan": {
"Node Type": "Aggregate",
"Strategy": "Hashed",
"Group Key": ["l_quantity"],
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Relation Name": "lineitem_290001",
"Alias": "lineitem"
}
]
}
}
]
]
}
]
},
"Master Query": [
{
"Plan": {
"Node Type": "Sort",
"Sort Key": ["COALESCE((sum((COALESCE((sum(intermediate_column_570001_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)", "intermediate_column_570001_0"],
"Plans": [
{
"Node Type": "Aggregate",
"Strategy": "Hashed",
"Parent Relationship": "Outer",
"Group Key": ["intermediate_column_570001_0"],
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Relation Name": "pg_merge_job_570001",
"Alias": "pg_merge_job_570001"
}
]
}
]
}
}
]
}
]
-- Validate JSON format
SELECT true AS valid FROM explain_json($$
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$);
t
-- Test XML format
EXPLAIN (COSTS FALSE, FORMAT XML)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Real-Time
8
One of 8
host=localhost port=57637 dbname=regression
Aggregate
Hashed
- l_quantity
Seq Scan
Outer
lineitem_290001
lineitem
Sort
- COALESCE((sum((COALESCE((sum(intermediate_column_570003_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
- intermediate_column_570003_0
Aggregate
Hashed
Outer
- intermediate_column_570003_0
Seq Scan
Outer
pg_merge_job_570003
pg_merge_job_570003
-- Validate XML format
SELECT true AS valid FROM explain_xml($$
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$);
t
-- Test YAML format
EXPLAIN (COSTS FALSE, FORMAT YAML)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
- Executor: "Real-Time"
Job:
Task Count: 8
Tasks Shown: "One of 8"
Tasks:
- Node: "host=localhost port=57637 dbname=regression"
Remote Plan:
- Plan:
Node Type: "Aggregate"
Strategy: "Hashed"
Group Key:
- "l_quantity"
Plans:
- Node Type: "Seq Scan"
Parent Relationship: "Outer"
Relation Name: "lineitem_290001"
Alias: "lineitem"
Master Query:
- Plan:
Node Type: "Sort"
Sort Key:
- "COALESCE((sum((COALESCE((sum(intermediate_column_570005_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)"
- "intermediate_column_570005_0"
Plans:
- Node Type: "Aggregate"
Strategy: "Hashed"
Parent Relationship: "Outer"
Group Key:
- "intermediate_column_570005_0"
Plans:
- Node Type: "Seq Scan"
Parent Relationship: "Outer"
Relation Name: "pg_merge_job_570005"
Alias: "pg_merge_job_570005"
-- Test Text format
EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Distributed Query into pg_merge_job_570006
Executor: Real-Time
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_quantity
-> Seq Scan on lineitem_290001 lineitem
Master Query
-> Sort
Sort Key: COALESCE((sum((COALESCE((sum(intermediate_column_570006_1))::bigint, '0'::bigint))))::bigint, '0'::bigint), intermediate_column_570006_0
-> HashAggregate
Group Key: intermediate_column_570006_0
-> Seq Scan on pg_merge_job_570006
-- Test verbose
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem;
Distributed Query into pg_merge_job_570007
Executor: Real-Time
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
Output: sum(l_quantity), sum(l_quantity), count(l_quantity)
-> Seq Scan on public.lineitem_290001 lineitem
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
Master Query
-> Aggregate
Output: (sum(intermediate_column_570007_0) / (sum(intermediate_column_570007_1) / sum(intermediate_column_570007_2)))
-> Seq Scan on pg_temp_2.pg_merge_job_570007
Output: intermediate_column_570007_0, intermediate_column_570007_1, intermediate_column_570007_2
-- Test join
EXPLAIN (COSTS FALSE)
SELECT * FROM lineitem
JOIN orders ON l_orderkey = o_orderkey AND l_quantity < 5.0
ORDER BY l_quantity LIMIT 10;
Distributed Query into pg_merge_job_570008
Executor: Real-Time
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: lineitem.l_quantity
-> Hash Join
Hash Cond: (lineitem.l_orderkey = orders.o_orderkey)
-> Seq Scan on lineitem_290001 lineitem
Filter: (l_quantity < 5.0)
-> Hash
-> Seq Scan on orders_290008 orders
Master Query
-> Limit
-> Sort
Sort Key: intermediate_column_570008_4
-> Seq Scan on pg_merge_job_570008
-- Test insert
EXPLAIN (COSTS FALSE)
INSERT INTO lineitem VALUES(1,0);
Distributed Query
Executor: Router
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57638 dbname=regression
-> Insert on lineitem_290000
-> Result
-- Test update
EXPLAIN (COSTS FALSE)
UPDATE lineitem
SET l_suppkey = 12
WHERE l_orderkey = 1 AND l_partkey = 0;
Distributed Query
Executor: Router
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57638 dbname=regression
-> Update on lineitem_290000
-> Bitmap Heap Scan on lineitem_290000
Recheck Cond: (l_orderkey = 1)
Filter: (l_partkey = 0)
-> Bitmap Index Scan on lineitem_pkey_290000
Index Cond: (l_orderkey = 1)
-- Test delete
EXPLAIN (COSTS FALSE)
DELETE FROM lineitem
WHERE l_orderkey = 1 AND l_partkey = 0;
Distributed Query
Executor: Router
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57638 dbname=regression
-> Delete on lineitem_290000
-> Bitmap Heap Scan on lineitem_290000
Recheck Cond: (l_orderkey = 1)
Filter: (l_partkey = 0)
-> Bitmap Index Scan on lineitem_pkey_290000
Index Cond: (l_orderkey = 1)
-- Test single-shard SELECT
EXPLAIN (COSTS FALSE)
SELECT l_quantity FROM lineitem WHERE l_orderkey = 5;
Distributed Query into pg_merge_job_570009
Executor: Router
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Bitmap Heap Scan on lineitem_290000 lineitem
Recheck Cond: (l_orderkey = 5)
-> Bitmap Index Scan on lineitem_pkey_290000
Index Cond: (l_orderkey = 5)
SELECT true AS valid FROM explain_xml($$
SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$);
t
SELECT true AS valid FROM explain_json($$
SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$);
t
-- Test CREATE TABLE ... AS
EXPLAIN (COSTS FALSE)
CREATE TABLE explain_result AS
SELECT * FROM lineitem;
Distributed Query into pg_merge_job_570012
Executor: Real-Time
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Seq Scan on lineitem_290001 lineitem
Master Query
-> Seq Scan on pg_merge_job_570012
-- Test having
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem
HAVING sum(l_quantity) > 100;
Distributed Query into pg_merge_job_570013
Executor: Real-Time
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
Output: sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity)
-> Seq Scan on public.lineitem_290001 lineitem
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
Master Query
-> Aggregate
Output: (sum(intermediate_column_570013_0) / (sum(intermediate_column_570013_1) / sum(intermediate_column_570013_2)))
Filter: (sum(pg_merge_job_570013.intermediate_column_570013_3) > '100'::numeric)
-> Seq Scan on pg_temp_2.pg_merge_job_570013
Output: intermediate_column_570013_0, intermediate_column_570013_1, intermediate_column_570013_2, intermediate_column_570013_3
-- Test having without aggregate
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT l_quantity FROM lineitem
GROUP BY l_quantity
HAVING l_quantity > (100 * random());
Distributed Query into pg_merge_job_570014
Executor: Real-Time
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Output: l_quantity, l_quantity
Group Key: lineitem.l_quantity
-> Seq Scan on public.lineitem_290001 lineitem
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
Master Query
-> HashAggregate
Output: intermediate_column_570014_0
Group Key: pg_merge_job_570014.intermediate_column_570014_0
Filter: ((pg_merge_job_570014.intermediate_column_570014_1)::double precision > ('100'::double precision * random()))
-> Seq Scan on pg_temp_2.pg_merge_job_570014
Output: intermediate_column_570014_0, intermediate_column_570014_1
-- Test all tasks output
SET citus.explain_all_tasks TO on;
EXPLAIN (COSTS FALSE)
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030;
Distributed Query into pg_merge_job_570015
Executor: Real-Time
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290005 lineitem
Filter: (l_orderkey > 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290004 lineitem
Filter: (l_orderkey > 9030)
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290007 lineitem
Filter: (l_orderkey > 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290006 lineitem
Filter: (l_orderkey > 9030)
Master Query
-> Aggregate
-> Seq Scan on pg_merge_job_570015
SELECT true AS valid FROM explain_xml($$
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$);
t
SELECT true AS valid FROM explain_json($$
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$);
t
-- Test track tracker
SET citus.task_executor_type TO 'task-tracker';
SET citus.explain_all_tasks TO off;
EXPLAIN (COSTS FALSE)
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030;
Distributed Query into pg_merge_job_570018
Executor: Task-Tracker
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290005 lineitem
Filter: (l_orderkey > 9030)
Master Query
-> Aggregate
-> Seq Scan on pg_merge_job_570018
-- Test re-partition join
SET citus.large_table_shard_count TO 1;
EXPLAIN (COSTS FALSE)
SELECT count(*)
FROM lineitem, orders, customer, supplier
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey;
Distributed Query into pg_merge_job_570021
Executor: Task-Tracker
Task Count: 1
Tasks Shown: None, not supported for re-partition queries
-> MapMergeJob
Map Task Count: 1
Merge Task Count: 1
-> MapMergeJob
Map Task Count: 8
Merge Task Count: 1
Master Query
-> Aggregate
-> Seq Scan on pg_merge_job_570021
EXPLAIN (COSTS FALSE, FORMAT JSON)
SELECT count(*)
FROM lineitem, orders, customer, supplier
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey;
[
{
"Executor": "Task-Tracker",
"Job": {
"Task Count": 1,
"Tasks Shown": "None, not supported for re-partition queries",
"Depended Jobs": [
{
"Map Task Count": 1,
"Merge Task Count": 1,
"Depended Jobs": [
{
"Map Task Count": 8,
"Merge Task Count": 1
}
]
}
]
},
"Master Query": [
{
"Plan": {
"Node Type": "Aggregate",
"Strategy": "Plain",
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Relation Name": "pg_merge_job_570024",
"Alias": "pg_merge_job_570024"
}
]
}
}
]
}
]
SELECT true AS valid FROM explain_json($$
SELECT count(*)
FROM lineitem, orders, customer, supplier
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey$$);
t
EXPLAIN (COSTS FALSE, FORMAT XML)
SELECT count(*)
FROM lineitem, orders, customer, supplier
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey;
Task-Tracker
1
None, not supported for re-partition queries
1
1
8
1
Aggregate
Plain
Seq Scan
Outer
pg_merge_job_570030
pg_merge_job_570030
SELECT true AS valid FROM explain_xml($$
SELECT count(*)
FROM lineitem, orders, customer, supplier
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey$$);
t
EXPLAIN (COSTS FALSE, FORMAT YAML)
SELECT count(*)
FROM lineitem, orders, customer, supplier
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey;
- Executor: "Task-Tracker"
Job:
Task Count: 1
Tasks Shown: "None, not supported for re-partition queries"
Depended Jobs:
- Map Task Count: 1
Merge Task Count: 1
Depended Jobs:
- Map Task Count: 8
Merge Task Count: 1
Master Query:
- Plan:
Node Type: "Aggregate"
Strategy: "Plain"
Plans:
- Node Type: "Seq Scan"
Parent Relationship: "Outer"
Relation Name: "pg_merge_job_570036"
Alias: "pg_merge_job_570036"