citus/src/test/regress/expected/multi_explain_0.out

660 lines
24 KiB
Plaintext

--
-- MULTI_EXPLAIN
--
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 570000;
-- print major version to make version-specific tests clear
SELECT substring(version(), '\d+(?:\.\d+)?') AS major_version;
major_version
---------------
9.5
(1 row)
\a\t
SET citus.task_executor_type TO 'real-time';
SET citus.explain_distributed_queries TO on;
-- Function that parses explain output as JSON
CREATE FUNCTION explain_json(query text)
RETURNS jsonb
AS $BODY$
DECLARE
result jsonb;
BEGIN
EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result;
RETURN result;
END;
$BODY$ LANGUAGE plpgsql;
-- Function that parses explain output as XML
CREATE FUNCTION explain_xml(query text)
RETURNS xml
AS $BODY$
DECLARE
result xml;
BEGIN
EXECUTE format('EXPLAIN (FORMAT XML) %s', query) INTO result;
RETURN result;
END;
$BODY$ LANGUAGE plpgsql;
-- VACUMM related tables to ensure test outputs are stable
VACUUM ANALYZE lineitem;
VACUUM ANALYZE orders;
-- Test Text format
EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
-> HashAggregate
Group Key: remote_scan.l_quantity
-> Custom Scan (Citus Real-Time)
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_quantity
-> Seq Scan on lineitem_290001 lineitem
-- Test JSON format
EXPLAIN (COSTS FALSE, FORMAT JSON)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
[
{
"Plan": {
"Node Type": "Sort",
"Sort Key": ["COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)", "remote_scan.l_quantity"],
"Plans": [
{
"Node Type": "Aggregate",
"Strategy": "Hashed",
"Parent Relationship": "Outer",
"Group Key": ["remote_scan.l_quantity"],
"Plans": [
{
"Node Type": "Custom Scan",
"Parent Relationship": "Outer",
"Custom Plan Provider": "Citus Real-Time",
"Distributed Query": {
"Job": {
"Task Count": 8,
"Tasks Shown": "One of 8",
"Tasks": [
{
"Node": "host=localhost port=57637 dbname=regression",
"Remote Plan": [
[
{
"Plan": {
"Node Type": "Aggregate",
"Strategy": "Hashed",
"Group Key": ["l_quantity"],
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Relation Name": "lineitem_290001",
"Alias": "lineitem"
}
]
}
}
]
]
}
]
}
}
}
]
}
]
}
}
]
-- Validate JSON format
SELECT true AS valid FROM explain_json($$
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$);
t
-- Test XML format
EXPLAIN (COSTS FALSE, FORMAT XML)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Sort</Node-Type>
<Sort-Key>
<Item>COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)</Item>
<Item>remote_scan.l_quantity</Item>
</Sort-Key>
<Plans>
<Plan>
<Node-Type>Aggregate</Node-Type>
<Strategy>Hashed</Strategy>
<Parent-Relationship>Outer</Parent-Relationship>
<Group-Key>
<Item>remote_scan.l_quantity</Item>
</Group-Key>
<Plans>
<Plan>
<Node-Type>Custom Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Custom-Plan-Provider>Citus Real-Time</Custom-Plan-Provider>
<Distributed-Query>
<Job>
<Task-Count>8</Task-Count>
<Tasks-Shown>One of 8</Tasks-Shown>
<Tasks>
<Task>
<Node>host=localhost port=57637 dbname=regression</Node>
<Remote-Plan>
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Aggregate</Node-Type>
<Strategy>Hashed</Strategy>
<Group-Key>
<Item>l_quantity</Item>
</Group-Key>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>lineitem_290001</Relation-Name>
<Alias>lineitem</Alias>
</Plan>
</Plans>
</Plan>
</Query>
</explain>
</Remote-Plan>
</Task>
</Tasks>
</Job>
</Distributed-Query>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Query>
</explain>
-- Validate XML format
SELECT true AS valid FROM explain_xml($$
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$);
t
-- Test YAML format
EXPLAIN (COSTS FALSE, FORMAT YAML)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
- Plan:
Node Type: "Sort"
Sort Key:
- "COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)"
- "remote_scan.l_quantity"
Plans:
- Node Type: "Aggregate"
Strategy: "Hashed"
Parent Relationship: "Outer"
Group Key:
- "remote_scan.l_quantity"
Plans:
- Node Type: "Custom Scan"
Parent Relationship: "Outer"
Custom Plan Provider: "Citus Real-Time"
Distributed Query:
Job:
Task Count: 8
Tasks Shown: "One of 8"
Tasks:
- Node: "host=localhost port=57637 dbname=regression"
Remote Plan:
- Plan:
Node Type: "Aggregate"
Strategy: "Hashed"
Group Key:
- "l_quantity"
Plans:
- Node Type: "Seq Scan"
Parent Relationship: "Outer"
Relation Name: "lineitem_290001"
Alias: "lineitem"
-- Test Text format
EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
-> HashAggregate
Group Key: remote_scan.l_quantity
-> Custom Scan (Citus Real-Time)
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_quantity
-> Seq Scan on lineitem_290001 lineitem
-- Test verbose
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem;
Aggregate
Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / sum(remote_scan."?column?_2")))
-> Custom Scan (Citus Real-Time)
Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2"
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
Output: sum(l_quantity), sum(l_quantity), count(l_quantity)
-> Seq Scan on public.lineitem_290001 lineitem
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-- Test join
EXPLAIN (COSTS FALSE)
SELECT * FROM lineitem
JOIN orders ON l_orderkey = o_orderkey AND l_quantity < 5.0
ORDER BY l_quantity LIMIT 10;
Limit
-> Sort
Sort Key: remote_scan.l_quantity
-> Custom Scan (Citus Real-Time)
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: lineitem.l_quantity
-> Merge Join
Merge Cond: (orders.o_orderkey = lineitem.l_orderkey)
-> Index Scan using orders_pkey_290008 on orders_290008 orders
-> Sort
Sort Key: lineitem.l_orderkey
-> Seq Scan on lineitem_290001 lineitem
Filter: (l_quantity < 5.0)
-- Test insert
EXPLAIN (COSTS FALSE)
INSERT INTO lineitem VALUES(1,0);
Custom Scan (Citus Router)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57638 dbname=regression
-> Insert on lineitem_290000
-> Result
-- Test update
EXPLAIN (COSTS FALSE)
UPDATE lineitem
SET l_suppkey = 12
WHERE l_orderkey = 1 AND l_partkey = 0;
Custom Scan (Citus Router)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57638 dbname=regression
-> Update on lineitem_290000
-> Index Scan using lineitem_pkey_290000 on lineitem_290000
Index Cond: (l_orderkey = 1)
Filter: (l_partkey = 0)
-- Test delete
EXPLAIN (COSTS FALSE)
DELETE FROM lineitem
WHERE l_orderkey = 1 AND l_partkey = 0;
Custom Scan (Citus Router)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57638 dbname=regression
-> Delete on lineitem_290000
-> Index Scan using lineitem_pkey_290000 on lineitem_290000
Index Cond: (l_orderkey = 1)
Filter: (l_partkey = 0)
-- Test single-shard SELECT
EXPLAIN (COSTS FALSE)
SELECT l_quantity FROM lineitem WHERE l_orderkey = 5;
Custom Scan (Citus Router)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem
Index Cond: (l_orderkey = 5)
SELECT true AS valid FROM explain_xml($$
SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$);
t
SELECT true AS valid FROM explain_json($$
SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$);
t
-- Test CREATE TABLE ... AS
EXPLAIN (COSTS FALSE)
CREATE TABLE explain_result AS
SELECT * FROM lineitem;
Custom Scan (Citus Real-Time)
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Seq Scan on lineitem_290001 lineitem
-- Test having
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem
HAVING sum(l_quantity) > 100;
Aggregate
Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / sum(remote_scan."?column?_2")))
Filter: (sum(remote_scan.worker_column_4) > '100'::numeric)
-> Custom Scan (Citus Real-Time)
Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2", remote_scan.worker_column_4
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
Output: sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity)
-> Seq Scan on public.lineitem_290001 lineitem
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-- Test having without aggregate
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT l_quantity FROM lineitem
GROUP BY l_quantity
HAVING l_quantity > (100 * random());
HashAggregate
Output: remote_scan.l_quantity
Group Key: remote_scan.l_quantity
Filter: ((remote_scan.worker_column_2)::double precision > ('100'::double precision * random()))
-> Custom Scan (Citus Real-Time)
Output: remote_scan.l_quantity, remote_scan.worker_column_2
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Output: l_quantity, l_quantity
Group Key: lineitem.l_quantity
-> Seq Scan on public.lineitem_290001 lineitem
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-- Test all tasks output
SET citus.explain_all_tasks TO on;
EXPLAIN (COSTS FALSE)
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030;
Aggregate
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290005 lineitem
Filter: (l_orderkey > 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290004 lineitem
Filter: (l_orderkey > 9030)
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290007 lineitem
Filter: (l_orderkey > 9030)
-> Task
Node: host=localhost port=57638 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290006 lineitem
Filter: (l_orderkey > 9030)
SELECT true AS valid FROM explain_xml($$
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$);
t
SELECT true AS valid FROM explain_json($$
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$);
t
-- Test track tracker
SET citus.task_executor_type TO 'task-tracker';
SET citus.explain_all_tasks TO off;
EXPLAIN (COSTS FALSE)
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030;
Aggregate
-> Custom Scan (Citus Task-Tracker)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290005 lineitem
Filter: (l_orderkey > 9030)
-- Test re-partition join
SET citus.large_table_shard_count TO 1;
EXPLAIN (COSTS FALSE)
SELECT count(*)
FROM lineitem, orders, customer, supplier_single_shard
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey;
Aggregate
-> Custom Scan (Citus Task-Tracker)
Task Count: 1
Tasks Shown: None, not supported for re-partition queries
-> MapMergeJob
Map Task Count: 1
Merge Task Count: 1
-> MapMergeJob
Map Task Count: 8
Merge Task Count: 1
EXPLAIN (COSTS FALSE, FORMAT JSON)
SELECT count(*)
FROM lineitem, orders, customer, supplier_single_shard
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey;
[
{
"Plan": {
"Node Type": "Aggregate",
"Strategy": "Plain",
"Plans": [
{
"Node Type": "Custom Scan",
"Parent Relationship": "Outer",
"Custom Plan Provider": "Citus Task-Tracker",
"Distributed Query": {
"Job": {
"Task Count": 1,
"Tasks Shown": "None, not supported for re-partition queries",
"Depended Jobs": [
{
"Map Task Count": 1,
"Merge Task Count": 1,
"Depended Jobs": [
{
"Map Task Count": 8,
"Merge Task Count": 1
}
]
}
]
}
}
}
]
}
}
]
SELECT true AS valid FROM explain_json($$
SELECT count(*)
FROM lineitem, orders, customer, supplier_single_shard
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey$$);
t
EXPLAIN (COSTS FALSE, FORMAT XML)
SELECT count(*)
FROM lineitem, orders, customer, supplier_single_shard
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey;
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Aggregate</Node-Type>
<Strategy>Plain</Strategy>
<Plans>
<Plan>
<Node-Type>Custom Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Custom-Plan-Provider>Citus Task-Tracker</Custom-Plan-Provider>
<Distributed-Query>
<Job>
<Task-Count>1</Task-Count>
<Tasks-Shown>None, not supported for re-partition queries</Tasks-Shown>
<Depended-Jobs>
<MapMergeJob>
<Map-Task-Count>1</Map-Task-Count>
<Merge-Task-Count>1</Merge-Task-Count>
<Depended-Jobs>
<MapMergeJob>
<Map-Task-Count>8</Map-Task-Count>
<Merge-Task-Count>1</Merge-Task-Count>
</MapMergeJob>
</Depended-Jobs>
</MapMergeJob>
</Depended-Jobs>
</Job>
</Distributed-Query>
</Plan>
</Plans>
</Plan>
</Query>
</explain>
SELECT true AS valid FROM explain_xml($$
SELECT count(*)
FROM lineitem, orders, customer, supplier
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey$$);
t
-- make sure that EXPLAIN works without
-- problems for queries that inlvolves only
-- reference tables
SELECT true AS valid FROM explain_xml($$
SELECT count(*)
FROM nation
WHERE n_name = 'CHINA'$$);
t
SELECT true AS valid FROM explain_xml($$
SELECT count(*)
FROM nation, supplier
WHERE nation.n_nationkey = supplier.s_nationkey$$);
t
EXPLAIN (COSTS FALSE, FORMAT YAML)
SELECT count(*)
FROM lineitem, orders, customer, supplier_single_shard
WHERE l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND l_suppkey = s_suppkey;
- Plan:
Node Type: "Aggregate"
Strategy: "Plain"
Plans:
- Node Type: "Custom Scan"
Parent Relationship: "Outer"
Custom Plan Provider: "Citus Task-Tracker"
Distributed Query:
Job:
Task Count: 1
Tasks Shown: "None, not supported for re-partition queries"
Depended Jobs:
- Map Task Count: 1
Merge Task Count: 1
Depended Jobs:
- Map Task Count: 8
Merge Task Count: 1
-- test parallel aggregates
SET parallel_setup_cost=0;
ERROR: unrecognized configuration parameter "parallel_setup_cost"
SET parallel_tuple_cost=0;
ERROR: unrecognized configuration parameter "parallel_tuple_cost"
SET min_parallel_relation_size=0;
ERROR: unrecognized configuration parameter "min_parallel_relation_size"
SET max_parallel_workers_per_gather=4;
ERROR: unrecognized configuration parameter "max_parallel_workers_per_gather"
-- ensure local plans display correctly
CREATE TABLE lineitem_clone (LIKE lineitem);
EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem_clone;
Aggregate
-> Seq Scan on lineitem_clone
-- ensure distributed plans don't break
EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem;
Aggregate
-> Custom Scan (Citus Task-Tracker)
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290001 lineitem
-- ensure EXPLAIN EXECUTE doesn't crash
PREPARE task_tracker_query AS
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030;
EXPLAIN (COSTS FALSE) EXECUTE task_tracker_query;
Aggregate
-> Custom Scan (Citus Task-Tracker)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290005 lineitem
Filter: (l_orderkey > 9030)
SET citus.task_executor_type TO 'real-time';
PREPARE router_executor_query AS SELECT l_quantity FROM lineitem WHERE l_orderkey = 5;
EXPLAIN EXECUTE router_executor_query;
Custom Scan (Citus Router) (cost=0.00..0.00 rows=0 width=0)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (cost=0.28..11.83 rows=3 width=5)
Index Cond: (l_orderkey = 5)
PREPARE real_time_executor_query AS
SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030;
EXPLAIN (COSTS FALSE) EXECUTE real_time_executor_query;
Aggregate
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Aggregate
-> Seq Scan on lineitem_290005 lineitem
Filter: (l_orderkey > 9030)
-- EXPLAIN EXECUTE of parametrized prepared statements is broken, but
-- at least make sure to fail without crashing
PREPARE router_executor_query_param(int) AS SELECT l_quantity FROM lineitem WHERE l_orderkey = $1;
EXPLAIN EXECUTE router_executor_query_param(5);
Custom Scan (Citus Router) (cost=0.00..0.00 rows=0 width=0)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (cost=0.28..11.83 rows=3 width=5)
Index Cond: (l_orderkey = 5)
-- test explain in a transaction with alter table to test we use right connections
BEGIN;
CREATE TABLE explain_table(id int);
SELECT create_distributed_table('explain_table', 'id');
ALTER TABLE explain_table ADD COLUMN value int;
NOTICE: using one-phase commit for distributed DDL commands
HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
EXPLAIN (COSTS FALSE) SELECT value FROM explain_table WHERE id = 1;
Custom Scan (Citus Router)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> Seq Scan on explain_table_570001 explain_table
Filter: (id = 1)
ROLLBACK;