diff --git a/src/test/regress/expected/multi_explain_1.out b/src/test/regress/expected/multi_explain_1.out deleted file mode 100644 index 1dad01db7..000000000 --- a/src/test/regress/expected/multi_explain_1.out +++ /dev/null @@ -1,1003 +0,0 @@ --- --- MULTI_EXPLAIN --- -ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 570000; --- print major version to make version-specific tests clear -SELECT substring(version(), '\d+(?:\.\d+)?') AS major_version; - major_version ---------------- - 9.5 -(1 row) - -\a\t -SET citus.task_executor_type TO 'real-time'; -SET citus.explain_distributed_queries TO on; --- Function that parses explain output as JSON -CREATE FUNCTION explain_json(query text) -RETURNS jsonb -AS $BODY$ -DECLARE - result jsonb; -BEGIN - EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result; - RETURN result; -END; -$BODY$ LANGUAGE plpgsql; --- Function that parses explain output as XML -CREATE FUNCTION explain_xml(query text) -RETURNS xml -AS $BODY$ -DECLARE - result xml; -BEGIN - EXECUTE format('EXPLAIN (FORMAT XML) %s', query) INTO result; - RETURN result; -END; -$BODY$ LANGUAGE plpgsql; --- VACUMM related tables to ensure test outputs are stable -VACUUM ANALYZE lineitem; -VACUUM ANALYZE orders; --- Test Text format -EXPLAIN (COSTS FALSE, FORMAT TEXT) - SELECT l_quantity, count(*) count_quantity FROM lineitem - GROUP BY l_quantity ORDER BY count_quantity, l_quantity; -Sort - Sort Key: COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity - -> HashAggregate - Group Key: remote_scan.l_quantity - -> Custom Scan (Citus Real-Time) - Task Count: 8 - Tasks Shown: One of 8 - -> Task - Node: host=localhost port=57637 dbname=regression - -> HashAggregate - Group Key: l_quantity - -> Seq Scan on lineitem_290001 lineitem --- Test JSON format -EXPLAIN (COSTS FALSE, FORMAT JSON) - SELECT l_quantity, count(*) count_quantity FROM lineitem - GROUP BY l_quantity ORDER BY count_quantity, l_quantity; -[ - { - "Plan": { - "Node Type": "Sort", - "Sort Key": ["COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)", "remote_scan.l_quantity"], - "Plans": [ - { - "Node Type": "Aggregate", - "Strategy": "Hashed", - "Parent Relationship": "Outer", - "Group Key": ["remote_scan.l_quantity"], - "Plans": [ - { - "Node Type": "Custom Scan", - "Parent Relationship": "Outer", - "Custom Plan Provider": "Citus Real-Time", - "Distributed Query": { - "Job": { - "Task Count": 8, - "Tasks Shown": "One of 8", - "Tasks": [ - { - "Node": "host=localhost port=57637 dbname=regression", - "Remote Plan": [ - [ - { - "Plan": { - "Node Type": "Aggregate", - "Strategy": "Hashed", - "Group Key": ["l_quantity"], - "Plans": [ - { - "Node Type": "Seq Scan", - "Parent Relationship": "Outer", - "Relation Name": "lineitem_290001", - "Alias": "lineitem" - } - ] - } - } - ] - - ] - } - ] - } - } - } - ] - } - ] - } - } -] --- Validate JSON format -SELECT true AS valid FROM explain_json($$ - SELECT l_quantity, count(*) count_quantity FROM lineitem - GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$); -t --- Test XML format -EXPLAIN (COSTS FALSE, FORMAT XML) - SELECT l_quantity, count(*) count_quantity FROM lineitem - GROUP BY l_quantity ORDER BY count_quantity, l_quantity; - - - - Sort - - COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint) - remote_scan.l_quantity - - - - Aggregate - Hashed - Outer - - remote_scan.l_quantity - - - - Custom Scan - Outer - Citus Real-Time - - - 8 - One of 8 - - - host=localhost port=57637 dbname=regression - - - - - Aggregate - Hashed - - l_quantity - - - - Seq Scan - Outer - lineitem_290001 - lineitem - - - - - - - - - - - - - - - - - --- Validate XML format -SELECT true AS valid FROM explain_xml($$ - SELECT l_quantity, count(*) count_quantity FROM lineitem - GROUP BY l_quantity ORDER BY count_quantity, l_quantity$$); -t --- Test YAML format -EXPLAIN (COSTS FALSE, FORMAT YAML) - SELECT l_quantity, count(*) count_quantity FROM lineitem - GROUP BY l_quantity ORDER BY count_quantity, l_quantity; -- Plan: - Node Type: "Sort" - Sort Key: - - "COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint)" - - "remote_scan.l_quantity" - Plans: - - Node Type: "Aggregate" - Strategy: "Hashed" - Parent Relationship: "Outer" - Group Key: - - "remote_scan.l_quantity" - Plans: - - Node Type: "Custom Scan" - Parent Relationship: "Outer" - Custom Plan Provider: "Citus Real-Time" - Distributed Query: - Job: - Task Count: 8 - Tasks Shown: "One of 8" - Tasks: - - Node: "host=localhost port=57637 dbname=regression" - Remote Plan: - - Plan: - Node Type: "Aggregate" - Strategy: "Hashed" - Group Key: - - "l_quantity" - Plans: - - Node Type: "Seq Scan" - Parent Relationship: "Outer" - Relation Name: "lineitem_290001" - Alias: "lineitem" - --- Test Text format -EXPLAIN (COSTS FALSE, FORMAT TEXT) - SELECT l_quantity, count(*) count_quantity FROM lineitem - GROUP BY l_quantity ORDER BY count_quantity, l_quantity; -Sort - Sort Key: COALESCE((sum((COALESCE((sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity - -> HashAggregate - Group Key: remote_scan.l_quantity - -> Custom Scan (Citus Real-Time) - Task Count: 8 - Tasks Shown: One of 8 - -> Task - Node: host=localhost port=57637 dbname=regression - -> HashAggregate - Group Key: l_quantity - -> Seq Scan on lineitem_290001 lineitem --- Test verbose -EXPLAIN (COSTS FALSE, VERBOSE TRUE) - SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem; -Aggregate - Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / sum(remote_scan."?column?_2"))) - -> Custom Scan (Citus Real-Time) - Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2" - Task Count: 8 - Tasks Shown: One of 8 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - Output: sum(l_quantity), sum(l_quantity), count(l_quantity) - -> Seq Scan on public.lineitem_290001 lineitem - Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment --- Test join -EXPLAIN (COSTS FALSE) - SELECT * FROM lineitem - JOIN orders ON l_orderkey = o_orderkey AND l_quantity < 5.0 - ORDER BY l_quantity LIMIT 10; -Limit - -> Sort - Sort Key: remote_scan.l_quantity - -> Custom Scan (Citus Real-Time) - Task Count: 8 - Tasks Shown: One of 8 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Limit - -> Sort - Sort Key: lineitem.l_quantity - -> Merge Join - Merge Cond: (orders.o_orderkey = lineitem.l_orderkey) - -> Index Scan using orders_pkey_290008 on orders_290008 orders - -> Sort - Sort Key: lineitem.l_orderkey - -> Seq Scan on lineitem_290001 lineitem - Filter: (l_quantity < 5.0) --- Test insert -EXPLAIN (COSTS FALSE) - INSERT INTO lineitem VALUES(1,0); -Custom Scan (Citus Router) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=57638 dbname=regression - -> Insert on lineitem_290000 - -> Result --- Test update -EXPLAIN (COSTS FALSE) - UPDATE lineitem - SET l_suppkey = 12 - WHERE l_orderkey = 1 AND l_partkey = 0; -Custom Scan (Citus Router) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=57638 dbname=regression - -> Update on lineitem_290000 - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 - Index Cond: (l_orderkey = 1) - Filter: (l_partkey = 0) --- Test delete -EXPLAIN (COSTS FALSE) - DELETE FROM lineitem - WHERE l_orderkey = 1 AND l_partkey = 0; -Custom Scan (Citus Router) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=57638 dbname=regression - -> Delete on lineitem_290000 - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 - Index Cond: (l_orderkey = 1) - Filter: (l_partkey = 0) --- Test single-shard SELECT -EXPLAIN (COSTS FALSE) - SELECT l_quantity FROM lineitem WHERE l_orderkey = 5; -Custom Scan (Citus Router) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=57637 dbname=regression - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem - Index Cond: (l_orderkey = 5) -SELECT true AS valid FROM explain_xml($$ - SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$); -t -SELECT true AS valid FROM explain_json($$ - SELECT l_quantity FROM lineitem WHERE l_orderkey = 5$$); -t --- Test CREATE TABLE ... AS -EXPLAIN (COSTS FALSE) - CREATE TABLE explain_result AS - SELECT * FROM lineitem; -Custom Scan (Citus Real-Time) - Task Count: 8 - Tasks Shown: One of 8 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Seq Scan on lineitem_290001 lineitem --- Test having -EXPLAIN (COSTS FALSE, VERBOSE TRUE) - SELECT sum(l_quantity) / avg(l_quantity) FROM lineitem - HAVING sum(l_quantity) > 100; -Aggregate - Output: (sum(remote_scan."?column?") / (sum(remote_scan."?column?_1") / sum(remote_scan."?column?_2"))) - Filter: (sum(remote_scan.worker_column_4) > '100'::numeric) - -> Custom Scan (Citus Real-Time) - Output: remote_scan."?column?", remote_scan."?column?_1", remote_scan."?column?_2", remote_scan.worker_column_4 - Task Count: 8 - Tasks Shown: One of 8 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - Output: sum(l_quantity), sum(l_quantity), count(l_quantity), sum(l_quantity) - -> Seq Scan on public.lineitem_290001 lineitem - Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment --- Test having without aggregate -EXPLAIN (COSTS FALSE, VERBOSE TRUE) - SELECT l_quantity FROM lineitem - GROUP BY l_quantity - HAVING l_quantity > (100 * random()); -HashAggregate - Output: remote_scan.l_quantity - Group Key: remote_scan.l_quantity - Filter: ((remote_scan.worker_column_2)::double precision > ('100'::double precision * random())) - -> Custom Scan (Citus Real-Time) - Output: remote_scan.l_quantity, remote_scan.worker_column_2 - Task Count: 8 - Tasks Shown: One of 8 - -> Task - Node: host=localhost port=57637 dbname=regression - -> HashAggregate - Output: l_quantity, l_quantity - Group Key: lineitem.l_quantity - -> Seq Scan on public.lineitem_290001 lineitem - Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment --- Subquery pushdown tests with explain -EXPLAIN (COSTS OFF) -SELECT - avg(array_length(events, 1)) AS event_average -FROM - (SELECT - tenant_id, - user_id, - array_agg(event_type ORDER BY event_time) AS events - FROM - (SELECT - (users.composite_id).tenant_id, - (users.composite_id).user_id, - event_type, - events.event_time - FROM - users, - events - WHERE - (users.composite_id) = (events.composite_id) AND - users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND - users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND - event_type IN ('click', 'submit', 'pay')) AS subquery - GROUP BY - tenant_id, - user_id) AS subquery; -Aggregate - -> Custom Scan (Citus Real-Time) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - -> GroupAggregate - Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) - -> Sort - Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) - -> Result - One-Time Filter: false --- Union and left join subquery pushdown -EXPLAIN (COSTS OFF) -SELECT - avg(array_length(events, 1)) AS event_average, - hasdone -FROM - (SELECT - subquery_1.tenant_id, - subquery_1.user_id, - array_agg(event ORDER BY event_time) AS events, - COALESCE(hasdone, 'Has not done paying') AS hasdone - FROM - ( - (SELECT - (users.composite_id).tenant_id, - (users.composite_id).user_id, - (users.composite_id) as composite_id, - 'action=>1'AS event, - events.event_time - FROM - users, - events - WHERE - (users.composite_id) = (events.composite_id) AND - users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND - users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND - event_type = 'click') - UNION - (SELECT - (users.composite_id).tenant_id, - (users.composite_id).user_id, - (users.composite_id) as composite_id, - 'action=>2'AS event, - events.event_time - FROM - users, - events - WHERE - (users.composite_id) = (events.composite_id) AND - users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND - users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND - event_type = 'submit') - ) AS subquery_1 - LEFT JOIN - (SELECT - DISTINCT ON ((composite_id).tenant_id, (composite_id).user_id) composite_id, - (composite_id).tenant_id, - (composite_id).user_id, - 'Has done paying'::TEXT AS hasdone - FROM - events - WHERE - events.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND - events.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND - event_type = 'pay') AS subquery_2 - ON - subquery_1.composite_id = subquery_2.composite_id - GROUP BY - subquery_1.tenant_id, - subquery_1.user_id, - hasdone) AS subquery_top -GROUP BY - hasdone; -HashAggregate - Group Key: remote_scan.hasdone - -> Custom Scan (Citus Real-Time) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=57637 dbname=regression - -> HashAggregate - Group Key: COALESCE(subquery_2.hasdone, 'Has not done paying'::text) - -> GroupAggregate - Group Key: ((composite_id).tenant_id), ((composite_id).user_id), subquery_2.hasdone - -> Sort - Sort Key: ((composite_id).tenant_id), ((composite_id).user_id), subquery_2.hasdone - -> Hash Left Join - Hash Cond: (composite_id = subquery_2.composite_id) - -> Unique - -> Sort - Sort Key: ((composite_id).tenant_id), ((composite_id).user_id), composite_id, ('action=>1'::text), event_time - -> Append - -> Result - One-Time Filter: false - -> Result - One-Time Filter: false - -> Hash - -> Subquery Scan on subquery_2 - -> Unique - -> Sort - Sort Key: ((events.composite_id).tenant_id), ((events.composite_id).user_id) - -> Seq Scan on events_1400027 events - Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text)) --- Union, left join and having subquery pushdown -EXPLAIN (COSTS OFF) - SELECT - avg(array_length(events, 1)) AS event_average, - count_pay - FROM ( - SELECT - subquery_1.tenant_id, - subquery_1.user_id, - array_agg(event ORDER BY event_time) AS events, - COALESCE(count_pay, 0) AS count_pay - FROM - ( - (SELECT - (users.composite_id).tenant_id, - (users.composite_id).user_id, - (users.composite_id), - 'action=>1'AS event, - events.event_time - FROM - users, - events - WHERE - (users.composite_id) = (events.composite_id) AND - users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND - users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND - event_type = 'click') - UNION - (SELECT - (users.composite_id).tenant_id, - (users.composite_id).user_id, - (users.composite_id), - 'action=>2'AS event, - events.event_time - FROM - users, - events - WHERE - (users.composite_id) = (events.composite_id) AND - users.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND - users.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND - event_type = 'submit') - ) AS subquery_1 - LEFT JOIN - (SELECT - (composite_id).tenant_id, - (composite_id).user_id, - composite_id, - COUNT(*) AS count_pay - FROM - events - WHERE - events.composite_id >= '(1, -9223372036854775808)'::user_composite_type AND - events.composite_id <= '(1, 9223372036854775807)'::user_composite_type AND - event_type = 'pay' - GROUP BY - composite_id - HAVING - COUNT(*) > 2) AS subquery_2 - ON - subquery_1.composite_id = subquery_2.composite_id - GROUP BY - subquery_1.tenant_id, - subquery_1.user_id, - count_pay) AS subquery_top -WHERE - array_ndims(events) > 0 -GROUP BY - count_pay -ORDER BY - count_pay; -ERROR: bogus varattno for OUTER_VAR var: 3 --- Lateral join subquery pushdown --- set subquery_pushdown due to limit in the query -SET citus.subquery_pushdown to ON; -EXPLAIN (COSTS OFF) -SELECT - tenant_id, - user_id, - user_lastseen, - event_array -FROM - (SELECT - tenant_id, - user_id, - max(lastseen) as user_lastseen, - array_agg(event_type ORDER BY event_time) AS event_array - FROM - (SELECT - (composite_id).tenant_id, - (composite_id).user_id, - composite_id, - lastseen - FROM - users - WHERE - composite_id >= '(1, -9223372036854775808)'::user_composite_type AND - composite_id <= '(1, 9223372036854775807)'::user_composite_type - ORDER BY - lastseen DESC - LIMIT - 10 - ) AS subquery_top - LEFT JOIN LATERAL - (SELECT - event_type, - event_time - FROM - events - WHERE - (composite_id) = subquery_top.composite_id - ORDER BY - event_time DESC - LIMIT - 99) AS subquery_lateral - ON - true - GROUP BY - tenant_id, - user_id - ) AS shard_union -ORDER BY - user_lastseen DESC -LIMIT - 10; -Limit - -> Sort - Sort Key: remote_scan.user_lastseen DESC - -> Custom Scan (Citus Real-Time) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Limit - -> Sort - Sort Key: (max(lastseen)) DESC - -> GroupAggregate - Group Key: ((composite_id).tenant_id), ((composite_id).user_id) - -> Sort - Sort Key: ((composite_id).tenant_id), ((composite_id).user_id) - -> Nested Loop Left Join - -> Limit - -> Sort - Sort Key: lastseen DESC - -> Result - One-Time Filter: false - -> Limit - -> Sort - Sort Key: events.event_time DESC - -> Seq Scan on events_1400027 events - Filter: (composite_id = composite_id) --- Test all tasks output -SET citus.explain_all_tasks TO on; -EXPLAIN (COSTS FALSE) - SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; -Aggregate - -> Custom Scan (Citus Real-Time) - Task Count: 4 - Tasks Shown: All - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - -> Seq Scan on lineitem_290005 lineitem - Filter: (l_orderkey > 9030) - -> Task - Node: host=localhost port=57638 dbname=regression - -> Aggregate - -> Seq Scan on lineitem_290004 lineitem - Filter: (l_orderkey > 9030) - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - -> Seq Scan on lineitem_290007 lineitem - Filter: (l_orderkey > 9030) - -> Task - Node: host=localhost port=57638 dbname=regression - -> Aggregate - -> Seq Scan on lineitem_290006 lineitem - Filter: (l_orderkey > 9030) -SELECT true AS valid FROM explain_xml($$ - SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$); -t -SELECT true AS valid FROM explain_json($$ - SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030$$); -t --- Test track tracker -SET citus.task_executor_type TO 'task-tracker'; -SET citus.explain_all_tasks TO off; -EXPLAIN (COSTS FALSE) - SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; -Aggregate - -> Custom Scan (Citus Task-Tracker) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - -> Seq Scan on lineitem_290005 lineitem - Filter: (l_orderkey > 9030) --- Test re-partition join -SET citus.large_table_shard_count TO 1; -EXPLAIN (COSTS FALSE) - SELECT count(*) - FROM lineitem, orders, customer, supplier_single_shard - WHERE l_orderkey = o_orderkey - AND o_custkey = c_custkey - AND l_suppkey = s_suppkey; -Aggregate - -> Custom Scan (Citus Task-Tracker) - Task Count: 1 - Tasks Shown: None, not supported for re-partition queries - -> MapMergeJob - Map Task Count: 1 - Merge Task Count: 1 - -> MapMergeJob - Map Task Count: 8 - Merge Task Count: 1 -EXPLAIN (COSTS FALSE, FORMAT JSON) - SELECT count(*) - FROM lineitem, orders, customer, supplier_single_shard - WHERE l_orderkey = o_orderkey - AND o_custkey = c_custkey - AND l_suppkey = s_suppkey; -[ - { - "Plan": { - "Node Type": "Aggregate", - "Strategy": "Plain", - "Plans": [ - { - "Node Type": "Custom Scan", - "Parent Relationship": "Outer", - "Custom Plan Provider": "Citus Task-Tracker", - "Distributed Query": { - "Job": { - "Task Count": 1, - "Tasks Shown": "None, not supported for re-partition queries", - "Depended Jobs": [ - { - "Map Task Count": 1, - "Merge Task Count": 1, - "Depended Jobs": [ - { - "Map Task Count": 8, - "Merge Task Count": 1 - } - ] - } - ] - } - } - } - ] - } - } -] -SELECT true AS valid FROM explain_json($$ - SELECT count(*) - FROM lineitem, orders, customer, supplier_single_shard - WHERE l_orderkey = o_orderkey - AND o_custkey = c_custkey - AND l_suppkey = s_suppkey$$); -t -EXPLAIN (COSTS FALSE, FORMAT XML) - SELECT count(*) - FROM lineitem, orders, customer, supplier_single_shard - WHERE l_orderkey = o_orderkey - AND o_custkey = c_custkey - AND l_suppkey = s_suppkey; - - - - Aggregate - Plain - - - Custom Scan - Outer - Citus Task-Tracker - - - 1 - None, not supported for re-partition queries - - - 1 - 1 - - - 8 - 1 - - - - - - - - - - - -SELECT true AS valid FROM explain_xml($$ - SELECT count(*) - FROM lineitem, orders, customer, supplier - WHERE l_orderkey = o_orderkey - AND o_custkey = c_custkey - AND l_suppkey = s_suppkey$$); -t --- make sure that EXPLAIN works without --- problems for queries that inlvolves only --- reference tables -SELECT true AS valid FROM explain_xml($$ - SELECT count(*) - FROM nation - WHERE n_name = 'CHINA'$$); -t -SELECT true AS valid FROM explain_xml($$ - SELECT count(*) - FROM nation, supplier - WHERE nation.n_nationkey = supplier.s_nationkey$$); -t -EXPLAIN (COSTS FALSE, FORMAT YAML) - SELECT count(*) - FROM lineitem, orders, customer, supplier_single_shard - WHERE l_orderkey = o_orderkey - AND o_custkey = c_custkey - AND l_suppkey = s_suppkey; -- Plan: - Node Type: "Aggregate" - Strategy: "Plain" - Plans: - - Node Type: "Custom Scan" - Parent Relationship: "Outer" - Custom Plan Provider: "Citus Task-Tracker" - Distributed Query: - Job: - Task Count: 1 - Tasks Shown: "None, not supported for re-partition queries" - Depended Jobs: - - Map Task Count: 1 - Merge Task Count: 1 - Depended Jobs: - - Map Task Count: 8 - Merge Task Count: 1 --- test parallel aggregates -SET parallel_setup_cost=0; -ERROR: unrecognized configuration parameter "parallel_setup_cost" -SET parallel_tuple_cost=0; -ERROR: unrecognized configuration parameter "parallel_tuple_cost" -SET min_parallel_relation_size=0; -ERROR: unrecognized configuration parameter "min_parallel_relation_size" -SET min_parallel_table_scan_size=0; -ERROR: unrecognized configuration parameter "min_parallel_table_scan_size" -SET max_parallel_workers_per_gather=4; -ERROR: unrecognized configuration parameter "max_parallel_workers_per_gather" --- ensure local plans display correctly -CREATE TABLE lineitem_clone (LIKE lineitem); -EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem_clone; -Aggregate - -> Seq Scan on lineitem_clone --- ensure distributed plans don't break -EXPLAIN (COSTS FALSE) SELECT avg(l_linenumber) FROM lineitem; -Aggregate - -> Custom Scan (Citus Task-Tracker) - Task Count: 8 - Tasks Shown: One of 8 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - -> Seq Scan on lineitem_290001 lineitem --- ensure EXPLAIN EXECUTE doesn't crash -PREPARE task_tracker_query AS - SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; -EXPLAIN (COSTS FALSE) EXECUTE task_tracker_query; -Aggregate - -> Custom Scan (Citus Task-Tracker) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - -> Seq Scan on lineitem_290005 lineitem - Filter: (l_orderkey > 9030) -SET citus.task_executor_type TO 'real-time'; -PREPARE router_executor_query AS SELECT l_quantity FROM lineitem WHERE l_orderkey = 5; -EXPLAIN EXECUTE router_executor_query; -Custom Scan (Citus Router) (cost=0.00..0.00 rows=0 width=0) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=57637 dbname=regression - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (cost=0.28..11.83 rows=3 width=5) - Index Cond: (l_orderkey = 5) -PREPARE real_time_executor_query AS - SELECT avg(l_linenumber) FROM lineitem WHERE l_orderkey > 9030; -EXPLAIN (COSTS FALSE) EXECUTE real_time_executor_query; -Aggregate - -> Custom Scan (Citus Real-Time) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Aggregate - -> Seq Scan on lineitem_290005 lineitem - Filter: (l_orderkey > 9030) --- EXPLAIN EXECUTE of parametrized prepared statements is broken, but --- at least make sure to fail without crashing -PREPARE router_executor_query_param(int) AS SELECT l_quantity FROM lineitem WHERE l_orderkey = $1; -EXPLAIN EXECUTE router_executor_query_param(5); -Custom Scan (Citus Router) (cost=0.00..0.00 rows=0 width=0) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=57637 dbname=regression - -> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (cost=0.28..11.83 rows=3 width=5) - Index Cond: (l_orderkey = 5) --- test explain in a transaction with alter table to test we use right connections -BEGIN; -CREATE TABLE explain_table(id int); -SELECT create_distributed_table('explain_table', 'id'); - -ALTER TABLE explain_table ADD COLUMN value int; -NOTICE: using one-phase commit for distributed DDL commands -HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc' -EXPLAIN (COSTS FALSE) SELECT value FROM explain_table WHERE id = 1; -Custom Scan (Citus Router) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=57637 dbname=regression - -> Seq Scan on explain_table_570001 explain_table - Filter: (id = 1) -ROLLBACK; --- test explain with local INSERT ... SELECT -EXPLAIN (COSTS OFF) -INSERT INTO lineitem_hash_part -SELECT o_orderkey FROM orders_hash_part LIMIT 3; -Custom Scan (Citus INSERT ... SELECT via coordinator) - -> Limit - -> Custom Scan (Citus Real-Time) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Limit - -> Seq Scan on orders_hash_part_360295 orders_hash_part -SELECT true AS valid FROM explain_json($$ - INSERT INTO lineitem_hash_part (l_orderkey) - SELECT o_orderkey FROM orders_hash_part LIMIT 3; -$$); -t -EXPLAIN (COSTS OFF) -INSERT INTO lineitem_hash_part (l_orderkey, l_quantity) -SELECT o_orderkey, 5 FROM orders_hash_part LIMIT 3; -Custom Scan (Citus INSERT ... SELECT via coordinator) - -> Limit - -> Custom Scan (Citus Real-Time) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=57637 dbname=regression - -> Limit - -> Seq Scan on orders_hash_part_360295 orders_hash_part -EXPLAIN (COSTS OFF) -INSERT INTO lineitem_hash_part (l_orderkey) -SELECT s FROM generate_series(1,5) s; -Custom Scan (Citus INSERT ... SELECT via coordinator) - -> Function Scan on generate_series s -EXPLAIN (COSTS OFF) -WITH cte1 AS (SELECT s FROM generate_series(1,10) s) -INSERT INTO lineitem_hash_part -WITH cte1 AS (SELECT * FROM cte1 LIMIT 5) -SELECT s FROM cte1; -Custom Scan (Citus INSERT ... SELECT via coordinator) - -> Subquery Scan on citus_insert_select_subquery - CTE cte1 - -> Function Scan on generate_series s - -> CTE Scan on cte1 - CTE cte1 - -> Limit - -> CTE Scan on cte1 cte1_1 -EXPLAIN (COSTS OFF) -INSERT INTO lineitem_hash_part -( SELECT s FROM generate_series(1,5) s) UNION -( SELECT s FROM generate_series(5,10) s); -Custom Scan (Citus INSERT ... SELECT via coordinator) - -> Subquery Scan on citus_insert_select_subquery - -> HashAggregate - Group Key: s.s - -> Append - -> Function Scan on generate_series s - -> Function Scan on generate_series s_1 diff --git a/src/test/regress/expected/multi_join_order_additional.out b/src/test/regress/expected/multi_join_order_additional.out index 6e50b3a95..05971983c 100644 --- a/src/test/regress/expected/multi_join_order_additional.out +++ b/src/test/regress/expected/multi_join_order_additional.out @@ -2,13 +2,6 @@ -- MULTI_JOIN_ORDER_ADDITIONAL -- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 650000; --- print whether we're running on 9.5 to make version-specific tests clear -SELECT substring(version(), '\d+(?:\.\d+)?') = '9.5' AS is_95; - is_95 -------- - f -(1 row) - -- Set configuration to print table join order and pruned shards SET citus.explain_distributed_queries TO off; SET citus.log_multi_join_order TO TRUE; diff --git a/src/test/regress/expected/multi_join_order_additional_1.out b/src/test/regress/expected/multi_join_order_additional_1.out deleted file mode 100644 index 8393630fa..000000000 --- a/src/test/regress/expected/multi_join_order_additional_1.out +++ /dev/null @@ -1,260 +0,0 @@ --- --- MULTI_JOIN_ORDER_ADDITIONAL --- -ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 650000; --- print whether we're running on 9.5 to make version-specific tests clear -SELECT substring(version(), '\d+(?:\.\d+)?') = '9.5' AS is_95; - is_95 -------- - t -(1 row) - --- Set configuration to print table join order and pruned shards -SET citus.explain_distributed_queries TO off; -SET citus.log_multi_join_order TO TRUE; -SET citus.task_executor_type = 'task-tracker'; -- can't explain all queries otherwise -SET client_min_messages TO DEBUG2; --- Create new table definitions for use in testing in distributed planning and --- execution functionality. Also create indexes to boost performance. -CREATE TABLE lineitem_hash ( - l_orderkey bigint not null, - l_partkey integer not null, - l_suppkey integer not null, - l_linenumber integer not null, - l_quantity decimal(15, 2) not null, - l_extendedprice decimal(15, 2) not null, - l_discount decimal(15, 2) not null, - l_tax decimal(15, 2) not null, - l_returnflag char(1) not null, - l_linestatus char(1) not null, - l_shipdate date not null, - l_commitdate date not null, - l_receiptdate date not null, - l_shipinstruct char(25) not null, - l_shipmode char(10) not null, - l_comment varchar(44) not null, - PRIMARY KEY(l_orderkey, l_linenumber) ); -DEBUG: CREATE TABLE / PRIMARY KEY will create implicit index "lineitem_hash_pkey" for table "lineitem_hash" -DEBUG: building index "lineitem_hash_pkey" on table "lineitem_hash" -DEBUG: creating and filling new WAL file -DEBUG: done creating and filling new WAL file -SELECT master_create_distributed_table('lineitem_hash', 'l_orderkey', 'hash'); - master_create_distributed_table ---------------------------------- - -(1 row) - -SELECT master_create_worker_shards('lineitem_hash', 2, 1); - master_create_worker_shards ------------------------------ - -(1 row) - -CREATE INDEX lineitem_hash_time_index ON lineitem_hash (l_shipdate); -DEBUG: building index "lineitem_hash_time_index" on table "lineitem_hash" -NOTICE: using one-phase commit for distributed DDL commands -HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc' -CREATE TABLE orders_hash ( - o_orderkey bigint not null, - o_custkey integer not null, - o_orderstatus char(1) not null, - o_totalprice decimal(15,2) not null, - o_orderdate date not null, - o_orderpriority char(15) not null, - o_clerk char(15) not null, - o_shippriority integer not null, - o_comment varchar(79) not null, - PRIMARY KEY(o_orderkey) ); -DEBUG: CREATE TABLE / PRIMARY KEY will create implicit index "orders_hash_pkey" for table "orders_hash" -DEBUG: building index "orders_hash_pkey" on table "orders_hash" -SELECT master_create_distributed_table('orders_hash', 'o_orderkey', 'hash'); - master_create_distributed_table ---------------------------------- - -(1 row) - -SELECT master_create_worker_shards('orders_hash', 2, 1); - master_create_worker_shards ------------------------------ - -(1 row) - -CREATE TABLE customer_hash ( - c_custkey integer not null, - c_name varchar(25) not null, - c_address varchar(40) not null, - c_nationkey integer not null, - c_phone char(15) not null, - c_acctbal decimal(15,2) not null, - c_mktsegment char(10) not null, - c_comment varchar(117) not null); -SELECT master_create_distributed_table('customer_hash', 'c_custkey', 'hash'); - master_create_distributed_table ---------------------------------- - -(1 row) - -SELECT master_create_worker_shards('customer_hash', 2, 1); - master_create_worker_shards ------------------------------ - -(1 row) - --- The following query checks that we can correctly handle self-joins -EXPLAIN SELECT l1.l_quantity FROM lineitem l1, lineitem l2 - WHERE l1.l_orderkey = l2.l_orderkey AND l1.l_quantity > 5; -LOG: join order: [ "lineitem" ][ local partition join "lineitem" ] -DEBUG: join prunable for intervals [1,1509] and [2951,4455] -DEBUG: join prunable for intervals [1,1509] and [4480,5986] -DEBUG: join prunable for intervals [1,1509] and [8997,10560] -DEBUG: join prunable for intervals [1,1509] and [10560,12036] -DEBUG: join prunable for intervals [1,1509] and [12036,13473] -DEBUG: join prunable for intervals [1,1509] and [13473,14947] -DEBUG: join prunable for intervals [1509,4964] and [8997,10560] -DEBUG: join prunable for intervals [1509,4964] and [10560,12036] -DEBUG: join prunable for intervals [1509,4964] and [12036,13473] -DEBUG: join prunable for intervals [1509,4964] and [13473,14947] -DEBUG: join prunable for intervals [2951,4455] and [1,1509] -DEBUG: join prunable for intervals [2951,4455] and [4480,5986] -DEBUG: join prunable for intervals [2951,4455] and [8997,10560] -DEBUG: join prunable for intervals [2951,4455] and [10560,12036] -DEBUG: join prunable for intervals [2951,4455] and [12036,13473] -DEBUG: join prunable for intervals [2951,4455] and [13473,14947] -DEBUG: join prunable for intervals [4480,5986] and [1,1509] -DEBUG: join prunable for intervals [4480,5986] and [2951,4455] -DEBUG: join prunable for intervals [4480,5986] and [8997,10560] -DEBUG: join prunable for intervals [4480,5986] and [10560,12036] -DEBUG: join prunable for intervals [4480,5986] and [12036,13473] -DEBUG: join prunable for intervals [4480,5986] and [13473,14947] -DEBUG: join prunable for intervals [8997,10560] and [1,1509] -DEBUG: join prunable for intervals [8997,10560] and [1509,4964] -DEBUG: join prunable for intervals [8997,10560] and [2951,4455] -DEBUG: join prunable for intervals [8997,10560] and [4480,5986] -DEBUG: join prunable for intervals [8997,10560] and [12036,13473] -DEBUG: join prunable for intervals [8997,10560] and [13473,14947] -DEBUG: join prunable for intervals [10560,12036] and [1,1509] -DEBUG: join prunable for intervals [10560,12036] and [1509,4964] -DEBUG: join prunable for intervals [10560,12036] and [2951,4455] -DEBUG: join prunable for intervals [10560,12036] and [4480,5986] -DEBUG: join prunable for intervals [10560,12036] and [13473,14947] -DEBUG: join prunable for intervals [12036,13473] and [1,1509] -DEBUG: join prunable for intervals [12036,13473] and [1509,4964] -DEBUG: join prunable for intervals [12036,13473] and [2951,4455] -DEBUG: join prunable for intervals [12036,13473] and [4480,5986] -DEBUG: join prunable for intervals [12036,13473] and [8997,10560] -DEBUG: join prunable for intervals [13473,14947] and [1,1509] -DEBUG: join prunable for intervals [13473,14947] and [1509,4964] -DEBUG: join prunable for intervals [13473,14947] and [2951,4455] -DEBUG: join prunable for intervals [13473,14947] and [4480,5986] -DEBUG: join prunable for intervals [13473,14947] and [8997,10560] -DEBUG: join prunable for intervals [13473,14947] and [10560,12036] - QUERY PLAN --------------------------------------------------------------------- - Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(2 rows) - --- Update configuration to treat lineitem and orders tables as large -SET citus.large_table_shard_count TO 2; -SET client_min_messages TO LOG; --- The following queries check that we correctly handle joins and OR clauses. In --- particular, these queries check that we factorize out OR clauses if possible, --- and that we default to a cartesian product otherwise. -EXPLAIN SELECT count(*) FROM lineitem, orders - WHERE (l_orderkey = o_orderkey AND l_quantity > 5) - OR (l_orderkey = o_orderkey AND l_quantity < 10); -LOG: join order: [ "lineitem" ][ local partition join "orders" ] - QUERY PLAN --------------------------------------------------------------------------- - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) - -EXPLAIN SELECT l_quantity FROM lineitem, orders - WHERE (l_orderkey = o_orderkey OR l_quantity > 5); -LOG: join order: [ "lineitem" ][ cartesian product "orders" ] -ERROR: cannot perform distributed planning on this query -DETAIL: Cartesian products are currently unsupported --- The below queries modify the partition method in pg_dist_partition. We thus --- begin a transaction here so the changes don't impact any other parallel --- running tests. -BEGIN; --- Validate that we take into account the partition method when building the --- join-order plan. -EXPLAIN SELECT count(*) FROM orders, lineitem_hash - WHERE o_orderkey = l_orderkey; -LOG: join order: [ "orders" ][ single partition join "lineitem_hash" ] - QUERY PLAN --------------------------------------------------------------------------- - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) - --- Verify we handle local joins between two hash-partitioned tables. -EXPLAIN SELECT count(*) FROM orders_hash, lineitem_hash - WHERE o_orderkey = l_orderkey; -LOG: join order: [ "orders_hash" ][ local partition join "lineitem_hash" ] - QUERY PLAN --------------------------------------------------------------------------- - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) - --- Validate that we can handle broadcast joins with hash-partitioned tables. -EXPLAIN SELECT count(*) FROM customer_hash, nation - WHERE c_nationkey = n_nationkey; -LOG: join order: [ "customer_hash" ][ broadcast join "nation" ] - QUERY PLAN --------------------------------------------------------------------------- - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) - --- Update the large table shard count for all the following tests. -SET citus.large_table_shard_count TO 1; --- Validate that we don't use a single-partition join method for a hash --- re-partitioned table, thus preventing a partition of just the customer table. -EXPLAIN SELECT count(*) FROM orders, lineitem, customer - WHERE o_custkey = l_partkey AND o_custkey = c_nationkey; -LOG: join order: [ "orders" ][ dual partition join "lineitem" ][ dual partition join "customer" ] - QUERY PLAN --------------------------------------------------------------------------- - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) - --- Validate that we don't chose a single-partition join method with a --- hash-partitioned base table -EXPLAIN SELECT count(*) FROM orders, customer_hash - WHERE c_custkey = o_custkey; -LOG: join order: [ "orders" ][ dual partition join "customer_hash" ] - QUERY PLAN --------------------------------------------------------------------------- - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) - --- Validate that we can re-partition a hash partitioned table to join with a --- range partitioned one. -EXPLAIN SELECT count(*) FROM orders_hash, customer - WHERE c_custkey = o_custkey; -LOG: join order: [ "orders_hash" ][ single partition join "customer" ] - QUERY PLAN --------------------------------------------------------------------------- - Aggregate (cost=0.00..0.00 rows=0 width=0) - -> Custom Scan (Citus Task-Tracker) (cost=0.00..0.00 rows=0 width=0) - explain statements for distributed queries are not enabled -(3 rows) - -COMMIT; --- Reset client logging level to its previous value -SET client_min_messages TO NOTICE; -DROP TABLE lineitem_hash; -DROP TABLE orders_hash; -DROP TABLE customer_hash; diff --git a/src/test/regress/sql/multi_join_order_additional.sql b/src/test/regress/sql/multi_join_order_additional.sql index 87469c14e..5f4d85a04 100644 --- a/src/test/regress/sql/multi_join_order_additional.sql +++ b/src/test/regress/sql/multi_join_order_additional.sql @@ -5,9 +5,6 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 650000; --- print whether we're running on 9.5 to make version-specific tests clear -SELECT substring(version(), '\d+(?:\.\d+)?') = '9.5' AS is_95; - -- Set configuration to print table join order and pruned shards SET citus.explain_distributed_queries TO off;