From e03246dd45d0211e362b66f74c7e583c4bac4037 Mon Sep 17 00:00:00 2001 From: jeff-davis Date: Tue, 2 Feb 2021 11:56:00 -0800 Subject: [PATCH] Columnar: mark custom scan path parallel_safe. (#4619) Enables an overall plan to be parallel (e.g. over a partition hierarchy), even though an individual ColumnarScan is not parallel-aware. Co-authored-by: Jeff Davis --- src/backend/columnar/cstore_customscan.c | 5 + src/test/regress/columnar_am_schedule | 1 + .../expected/columnar_partitioning.out | 126 ++++++++++++++++++ .../expected/columnar_partitioning_1.out | 126 ++++++++++++++++++ .../regress/sql/columnar_partitioning.sql | 55 ++++++++ 5 files changed, 313 insertions(+) create mode 100644 src/test/regress/expected/columnar_partitioning.out create mode 100644 src/test/regress/expected/columnar_partitioning_1.out create mode 100644 src/test/regress/sql/columnar_partitioning.sql diff --git a/src/backend/columnar/cstore_customscan.c b/src/backend/columnar/cstore_customscan.c index 0304dbef3..445ea68f2 100644 --- a/src/backend/columnar/cstore_customscan.c +++ b/src/backend/columnar/cstore_customscan.c @@ -133,6 +133,8 @@ columnar_customscan_init() PGC_USERSET, GUC_NO_SHOW_ALL, NULL, NULL, NULL); + + RegisterCustomScanMethods(&ColumnarScanScanMethods); } @@ -216,6 +218,9 @@ CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) path->parent = rel; path->pathtarget = rel->reltarget; + /* columnar scans are not parallel-aware, but they are parallel-safe */ + path->parallel_safe = rel->consider_parallel; + /* * We don't support pushing join clauses into the quals of a seqscan, but * it could still have required parameterization due to LATERAL refs in diff --git a/src/test/regress/columnar_am_schedule b/src/test/regress/columnar_am_schedule index 1387f9597..a4afb3229 100644 --- a/src/test/regress/columnar_am_schedule +++ b/src/test/regress/columnar_am_schedule @@ -9,6 +9,7 @@ test: am_analyze test: am_data_types test: am_drop test: columnar_fallback_scan 
+test: columnar_partitioning test: am_empty test: am_insert test: am_update_delete diff --git a/src/test/regress/expected/columnar_partitioning.out b/src/test/regress/expected/columnar_partitioning.out new file mode 100644 index 000000000..4f54824f8 --- /dev/null +++ b/src/test/regress/expected/columnar_partitioning.out @@ -0,0 +1,126 @@ +CREATE TABLE parent(ts timestamptz, i int, n numeric, s text) + PARTITION BY RANGE (ts); +-- row partitions +CREATE TABLE p0 PARTITION OF parent + FOR VALUES FROM ('2020-01-01') TO ('2020-02-01'); +CREATE TABLE p1 PARTITION OF parent + FOR VALUES FROM ('2020-02-01') TO ('2020-03-01'); +CREATE TABLE p2 PARTITION OF parent + FOR VALUES FROM ('2020-03-01') TO ('2020-04-01'); +CREATE TABLE p3 PARTITION OF parent + FOR VALUES FROM ('2020-04-01') TO ('2020-05-01'); +INSERT INTO parent SELECT '2020-01-15', 10, 100, 'one thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT '2020-02-15', 20, 200, 'two thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT '2020-03-15', 30, 300, 'three thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT '2020-04-15', 30, 300, 'three thousand' + FROM generate_series(1,100000); +-- run parallel plans +SET force_parallel_mode = regress; +SET min_parallel_table_scan_size = 1; +SET parallel_tuple_cost = 0; +SET max_parallel_workers = 4; +SET max_parallel_workers_per_gather = 4; +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; + QUERY PLAN +--------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Append + -> Parallel Seq Scan on p0 parent_1 + -> Parallel Seq Scan on p1 parent_2 + -> Parallel Seq Scan on p2 parent_3 + -> Parallel Seq Scan on p3 parent_4 +(9 rows) + +SELECT count(*), sum(i), min(i), max(i) FROM parent; + count | sum | min | max +--------------------------------------------------------------------- + 400000 | 9000000 | 
10 | 30 +(1 row) + +-- set older partitions as columnar +SELECT alter_table_set_access_method('p0','columnar'); +NOTICE: creating a new table for public.p0 +NOTICE: Moving the data of public.p0 +NOTICE: Dropping the old public.p0 +NOTICE: Renaming the new table to public.p0 + alter_table_set_access_method +--------------------------------------------------------------------- + +(1 row) + +SELECT alter_table_set_access_method('p1','columnar'); +NOTICE: creating a new table for public.p1 +NOTICE: Moving the data of public.p1 +NOTICE: Dropping the old public.p1 +NOTICE: Renaming the new table to public.p1 + alter_table_set_access_method +--------------------------------------------------------------------- + +(1 row) + +SELECT alter_table_set_access_method('p3','columnar'); +NOTICE: creating a new table for public.p3 +NOTICE: Moving the data of public.p3 +NOTICE: Dropping the old public.p3 +NOTICE: Renaming the new table to public.p3 + alter_table_set_access_method +--------------------------------------------------------------------- + +(1 row) + +-- should also be parallel plan +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; + QUERY PLAN +--------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Append + -> Custom Scan (ColumnarScan) on p3 parent_4 + -> Custom Scan (ColumnarScan) on p0 parent_1 + -> Custom Scan (ColumnarScan) on p1 parent_2 + -> Parallel Seq Scan on p2 parent_3 +(9 rows) + +SELECT count(*), sum(i), min(i), max(i) FROM parent; + count | sum | min | max +--------------------------------------------------------------------- + 400000 | 9000000 | 10 | 30 +(1 row) + +-- and also parallel without custom scan +SET columnar.enable_custom_scan = FALSE; +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; + QUERY PLAN +--------------------------------------------------------------------- + Finalize Aggregate + -> 
Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Append + -> Seq Scan on p0 parent_1 + -> Seq Scan on p1 parent_2 + -> Seq Scan on p3 parent_4 + -> Parallel Seq Scan on p2 parent_3 +(9 rows) + +SELECT count(*), sum(i), min(i), max(i) FROM parent; + count | sum | min | max +--------------------------------------------------------------------- + 400000 | 9000000 | 10 | 30 +(1 row) + +SET columnar.enable_custom_scan TO DEFAULT; +SET force_parallel_mode TO DEFAULT; +SET min_parallel_table_scan_size TO DEFAULT; +SET parallel_tuple_cost TO DEFAULT; +SET max_parallel_workers TO DEFAULT; +SET max_parallel_workers_per_gather TO DEFAULT; +DROP TABLE parent; diff --git a/src/test/regress/expected/columnar_partitioning_1.out b/src/test/regress/expected/columnar_partitioning_1.out new file mode 100644 index 000000000..f68cf23ef --- /dev/null +++ b/src/test/regress/expected/columnar_partitioning_1.out @@ -0,0 +1,126 @@ +CREATE TABLE parent(ts timestamptz, i int, n numeric, s text) + PARTITION BY RANGE (ts); +-- row partitions +CREATE TABLE p0 PARTITION OF parent + FOR VALUES FROM ('2020-01-01') TO ('2020-02-01'); +CREATE TABLE p1 PARTITION OF parent + FOR VALUES FROM ('2020-02-01') TO ('2020-03-01'); +CREATE TABLE p2 PARTITION OF parent + FOR VALUES FROM ('2020-03-01') TO ('2020-04-01'); +CREATE TABLE p3 PARTITION OF parent + FOR VALUES FROM ('2020-04-01') TO ('2020-05-01'); +INSERT INTO parent SELECT '2020-01-15', 10, 100, 'one thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT '2020-02-15', 20, 200, 'two thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT '2020-03-15', 30, 300, 'three thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT '2020-04-15', 30, 300, 'three thousand' + FROM generate_series(1,100000); +-- run parallel plans +SET force_parallel_mode = regress; +SET min_parallel_table_scan_size = 1; +SET parallel_tuple_cost = 0; +SET max_parallel_workers = 4; +SET max_parallel_workers_per_gather = 
4; +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; + QUERY PLAN +--------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Append + -> Parallel Seq Scan on p0 + -> Parallel Seq Scan on p1 + -> Parallel Seq Scan on p2 + -> Parallel Seq Scan on p3 +(9 rows) + +SELECT count(*), sum(i), min(i), max(i) FROM parent; + count | sum | min | max +--------------------------------------------------------------------- + 400000 | 9000000 | 10 | 30 +(1 row) + +-- set older partitions as columnar +SELECT alter_table_set_access_method('p0','columnar'); +NOTICE: creating a new table for public.p0 +NOTICE: Moving the data of public.p0 +NOTICE: Dropping the old public.p0 +NOTICE: Renaming the new table to public.p0 + alter_table_set_access_method +--------------------------------------------------------------------- + +(1 row) + +SELECT alter_table_set_access_method('p1','columnar'); +NOTICE: creating a new table for public.p1 +NOTICE: Moving the data of public.p1 +NOTICE: Dropping the old public.p1 +NOTICE: Renaming the new table to public.p1 + alter_table_set_access_method +--------------------------------------------------------------------- + +(1 row) + +SELECT alter_table_set_access_method('p3','columnar'); +NOTICE: creating a new table for public.p3 +NOTICE: Moving the data of public.p3 +NOTICE: Dropping the old public.p3 +NOTICE: Renaming the new table to public.p3 + alter_table_set_access_method +--------------------------------------------------------------------- + +(1 row) + +-- should also be parallel plan +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; + QUERY PLAN +--------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Append + -> Custom Scan (ColumnarScan) on p3 + -> Custom Scan (ColumnarScan) on p0 + -> Custom 
Scan (ColumnarScan) on p1 + -> Parallel Seq Scan on p2 +(9 rows) + +SELECT count(*), sum(i), min(i), max(i) FROM parent; + count | sum | min | max +--------------------------------------------------------------------- + 400000 | 9000000 | 10 | 30 +(1 row) + +-- and also parallel without custom scan +SET columnar.enable_custom_scan = FALSE; +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; + QUERY PLAN +--------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Append + -> Seq Scan on p0 + -> Seq Scan on p1 + -> Seq Scan on p3 + -> Parallel Seq Scan on p2 +(9 rows) + +SELECT count(*), sum(i), min(i), max(i) FROM parent; + count | sum | min | max +--------------------------------------------------------------------- + 400000 | 9000000 | 10 | 30 +(1 row) + +SET columnar.enable_custom_scan TO DEFAULT; +SET force_parallel_mode TO DEFAULT; +SET min_parallel_table_scan_size TO DEFAULT; +SET parallel_tuple_cost TO DEFAULT; +SET max_parallel_workers TO DEFAULT; +SET max_parallel_workers_per_gather TO DEFAULT; +DROP TABLE parent; diff --git a/src/test/regress/sql/columnar_partitioning.sql b/src/test/regress/sql/columnar_partitioning.sql new file mode 100644 index 000000000..98692c78a --- /dev/null +++ b/src/test/regress/sql/columnar_partitioning.sql @@ -0,0 +1,55 @@ + +CREATE TABLE parent(ts timestamptz, i int, n numeric, s text) + PARTITION BY RANGE (ts); + +-- row partitions +CREATE TABLE p0 PARTITION OF parent + FOR VALUES FROM ('2020-01-01') TO ('2020-02-01'); +CREATE TABLE p1 PARTITION OF parent + FOR VALUES FROM ('2020-02-01') TO ('2020-03-01'); +CREATE TABLE p2 PARTITION OF parent + FOR VALUES FROM ('2020-03-01') TO ('2020-04-01'); +CREATE TABLE p3 PARTITION OF parent + FOR VALUES FROM ('2020-04-01') TO ('2020-05-01'); + +INSERT INTO parent SELECT '2020-01-15', 10, 100, 'one thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT 
'2020-02-15', 20, 200, 'two thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT '2020-03-15', 30, 300, 'three thousand' + FROM generate_series(1,100000); +INSERT INTO parent SELECT '2020-04-15', 30, 300, 'three thousand' + FROM generate_series(1,100000); + +-- run parallel plans +SET force_parallel_mode = regress; +SET min_parallel_table_scan_size = 1; +SET parallel_tuple_cost = 0; +SET max_parallel_workers = 4; +SET max_parallel_workers_per_gather = 4; + +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; +SELECT count(*), sum(i), min(i), max(i) FROM parent; + +-- set older partitions as columnar +SELECT alter_table_set_access_method('p0','columnar'); +SELECT alter_table_set_access_method('p1','columnar'); +SELECT alter_table_set_access_method('p3','columnar'); + +-- should also be parallel plan +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; +SELECT count(*), sum(i), min(i), max(i) FROM parent; + +-- and also parallel without custom scan +SET columnar.enable_custom_scan = FALSE; +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; +SELECT count(*), sum(i), min(i), max(i) FROM parent; +SET columnar.enable_custom_scan TO DEFAULT; + +SET force_parallel_mode TO DEFAULT; +SET min_parallel_table_scan_size TO DEFAULT; +SET parallel_tuple_cost TO DEFAULT; +SET max_parallel_workers TO DEFAULT; +SET max_parallel_workers_per_gather TO DEFAULT; + +DROP TABLE parent;