From 3340f17c4e634b908bfefc0cb72f81585347aba3 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Wed, 8 Sep 2021 13:53:43 +0300 Subject: [PATCH] Prevent planner from choosing parallel scan for columnar tables (#5245) Previously, for regular table scans, we were setting `RelOptInfo->partial_pathlist` to `NIL` via `set_rel_pathlist_hook` to discard scan `Path`s that need to use any parallel workers, this was working nicely. However, when building indexes, this hook doesn't get called so we were not able to prevent spawning parallel workers when building an index. For this reason, 9b4dc2f804d67a1f5b41fab9d0dfeaa4fe365739 added basic implementation for `columnar_parallelscan_*` callbacks but also made some changes to skip using those workers when building the index. However, now that we are doing stripe reservation in two stages, we call `heap_inplace_update` at some point to complete stripe reservation. However, postgres throws an error if we call `heap_inplace_update` during a parallel operation, even if we don't actually make use of those workers. For this reason, with this pr, we make sure to not generate scan `Path`s that need to use any parallel workers by using `get_relation_info_hook`. This is indeed useful to prevent spawning parallel workers during index builds. 
--- src/backend/columnar/columnar_customscan.c | 34 ++++++- src/backend/columnar/columnar_tableam.c | 90 +++++-------------- .../alter_table_set_access_method.out | 20 +++++ .../regress/expected/columnar_indexes.out | 28 ++++-- .../sql/alter_table_set_access_method.sql | 13 +++ src/test/regress/sql/columnar_indexes.sql | 24 +++++ 6 files changed, 132 insertions(+), 77 deletions(-) diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c index be8b5a17b..6acb50741 100644 --- a/src/backend/columnar/columnar_customscan.c +++ b/src/backend/columnar/columnar_customscan.c @@ -31,6 +31,7 @@ #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/plancat.h" #include "optimizer/restrictinfo.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -95,6 +96,8 @@ static void AddColumnarScanPathsRec(PlannerInfo *root, RelOptInfo *rel, /* hooks and callbacks */ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); +static void ColumnarGetRelationInfoHook(PlannerInfo *root, Oid relationObjectId, + bool inhparent, RelOptInfo *rel); static Plan * ColumnarScanPath_PlanCustomPath(PlannerInfo *root, RelOptInfo *rel, struct CustomPath *best_path, @@ -126,6 +129,7 @@ static Bitmapset * ColumnarAttrNeeded(ScanState *ss); /* saved hook value in case of unload */ static set_rel_pathlist_hook_type PreviousSetRelPathlistHook = NULL; +static get_relation_info_hook_type PreviousGetRelationInfoHook = NULL; static bool EnableColumnarCustomScan = true; static bool EnableColumnarQualPushdown = true; @@ -180,6 +184,9 @@ columnar_customscan_init() PreviousSetRelPathlistHook = set_rel_pathlist_hook; set_rel_pathlist_hook = ColumnarSetRelPathlistHook; + PreviousGetRelationInfoHook = get_relation_info_hook; + get_relation_info_hook = ColumnarGetRelationInfoHook; + /* register customscan specific GUC's */ DefineCustomBoolVariable( 
"columnar.enable_custom_scan", @@ -274,8 +281,14 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, errmsg("sample scans not supported on columnar tables"))); } - /* columnar doesn't support parallel paths */ - rel->partial_pathlist = NIL; + if (list_length(rel->partial_pathlist) != 0) + { + /* + * Parallel scans on columnar tables are already discarded by + * ColumnarGetRelationInfoHook but be on the safe side. + */ + elog(ERROR, "parallel scans on columnar are not supported"); + } /* * There are cases where IndexPath is normally more preferrable over @@ -318,6 +331,23 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, } +static void +ColumnarGetRelationInfoHook(PlannerInfo *root, Oid relationObjectId, + bool inhparent, RelOptInfo *rel) +{ + if (PreviousGetRelationInfoHook) + { + PreviousGetRelationInfoHook(root, relationObjectId, inhparent, rel); + } + + if (IsColumnarTableAmTable(relationObjectId)) + { + /* disable parallel query */ + rel->rel_parallel_workers = 0; + } +} + + /* * RemovePathsByPredicate removes the paths that removePathPredicate * evaluates to true from pathlist of given rel. 
diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index c360fad88..2b99219bd 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -436,33 +436,21 @@ ErrorIfInvalidRowNumber(uint64 rowNumber) static Size columnar_parallelscan_estimate(Relation rel) { - return sizeof(ParallelBlockTableScanDescData); + elog(ERROR, "columnar_parallelscan_estimate not implemented"); } static Size columnar_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) { - ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan; - - bpscan->base.phs_relid = RelationGetRelid(rel); - bpscan->phs_nblocks = RelationGetNumberOfBlocks(rel); - bpscan->base.phs_syncscan = synchronize_seqscans && - !RelationUsesLocalBuffers(rel) && - bpscan->phs_nblocks > NBuffers / 4; - SpinLockInit(&bpscan->phs_mutex); - bpscan->phs_startblock = InvalidBlockNumber; - pg_atomic_init_u64(&bpscan->phs_nallocated, 0); - - return sizeof(ParallelBlockTableScanDescData); + elog(ERROR, "columnar_parallelscan_initialize not implemented"); } static void columnar_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) { - ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan; - pg_atomic_write_u64(&bpscan->phs_nallocated, 0); + elog(ERROR, "columnar_parallelscan_reinitialize not implemented"); } @@ -1293,22 +1281,10 @@ columnar_index_build_range_scan(Relation columnarRelation, if (scan) { /* - * Scan is initialized iff postgres decided to build the index using - * parallel workers. In this case, we simply return for parallel - * workers since we don't support parallel scan on columnar tables. + * Parallel scans on columnar tables are already discarded by + * ColumnarGetRelationInfoHook but be on the safe side. 
*/ - if (IsBackgroundWorker) - { - ereport(DEBUG4, (errmsg("ignoring parallel worker when building " - "index since parallel scan on columnar " - "tables is not supported"))); - table_endscan(scan); - return 0; - } - - ereport(NOTICE, (errmsg("falling back to serial index build since " - "parallel scan on columnar tables is not " - "supported"))); + elog(ERROR, "parallel scans on columnar are not supported"); } /* @@ -1326,40 +1302,27 @@ columnar_index_build_range_scan(Relation columnarRelation, Snapshot snapshot = { 0 }; bool snapshotRegisteredByUs = false; - if (!scan) - { - /* - * For serial index build, we begin our own scan. We may also need to - * register a snapshot whose lifetime is under our direct control. - */ - if (!TransactionIdIsValid(OldestXmin)) - { - snapshot = RegisterSnapshot(GetTransactionSnapshot()); - snapshotRegisteredByUs = true; - } - else - { - snapshot = SnapshotAny; - } - int nkeys = 0; - ScanKeyData *scanKey = NULL; - bool allowAccessStrategy = true; - scan = table_beginscan_strat(columnarRelation, snapshot, nkeys, scanKey, - allowAccessStrategy, allow_sync); + /* + * For serial index build, we begin our own scan. We may also need to + * register a snapshot whose lifetime is under our direct control. + */ + if (!TransactionIdIsValid(OldestXmin)) + { + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + snapshotRegisteredByUs = true; } else { - /* - * For parallel index build, we don't register/unregister own snapshot - * since snapshot is taken from parallel scan. Note that even if we - * don't support parallel index builds, we still continue building the - * index via the main backend and we should still rely on the snapshot - * provided by parallel scan. 
- */ - snapshot = scan->rs_snapshot; + snapshot = SnapshotAny; } + int nkeys = 0; + ScanKeyData *scanKey = NULL; + bool allowAccessStrategy = true; + scan = table_beginscan_strat(columnarRelation, snapshot, nkeys, scanKey, + allowAccessStrategy, allow_sync); + if (progress) { ColumnarReportTotalVirtualBlocks(columnarRelation, snapshot, @@ -2113,17 +2076,6 @@ static const TableAmRoutine columnar_am_methods = { .scan_rescan = columnar_rescan, .scan_getnextslot = columnar_getnextslot, - /* - * Postgres calls following three callbacks during index builds, if it - * decides to use parallel workers when building the index. On the other - * hand, we don't support parallel scans on columnar tables but we also - * want to fallback to serial index build. For this reason, we both skip - * parallel workers in columnar_index_build_range_scan and also provide - * basic implementations for those callbacks based on their corresponding - * implementations in heapAM. - * Note that for regular query plans, we already ignore parallel paths via - * ColumnarSetRelPathlistHook. 
- */ .parallelscan_estimate = columnar_parallelscan_estimate, .parallelscan_initialize = columnar_parallelscan_initialize, .parallelscan_reinitialize = columnar_parallelscan_reinitialize, diff --git a/src/test/regress/expected/alter_table_set_access_method.out b/src/test/regress/expected/alter_table_set_access_method.out index cf72940b1..eb13ee5ee 100644 --- a/src/test/regress/expected/alter_table_set_access_method.out +++ b/src/test/regress/expected/alter_table_set_access_method.out @@ -799,6 +799,26 @@ DEBUG: pathlist hook for columnar table am (0 rows) RESET client_min_messages; +create table events (event_id bigserial, event_time timestamptz default now(), payload text); +create index on events (event_id); +insert into events (payload) select 'hello-'||s from generate_series(1,10) s; +BEGIN; + SET LOCAL force_parallel_mode = regress; + SET LOCAL min_parallel_table_scan_size = 1; + SET LOCAL parallel_tuple_cost = 0; + SET LOCAL max_parallel_workers = 4; + SET LOCAL max_parallel_workers_per_gather = 4; + select alter_table_set_access_method('events', 'columnar'); +NOTICE: creating a new table for alter_table_set_access_method.events +NOTICE: moving the data of alter_table_set_access_method.events +NOTICE: dropping the old alter_table_set_access_method.events +NOTICE: renaming the new table to alter_table_set_access_method.events + alter_table_set_access_method +--------------------------------------------------------------------- + +(1 row) + +COMMIT; SET client_min_messages TO WARNING; DROP SCHEMA alter_table_set_access_method CASCADE; SELECT 1 FROM master_remove_node('localhost', :master_port); diff --git a/src/test/regress/expected/columnar_indexes.out b/src/test/regress/expected/columnar_indexes.out index 0026c3af8..f246949e0 100644 --- a/src/test/regress/expected/columnar_indexes.out +++ b/src/test/regress/expected/columnar_indexes.out @@ -448,16 +448,10 @@ ERROR: unsupported access method for the index on columnar table brin_summarize CREATE TABLE 
parallel_scan_test(a int) USING columnar WITH ( parallel_workers = 2 ); INSERT INTO parallel_scan_test SELECT i FROM generate_series(1,10) i; CREATE INDEX ON parallel_scan_test (a); -NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported VACUUM FULL parallel_scan_test; -NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported REINDEX TABLE parallel_scan_test; -NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported CREATE INDEX CONCURRENTLY ON parallel_scan_test (a); -NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported REINDEX TABLE CONCURRENTLY parallel_scan_test; -NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported -NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported -- test with different data types & indexAM's -- CREATE TABLE hash_text(a INT, b TEXT) USING columnar; INSERT INTO hash_text SELECT i, (i*2)::TEXT FROM generate_series(1, 10) i; @@ -550,5 +544,27 @@ ERROR: duplicate key value violates unique constraint "aborted_write_test_pkey" DETAIL: Key (a)=(16999) already exists. -- since second INSERT already failed, should not throw a "duplicate key" error REINDEX TABLE aborted_write_test; +create table events (event_id bigserial, event_time timestamptz default now(), payload text) using columnar; +BEGIN; + -- this wouldn't flush any data + insert into events (payload) select 'hello-'||s from generate_series(1, 10) s; + -- Since table is large enough, normally postgres would prefer using + -- parallel workers when building the index. + -- + -- However, before starting to build the index, we will first flush + -- the writes of above INSERT and this would try to update the stripe + -- reservation entry in columnar.stripe when doing that. 
+ -- + -- However, updating a tuple during a parallel operation is not allowed + -- by postgres and throws an error. For this reason, here we don't expect + -- following command to fail since we prevent using parallel workers for + -- columnar tables. + SET LOCAL force_parallel_mode = regress; + SET LOCAL min_parallel_table_scan_size = 1; + SET LOCAL parallel_tuple_cost = 0; + SET LOCAL max_parallel_workers = 4; + SET LOCAL max_parallel_workers_per_gather = 4; + create index on events (event_id); +COMMIT; SET client_min_messages TO WARNING; DROP SCHEMA columnar_indexes CASCADE; diff --git a/src/test/regress/sql/alter_table_set_access_method.sql b/src/test/regress/sql/alter_table_set_access_method.sql index 4bb75d5c2..3d3ff2599 100644 --- a/src/test/regress/sql/alter_table_set_access_method.sql +++ b/src/test/regress/sql/alter_table_set_access_method.sql @@ -264,6 +264,19 @@ SELECT alter_table_set_access_method('abcde_012345678901234567890123456789012345 SELECT * FROM abcde_0123456789012345678901234567890123456789012345678901234567890123456789; RESET client_min_messages; +create table events (event_id bigserial, event_time timestamptz default now(), payload text); +create index on events (event_id); +insert into events (payload) select 'hello-'||s from generate_series(1,10) s; + +BEGIN; + SET LOCAL force_parallel_mode = regress; + SET LOCAL min_parallel_table_scan_size = 1; + SET LOCAL parallel_tuple_cost = 0; + SET LOCAL max_parallel_workers = 4; + SET LOCAL max_parallel_workers_per_gather = 4; + select alter_table_set_access_method('events', 'columnar'); +COMMIT; + SET client_min_messages TO WARNING; DROP SCHEMA alter_table_set_access_method CASCADE; SELECT 1 FROM master_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql/columnar_indexes.sql b/src/test/regress/sql/columnar_indexes.sql index 3a7445aba..f3a84c13a 100644 --- a/src/test/regress/sql/columnar_indexes.sql +++ b/src/test/regress/sql/columnar_indexes.sql @@ -406,5 +406,29 @@ INSERT
INTO aborted_write_test VALUES (16999); -- since second INSERT already failed, should not throw a "duplicate key" error REINDEX TABLE aborted_write_test; +create table events (event_id bigserial, event_time timestamptz default now(), payload text) using columnar; +BEGIN; + -- this wouldn't flush any data + insert into events (payload) select 'hello-'||s from generate_series(1, 10) s; + + -- Since table is large enough, normally postgres would prefer using + -- parallel workers when building the index. + -- + -- However, before starting to build the index, we will first flush + -- the writes of above INSERT and this would try to update the stripe + -- reservation entry in columnar.stripe when doing that. + -- + -- However, updating a tuple during a parallel operation is not allowed + -- by postgres and throws an error. For this reason, here we don't expect + -- following command to fail since we prevent using parallel workers for + -- columnar tables. + SET LOCAL force_parallel_mode = regress; + SET LOCAL min_parallel_table_scan_size = 1; + SET LOCAL parallel_tuple_cost = 0; + SET LOCAL max_parallel_workers = 4; + SET LOCAL max_parallel_workers_per_gather = 4; + create index on events (event_id); +COMMIT; + SET client_min_messages TO WARNING; DROP SCHEMA columnar_indexes CASCADE;