From 4043abd5aaa93bc9b51a87686f3961ede05145ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emel=20=C5=9Eim=C5=9Fek?=
Date: Tue, 7 Mar 2023 18:15:50 +0300
Subject: [PATCH 01/58] Exclude-Generated-Columns-In-Copy (#6721)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DESCRIPTION: Fixes a bug in shard copy operations.

For copying shards in both shard move and shard split operations, Citus
uses the COPY statement. A COPY statement of the form

`COPY target_shard FROM STDIN;`

throws an error when there is a GENERATED column in the shard table.

To fix this issue, we need to exclude the GENERATED columns from the
COPY statement and from the matching SELECT statement. Hence this fix
converts the two statements to the following form:

```
COPY target_shard (col1, col2, ..., coln) FROM STDIN;
SELECT col1, col2, ..., coln FROM source_shard;
```

where (col1, col2, ..., coln) does not include any GENERATED column.
GENERATED column values are computed on the target_shard as the rows
are inserted.

Fixes #6705.

---------

Co-authored-by: Teja Mupparti
Co-authored-by: aykut-bozkurt <51649454+aykut-bozkurt@users.noreply.github.com>
Co-authored-by: Jelte Fennema
Co-authored-by: Gürkan İndibay
---
 .../worker_copy_table_to_node_udf.c           |  8 +-
 .../operations/worker_shard_copy.c            | 70 ++++++++++++++--
 .../operations/worker_split_copy_udf.c        |  7 +-
 src/include/distributed/worker_shard_copy.h   |  5 ++
 .../citus_non_blocking_split_shards.out       |  5 +-
 .../citus_split_shard_by_split_points.out     |  5 +-
 src/test/regress/expected/multi_move_mx.out   | 32 ++++++++
 .../expected/worker_split_copy_test.out       | 82 +++++++++++++++++++
 .../sql/citus_non_blocking_split_shards.sql   |  6 +-
 .../sql/citus_split_shard_by_split_points.sql |  6 +-
 src/test/regress/sql/multi_move_mx.sql        | 26 ++++++
 .../regress/sql/worker_split_copy_test.sql    | 58 +++++++++++++
 12 files changed, 295 insertions(+), 15 deletions(-)

diff --git a/src/backend/distributed/operations/worker_copy_table_to_node_udf.c b/src/backend/distributed/operations/worker_copy_table_to_node_udf.c
index 7af80ef55..f0f83744d 100644
--- a/src/backend/distributed/operations/worker_copy_table_to_node_udf.c
+++ b/src/backend/distributed/operations/worker_copy_table_to_node_udf.c
@@ -53,8 +53,14 @@ worker_copy_table_to_node(PG_FUNCTION_ARGS)
 										targetNodeId);
 
 	StringInfo selectShardQueryForCopy = makeStringInfo();
+
+	/*
+	 * Even though we do COPY(SELECT ...) all the columns, we can't just do SELECT * because we must not COPY generated columns.
+	 */
+	const char *columnList = CopyableColumnNamesFromRelationName(relationSchemaName,
+																  relationName);
 	appendStringInfo(selectShardQueryForCopy,
-					 "SELECT * FROM %s;", relationQualifiedName);
+					 "SELECT %s FROM %s;", columnList, relationQualifiedName);
 
 	ParamListInfo params = NULL;
 	ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params,
diff --git a/src/backend/distributed/operations/worker_shard_copy.c b/src/backend/distributed/operations/worker_shard_copy.c
index 9239caffb..e9c2af512 100644
--- a/src/backend/distributed/operations/worker_shard_copy.c
+++ b/src/backend/distributed/operations/worker_shard_copy.c
@@ -73,7 +73,7 @@ static void ShardCopyDestReceiverDestroy(DestReceiver *destReceiver);
 static bool CanUseLocalCopy(uint32_t destinationNodeId);
 static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName,
 											  bool
-											  useBinaryFormat);
+											  useBinaryFormat, TupleDesc tupleDesc);
 static void WriteLocalTuple(TupleTableSlot *slot, ShardCopyDestReceiver *copyDest);
 static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead);
 static void LocalCopyToShard(ShardCopyDestReceiver *copyDest, CopyOutState
@@ -105,7 +105,8 @@ ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest)
 	StringInfo copyStatement = ConstructShardCopyStatement(
 		copyDest->destinationShardFullyQualifiedName,
-		copyDest->copyOutState->binary);
+		copyDest->copyOutState->binary,
+		copyDest->tupleDescriptor);
 
 	if (!SendRemoteCommand(copyDest->connection, copyStatement->data))
 	{
@@ -344,21 +345,80 @@ ShardCopyDestReceiverDestroy(DestReceiver *dest)
 }
 
 
+/*
+ * CopyableColumnNamesFromTupleDesc creates and returns a comma-separated string of column names to be used in the COPY
+ * and SELECT statements when copying a table. The COPY and SELECT statements should filter out GENERATED columns since the COPY
+ * statement fails to handle them. While iterating over the attributes of the table, we also need to skip dropped columns.
+ */
+const char *
+CopyableColumnNamesFromTupleDesc(TupleDesc tupDesc)
+{
+	StringInfo columnList = makeStringInfo();
+	bool firstInList = true;
+
+	for (int i = 0; i < tupDesc->natts; i++)
+	{
+		Form_pg_attribute att = TupleDescAttr(tupDesc, i);
+		if (att->attgenerated || att->attisdropped)
+		{
+			continue;
+		}
+		if (!firstInList)
+		{
+			appendStringInfo(columnList, ",");
+		}
+
+		firstInList = false;
+
+		appendStringInfo(columnList, "%s", quote_identifier(NameStr(att->attname)));
+	}
+
+	return columnList->data;
+}
+
+
+/*
+ * CopyableColumnNamesFromRelationName is a wrapper around CopyableColumnNamesFromTupleDesc that looks up the relation by its schema and name.
+ */ +const char * +CopyableColumnNamesFromRelationName(const char *schemaName, const char *relationName) +{ + Oid namespaceOid = get_namespace_oid(schemaName, true); + + Oid relationId = get_relname_relid(relationName, namespaceOid); + + Relation relation = relation_open(relationId, AccessShareLock); + + TupleDesc tupleDesc = RelationGetDescr(relation); + + const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc); + + relation_close(relation, NoLock); + + return columnList; +} + + /* * ConstructShardCopyStatement constructs the text of a COPY statement * for copying into a result table */ static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName, bool - useBinaryFormat) + useBinaryFormat, + TupleDesc tupleDesc) { char *destinationShardSchemaName = linitial(destinationShardFullyQualifiedName); char *destinationShardRelationName = lsecond(destinationShardFullyQualifiedName); + StringInfo command = makeStringInfo(); - appendStringInfo(command, "COPY %s.%s FROM STDIN", + + const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc); + + appendStringInfo(command, "COPY %s.%s (%s) FROM STDIN", quote_identifier(destinationShardSchemaName), quote_identifier( - destinationShardRelationName)); + destinationShardRelationName), columnList); if (useBinaryFormat) { diff --git a/src/backend/distributed/operations/worker_split_copy_udf.c b/src/backend/distributed/operations/worker_split_copy_udf.c index b96475992..c154ac040 100644 --- a/src/backend/distributed/operations/worker_split_copy_udf.c +++ b/src/backend/distributed/operations/worker_split_copy_udf.c @@ -110,8 +110,13 @@ worker_split_copy(PG_FUNCTION_ARGS) splitCopyInfoList)))); StringInfo selectShardQueryForCopy = makeStringInfo(); + const char *columnList = CopyableColumnNamesFromRelationName( + sourceShardToCopySchemaName, + sourceShardToCopyName); + appendStringInfo(selectShardQueryForCopy, - "SELECT * FROM %s;", sourceShardToCopyQualifiedName); + "SELECT %s FROM %s;", columnList, + sourceShardToCopyQualifiedName); ParamListInfo params = NULL; ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params, diff --git a/src/include/distributed/worker_shard_copy.h b/src/include/distributed/worker_shard_copy.h index 2ab2775f9..77f57c761 100644 --- a/src/include/distributed/worker_shard_copy.h +++ b/src/include/distributed/worker_shard_copy.h @@ -19,4 +19,9 @@ extern DestReceiver * CreateShardCopyDestReceiver(EState *executorState, List *destinationShardFullyQualifiedName, uint32_t destinationNodeId); +extern const char * CopyableColumnNamesFromRelationName(const char *schemaName, const + char *relationName); + +extern const char * CopyableColumnNamesFromTupleDesc(TupleDesc tupdesc); + #endif /* WORKER_SHARD_COPY_H_ */ diff --git a/src/test/regress/expected/citus_non_blocking_split_shards.out b/src/test/regress/expected/citus_non_blocking_split_shards.out index d6dde8b7a..fe3cade55 100644 --- a/src/test/regress/expected/citus_non_blocking_split_shards.out +++ b/src/test/regress/expected/citus_non_blocking_split_shards.out @@ -60,7 +60,7 @@ SELECT create_reference_table('reference_table'); (1 row) -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); 
create_distributed_table @@ -84,8 +84,9 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- END : Create Foreign key constraints. -- BEGIN : Load data into tables. INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; SELECT COUNT(*) FROM sensors; count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/citus_split_shard_by_split_points.out b/src/test/regress/expected/citus_split_shard_by_split_points.out index 87f50da31..13f3b7a36 100644 --- a/src/test/regress/expected/citus_split_shard_by_split_points.out +++ b/src/test/regress/expected/citus_split_shard_by_split_points.out @@ -56,7 +56,7 @@ SELECT create_reference_table('reference_table'); (1 row) -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); create_distributed_table @@ -80,8 +80,9 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- END : Create Foreign key constraints. -- BEGIN : Load data into tables. INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; SELECT COUNT(*) FROM sensors; count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_move_mx.out b/src/test/regress/expected/multi_move_mx.out index 833c9f7df..b6cc5d0d7 100644 --- a/src/test/regress/expected/multi_move_mx.out +++ b/src/test/regress/expected/multi_move_mx.out @@ -238,8 +238,40 @@ ORDER BY LIMIT 1 OFFSET 1; ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. +-- Check that shards of a table with GENERATED columns can be moved. +\c - - - :master_port +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +CREATE TABLE mx_table_with_generated_column (a int, b int GENERATED ALWAYS AS ( a + 3 ) STORED, c int); +SELECT create_distributed_table('mx_table_with_generated_column', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Check that dropped columns are handled properly in a move. 
+ALTER TABLE mx_table_with_generated_column DROP COLUMN c; +-- Move a shard from worker 1 to worker 2 +SELECT + citus_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_with_generated_column'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + -- Cleanup \c - - - :master_port +SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); +DROP TABLE mx_table_with_generated_column; DROP TABLE mx_table_1; DROP TABLE mx_table_2; DROP TABLE mx_table_3; diff --git a/src/test/regress/expected/worker_split_copy_test.out b/src/test/regress/expected/worker_split_copy_test.out index 67d515198..f4fae57e0 100644 --- a/src/test/regress/expected/worker_split_copy_test.out +++ b/src/test/regress/expected/worker_split_copy_test.out @@ -142,8 +142,90 @@ SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_810700 (1 row) -- END: List updated row count for local targets shard. +-- Check that GENERATED columns are handled properly in a shard split operation. +\c - - - :master_port +SET search_path TO worker_split_copy_test; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 81080000; +-- BEGIN: Create distributed table and insert data. +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char, col_todrop int); +SELECT create_distributed_table('dist_table_with_generated_col', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Check that dropped columns are filtered out in COPY command. +ALTER TABLE dist_table_with_generated_col DROP COLUMN col_todrop; +INSERT INTO dist_table_with_generated_col (id, value) (SELECT g.id, 'N' FROM generate_series(1, 1000) AS g(id)); +-- END: Create distributed table and insert data. +-- BEGIN: Create target shards in Worker1 and Worker2 for a 2-way split copy. +\c - - - :worker_1_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080015(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); +\c - - - :worker_2_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080016(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); +-- BEGIN: List row count for source shard and targets shard in Worker1. +\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080000; + count +--------------------------------------------------------------------- + 510 +(1 row) + +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- BEGIN: List row count for target shard in Worker2. 
+\c - - - :worker_2_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016; + count +--------------------------------------------------------------------- + 0 +(1 row) + +\c - - - :worker_1_port +SELECT * from worker_split_copy( + 81080000, -- source shard id to copy + 'id', + ARRAY[ + -- split copy info for split children 1 + ROW(81080015, -- destination shard id + -2147483648, -- split range begin + -1073741824, --split range end + :worker_1_node)::pg_catalog.split_copy_info, + -- split copy info for split children 2 + ROW(81080016, --destination shard id + -1073741823, --split range begin + -1, --split range end + :worker_2_node)::pg_catalog.split_copy_info + ] + ); + worker_split_copy +--------------------------------------------------------------------- + +(1 row) + +\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + count +--------------------------------------------------------------------- + 247 +(1 row) + +\c - - - :worker_2_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016; + count +--------------------------------------------------------------------- + 263 +(1 row) + -- BEGIN: CLEANUP. \c - - - :master_port SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); DROP SCHEMA worker_split_copy_test CASCADE; -- END: CLEANUP. diff --git a/src/test/regress/sql/citus_non_blocking_split_shards.sql b/src/test/regress/sql/citus_non_blocking_split_shards.sql index 11275a342..909beac02 100644 --- a/src/test/regress/sql/citus_non_blocking_split_shards.sql +++ b/src/test/regress/sql/citus_non_blocking_split_shards.sql @@ -53,7 +53,7 @@ SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); CREATE TABLE reference_table (measureid integer PRIMARY KEY); SELECT create_reference_table('reference_table'); -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); @@ -70,9 +70,11 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- BEGIN : Load data into tables. 
INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; + SELECT COUNT(*) FROM sensors; SELECT COUNT(*) FROM reference_table; SELECT COUNT(*) FROM colocated_dist_table; diff --git a/src/test/regress/sql/citus_split_shard_by_split_points.sql b/src/test/regress/sql/citus_split_shard_by_split_points.sql index f5e7f005a..47b28b9d7 100644 --- a/src/test/regress/sql/citus_split_shard_by_split_points.sql +++ b/src/test/regress/sql/citus_split_shard_by_split_points.sql @@ -49,7 +49,7 @@ SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); CREATE TABLE reference_table (measureid integer PRIMARY KEY); SELECT create_reference_table('reference_table'); -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); @@ -66,9 +66,11 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- BEGIN : Load data into tables. INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; + SELECT COUNT(*) FROM sensors; SELECT COUNT(*) FROM reference_table; SELECT COUNT(*) FROM colocated_dist_table; diff --git a/src/test/regress/sql/multi_move_mx.sql b/src/test/regress/sql/multi_move_mx.sql index 166069a6e..9cfa8a3db 100644 --- a/src/test/regress/sql/multi_move_mx.sql +++ b/src/test/regress/sql/multi_move_mx.sql @@ -151,8 +151,34 @@ ORDER BY shardid LIMIT 1 OFFSET 1; +-- Check that shards of a table with GENERATED columns can be moved. +\c - - - :master_port +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; + +CREATE TABLE mx_table_with_generated_column (a int, b int GENERATED ALWAYS AS ( a + 3 ) STORED, c int); +SELECT create_distributed_table('mx_table_with_generated_column', 'a'); + +-- Check that dropped columns are handled properly in a move. 
+ALTER TABLE mx_table_with_generated_column DROP COLUMN c; + +-- Move a shard from worker 1 to worker 2 +SELECT + citus_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_with_generated_column'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + -- Cleanup \c - - - :master_port +SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); +DROP TABLE mx_table_with_generated_column; DROP TABLE mx_table_1; DROP TABLE mx_table_2; DROP TABLE mx_table_3; diff --git a/src/test/regress/sql/worker_split_copy_test.sql b/src/test/regress/sql/worker_split_copy_test.sql index 2fac91c69..e2f4f9a23 100644 --- a/src/test/regress/sql/worker_split_copy_test.sql +++ b/src/test/regress/sql/worker_split_copy_test.sql @@ -110,8 +110,66 @@ SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_810700 SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_81070016"; -- END: List updated row count for local targets shard. +-- Check that GENERATED columns are handled properly in a shard split operation. +\c - - - :master_port +SET search_path TO worker_split_copy_test; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 81080000; + +-- BEGIN: Create distributed table and insert data. +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char, col_todrop int); +SELECT create_distributed_table('dist_table_with_generated_col', 'id'); + +-- Check that dropped columns are filtered out in COPY command. +ALTER TABLE dist_table_with_generated_col DROP COLUMN col_todrop; + +INSERT INTO dist_table_with_generated_col (id, value) (SELECT g.id, 'N' FROM generate_series(1, 1000) AS g(id)); + +-- END: Create distributed table and insert data. + +-- BEGIN: Create target shards in Worker1 and Worker2 for a 2-way split copy. +\c - - - :worker_1_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080015(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); +\c - - - :worker_2_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080016(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); + +-- BEGIN: List row count for source shard and targets shard in Worker1. +\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080000; +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + +-- BEGIN: List row count for target shard in Worker2. 
+\c - - - :worker_2_port
+SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016;
+
+\c - - - :worker_1_port
+SELECT * from worker_split_copy(
+    81080000, -- source shard id to copy
+    'id',
+    ARRAY[
+         -- split copy info for split children 1
+        ROW(81080015, -- destination shard id
+             -2147483648, -- split range begin
+            -1073741824, --split range end
+            :worker_1_node)::pg_catalog.split_copy_info,
+        -- split copy info for split children 2
+        ROW(81080016,  --destination shard id
+            -1073741823, --split range begin
+            -1, --split range end
+            :worker_2_node)::pg_catalog.split_copy_info
+        ]
+    );
+
+\c - - - :worker_1_port
+SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015;
+
+\c - - - :worker_2_port
+SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016;
+
 -- BEGIN: CLEANUP.
 \c - - - :master_port
 SET client_min_messages TO WARNING;
+CALL citus_cleanup_orphaned_resources();
 DROP SCHEMA worker_split_copy_test CASCADE;
 -- END: CLEANUP.

From d82c11f7931636551f6d97b98cdfcf6d4b20a797 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Wed, 8 Mar 2023 13:38:51 +0300
Subject: [PATCH 02/58] Refactor CreateDistributedTable() (#6742)

Split the main logic that allows creating a Citus table into the
internal function CreateCitusTable().

The old CreateDistributedTable() function assumed that it was creating
a reference table whenever the distribution method is
DISTRIBUTE_BY_NONE. However, this will soon no longer hold: once we add
support for creating single-shard distributed tables, their
distribution method will be DISTRIBUTE_BY_NONE as well.

The internal method CreateCitusTable() doesn't make any assumptions
about the table's replication model or similar metadata. Instead, it
expects callers to properly set all such metadata bits.

Moreover, some of the parameters the old CreateDistributedTable()
takes, such as the shard count, were not meaningful for a reference
table, and the same will be true for the new table type.
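To make the new call shape concrete, here is a minimal sketch of how a
call site changes (it mirrors the alter_table.c hunk below; relationId
stands for the relation being converted):

```c
/* before: reference tables went through the generic entry point */
CreateDistributedTable(relationId, NULL, DISTRIBUTE_BY_NONE, 0, false,
					   NULL);

/* after: a dedicated wrapper fills in the reference table metadata */
CreateReferenceTable(relationId);
```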
--- .../distributed/commands/alter_table.c | 3 +- .../commands/create_distributed_table.c | 95 +++++++++++++------ src/include/distributed/metadata_utility.h | 1 + 3 files changed, 70 insertions(+), 29 deletions(-) diff --git a/src/backend/distributed/commands/alter_table.c b/src/backend/distributed/commands/alter_table.c index 0592cb762..f51b62535 100644 --- a/src/backend/distributed/commands/alter_table.c +++ b/src/backend/distributed/commands/alter_table.c @@ -1348,8 +1348,7 @@ CreateCitusTableLike(TableConversionState *con) } else if (IsCitusTableType(con->relationId, REFERENCE_TABLE)) { - CreateDistributedTable(con->newRelationId, NULL, DISTRIBUTE_BY_NONE, 0, false, - NULL); + CreateReferenceTable(con->newRelationId); } else if (IsCitusTableType(con->relationId, CITUS_LOCAL_TABLE)) { diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 86133322d..101d866f0 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -106,11 +106,17 @@ static void CreateDistributedTableConcurrently(Oid relationId, char *colocateWithTableName, int shardCount, bool shardCountIsStrict); -static char DecideReplicationModel(char distributionMethod, char *colocateWithTableName); +static char DecideDistTableReplicationModel(char distributionMethod, + char *colocateWithTableName); static List * HashSplitPointsForShardList(List *shardList); static List * HashSplitPointsForShardCount(int shardCount); static List * WorkerNodesForShardList(List *shardList); static List * RoundRobinWorkerNodeList(List *workerNodeList, int listLength); +static void CreateCitusTable(Oid relationId, char *distributionColumnName, + char distributionMethod, + int shardCount, bool shardCountIsStrict, + char *colocateWithTableName, + char replicationModel); static void CreateHashDistributedTableShards(Oid relationId, int shardCount, Oid colocatedTableId, bool localTableEmpty); static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn, @@ -377,8 +383,8 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName, EnsureForeignKeysForDistributedTableConcurrently(relationId); - char replicationModel = DecideReplicationModel(distributionMethod, - colocateWithTableName); + char replicationModel = DecideDistTableReplicationModel(distributionMethod, + colocateWithTableName); /* * we fail transaction before local table conversion if the table could not be colocated with @@ -622,8 +628,8 @@ static void EnsureColocateWithTableIsValid(Oid relationId, char distributionMethod, char *distributionColumnName, char *colocateWithTableName) { - char replicationModel = DecideReplicationModel(distributionMethod, - colocateWithTableName); + char replicationModel = DecideDistTableReplicationModel(distributionMethod, + colocateWithTableName); /* * we fail transaction before local table conversion if the table could not be colocated with @@ -860,9 +866,6 @@ create_reference_table(PG_FUNCTION_ARGS) CheckCitusVersion(ERROR); Oid relationId = PG_GETARG_OID(0); - char *colocateWithTableName = NULL; - char *distributionColumnName = NULL; - EnsureCitusTableCanBeCreated(relationId); /* enable create_reference_table on an empty node */ @@ -895,8 +898,7 @@ create_reference_table(PG_FUNCTION_ARGS) errdetail("There are no active worker nodes."))); } - CreateDistributedTable(relationId, distributionColumnName, DISTRIBUTE_BY_NONE, - ShardCount, false, 
colocateWithTableName);
+	CreateReferenceTable(relationId);
 
 	PG_RETURN_VOID();
 }
@@ -951,17 +953,61 @@ EnsureRelationExists(Oid relationId)
 
 
 /*
- * CreateDistributedTable creates distributed table in the given configuration.
+ * CreateDistributedTable is a wrapper around CreateCitusTable that creates a
+ * distributed table.
+ */
+void
+CreateDistributedTable(Oid relationId, char *distributionColumnName,
+					   char distributionMethod,
+					   int shardCount, bool shardCountIsStrict,
+					   char *colocateWithTableName)
+{
+	Assert(distributionMethod != DISTRIBUTE_BY_NONE);
+
+	char replicationModel = DecideDistTableReplicationModel(distributionMethod,
+															colocateWithTableName);
+	CreateCitusTable(relationId, distributionColumnName,
+					 distributionMethod, shardCount,
+					 shardCountIsStrict, colocateWithTableName,
+					 replicationModel);
+}
+
+
+/*
+ * CreateReferenceTable is a wrapper around CreateCitusTable that creates a
+ * reference table.
+ */
+void
+CreateReferenceTable(Oid relationId)
+{
+	char *distributionColumnName = NULL;
+	char distributionMethod = DISTRIBUTE_BY_NONE;
+	int shardCount = 1;
+	bool shardCountIsStrict = true;
+	char *colocateWithTableName = NULL;
+	char replicationModel = REPLICATION_MODEL_2PC;
+	CreateCitusTable(relationId, distributionColumnName,
+					 distributionMethod, shardCount,
+					 shardCountIsStrict, colocateWithTableName,
+					 replicationModel);
+}
+
+
+/*
+ * CreateCitusTable is the internal method that creates a Citus table in
+ * the given configuration.
+ *
  * This functions contains all necessary logic to create distributed tables. It
  * performs necessary checks to ensure distributing the table is safe. If it is
  * safe to distribute the table, this function creates distributed table metadata,
  * creates shards and copies local data to shards. This function also handles
  * partitioned tables by distributing its partitions as well.
  */
-void
-CreateDistributedTable(Oid relationId, char *distributionColumnName,
-					   char distributionMethod, int shardCount,
-					   bool shardCountIsStrict, char *colocateWithTableName)
+static void
+CreateCitusTable(Oid relationId, char *distributionColumnName,
+				 char distributionMethod, int shardCount,
+				 bool shardCountIsStrict, char *colocateWithTableName,
+				 char replicationModel)
 {
 	/*
 	 * EnsureTableNotDistributed errors out when relation is a citus table but
@@ -1022,9 +1068,6 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
 
 	PropagatePrerequisiteObjectsForDistributedTable(relationId);
 
-	char replicationModel = DecideReplicationModel(distributionMethod,
-												   colocateWithTableName);
-
 	Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId,
 																 distributionColumnName,
 																 NoLock);
@@ -1420,18 +1463,16 @@ DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
 
 
 /*
- * DecideReplicationModel function decides which replication model should be
- * used depending on given distribution configuration.
+ * DecideDistTableReplicationModel function decides which replication model should be
+ * used for a distributed table depending on the given distribution configuration.
*/ static char -DecideReplicationModel(char distributionMethod, char *colocateWithTableName) +DecideDistTableReplicationModel(char distributionMethod, char *colocateWithTableName) { - if (distributionMethod == DISTRIBUTE_BY_NONE) - { - return REPLICATION_MODEL_2PC; - } - else if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 && - !IsColocateWithNone(colocateWithTableName)) + Assert(distributionMethod != DISTRIBUTE_BY_NONE); + + if (!IsColocateWithDefault(colocateWithTableName) && + !IsColocateWithNone(colocateWithTableName)) { text *colocateWithTableNameText = cstring_to_text(colocateWithTableName); Oid colocatedRelationId = ResolveRelationId(colocateWithTableNameText, false); diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index ceea51678..acb4ae5da 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -325,6 +325,7 @@ extern void DeleteShardPlacementRow(uint64 placementId); extern void CreateDistributedTable(Oid relationId, char *distributionColumnName, char distributionMethod, int shardCount, bool shardCountIsStrict, char *colocateWithTableName); +extern void CreateReferenceTable(Oid relationId); extern void CreateTruncateTrigger(Oid relationId); extern TableConversionReturn * UndistributeTable(TableConversionParameters *params); From e3cf7ace7c1b43c380348e3a5ebd5f65d0f27a76 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Wed, 8 Mar 2023 15:25:36 +0300 Subject: [PATCH 03/58] Stabilize single_node.sql and others that report illegal node removal (#6751) See https://app.circleci.com/pipelines/github/citusdata/citus/30859/workflows/223d61db-8c1d-4909-9aea-d8e470f0368b/jobs/1009243. --- .../distributed/metadata/node_metadata.c | 4 +++ .../planner/multi_physical_planner.c | 32 +++++++++++++++++-- .../distributed/multi_physical_planner.h | 1 + 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index f6639f8d2..72103b9e1 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -1918,6 +1918,10 @@ ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode) { int32 groupId = workerNode->groupId; List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId); + + /* sort the list to prevent regression tests getting flaky */ + shardPlacements = SortList(shardPlacements, CompareGroupShardPlacements); + GroupShardPlacement *placement = NULL; foreach_ptr(placement, shardPlacements) { diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 901e9de17..03206ea9b 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -5343,8 +5343,7 @@ ActiveShardPlacementLists(List *taskList) /* - * CompareShardPlacements compares two shard placements by their tuple oid; this - * oid reflects the tuple's insertion order into pg_dist_placement. + * CompareShardPlacements compares two shard placements by placement id. */ int CompareShardPlacements(const void *leftElement, const void *rightElement) @@ -5370,6 +5369,35 @@ CompareShardPlacements(const void *leftElement, const void *rightElement) } +/* + * CompareGroupShardPlacements compares two group shard placements by placement id. 
+ */ +int +CompareGroupShardPlacements(const void *leftElement, const void *rightElement) +{ + const GroupShardPlacement *leftPlacement = + *((const GroupShardPlacement **) leftElement); + const GroupShardPlacement *rightPlacement = + *((const GroupShardPlacement **) rightElement); + + uint64 leftPlacementId = leftPlacement->placementId; + uint64 rightPlacementId = rightPlacement->placementId; + + if (leftPlacementId < rightPlacementId) + { + return -1; + } + else if (leftPlacementId > rightPlacementId) + { + return 1; + } + else + { + return 0; + } +} + + /* * LeftRotateList returns a copy of the given list that has been cyclically * shifted to the left by the given rotation count. For this, the function diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index 920541e97..d6ad4c248 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -553,6 +553,7 @@ extern bool BinaryOpExpression(Expr *clause, Node **leftOperand, Node **rightOpe /* helper functions */ extern Var * MakeInt4Column(void); extern int CompareShardPlacements(const void *leftElement, const void *rightElement); +extern int CompareGroupShardPlacements(const void *leftElement, const void *rightElement); extern bool ShardIntervalsOverlap(ShardInterval *firstInterval, ShardInterval *secondInterval); extern bool ShardIntervalsOverlapWithParams(Datum firstMin, Datum firstMax, From 20a5f3af2b948faca840a5b5fd40f1fca66520ac Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 10 Mar 2023 13:55:52 +0300 Subject: [PATCH 04/58] Replace CITUS_TABLE_WITH_NO_DIST_KEY checks with HasDistributionKey() (#6743) Now that we will soon add another table type having DISTRIBUTE_BY_NONE as distribution method and that we want the code to interpret such tables mostly as distributed tables, let's make the definition of those other two table types more strict by removing CITUS_TABLE_WITH_NO_DIST_KEY macro. And instead, use HasDistributionKey() check in the places where the logic applies to all table types that have / don't have a distribution key. In future PRs, we might want to convert some of those HasDistributionKey() checks if logic only applies to Citus local / reference tables, not the others. And adding HasDistributionKey() also allows us to consider having DISTRIBUTE_BY_NONE as the distribution method as a "table attribute" that can apply to distributed tables too, rather something that determines the table type. 
--- .../distributed/commands/foreign_constraint.c | 21 +++-- src/backend/distributed/commands/index.c | 2 +- src/backend/distributed/commands/multi_copy.c | 2 +- src/backend/distributed/commands/table.c | 17 ++-- src/backend/distributed/commands/truncate.c | 2 +- .../distributed/metadata/metadata_cache.c | 94 ++++++++++++------- .../distributed/metadata/metadata_sync.c | 4 +- .../distributed/metadata/node_metadata.c | 2 +- .../distributed/planner/multi_join_order.c | 2 +- .../planner/multi_logical_planner.c | 2 +- .../planner/multi_physical_planner.c | 8 +- .../planner/multi_router_planner.c | 9 +- .../planner/query_colocation_checker.c | 2 +- .../relation_restriction_equivalence.c | 6 +- .../distributed/planner/shard_pruning.c | 2 +- .../transaction/relation_access_tracking.c | 10 +- .../distributed/utils/colocation_utils.c | 3 +- .../distributed/utils/shardinterval_utils.c | 7 +- src/include/distributed/metadata_cache.h | 9 +- 19 files changed, 122 insertions(+), 82 deletions(-) diff --git a/src/backend/distributed/commands/foreign_constraint.c b/src/backend/distributed/commands/foreign_constraint.c index cf1e43fd4..6f12db13f 100644 --- a/src/backend/distributed/commands/foreign_constraint.c +++ b/src/backend/distributed/commands/foreign_constraint.c @@ -221,7 +221,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis if (!referencedIsCitus && !selfReferencingTable) { if (IsCitusLocalTableByDistParams(referencingDistMethod, - referencingReplicationModel)) + referencingReplicationModel, + referencingColocationId)) { ErrorOutForFKeyBetweenPostgresAndCitusLocalTable(referencedTableId); } @@ -245,8 +246,7 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis if (!selfReferencingTable) { referencedDistMethod = PartitionMethod(referencedTableId); - referencedDistKey = IsCitusTableType(referencedTableId, - CITUS_TABLE_WITH_NO_DIST_KEY) ? + referencedDistKey = !HasDistributionKey(referencedTableId) ? NULL : DistPartitionKey(referencedTableId); referencedColocationId = TableColocationId(referencedTableId); @@ -278,9 +278,17 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis } bool referencingIsCitusLocalOrRefTable = - (referencingDistMethod == DISTRIBUTE_BY_NONE); + IsCitusLocalTableByDistParams(referencingDistMethod, + referencingReplicationModel, + referencingColocationId) || + IsReferenceTableByDistParams(referencingDistMethod, + referencingReplicationModel); bool referencedIsCitusLocalOrRefTable = - (referencedDistMethod == DISTRIBUTE_BY_NONE); + IsCitusLocalTableByDistParams(referencedDistMethod, + referencedReplicationModel, + referencedColocationId) || + IsReferenceTableByDistParams(referencedDistMethod, + referencedReplicationModel); if (referencingIsCitusLocalOrRefTable && referencedIsCitusLocalOrRefTable) { EnsureSupportedFKeyBetweenCitusLocalAndRefTable(constraintForm, @@ -313,7 +321,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis * reference table is referenced. 
*/ bool referencedIsReferenceTable = - (referencedReplicationModel == REPLICATION_MODEL_2PC); + IsReferenceTableByDistParams(referencedDistMethod, + referencedReplicationModel); if (!referencedIsReferenceTable && ( referencingColocationId == INVALID_COLOCATION_ID || referencingColocationId != referencedColocationId)) diff --git a/src/backend/distributed/commands/index.c b/src/backend/distributed/commands/index.c index 5f1598510..aa0715372 100644 --- a/src/backend/distributed/commands/index.c +++ b/src/backend/distributed/commands/index.c @@ -1190,7 +1190,7 @@ ErrorIfUnsupportedIndexStmt(IndexStmt *createIndexStatement) * Non-distributed tables do not have partition key, and unique constraints * are allowed for them. Thus, we added a short-circuit for non-distributed tables. */ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { return; } diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index b5ac6a519..1203aeff4 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -393,7 +393,7 @@ CitusCopyFrom(CopyStmt *copyStatement, QueryCompletion *completionTag) if (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) || IsCitusTableTypeCacheEntry(cacheEntry, RANGE_DISTRIBUTED) || IsCitusTableTypeCacheEntry(cacheEntry, APPEND_DISTRIBUTED) || - IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + !HasDistributionKeyCacheEntry(cacheEntry)) { CopyToExistingShards(copyStatement, completionTag); } diff --git a/src/backend/distributed/commands/table.c b/src/backend/distributed/commands/table.c index 39a652f10..c0d8fa92b 100644 --- a/src/backend/distributed/commands/table.c +++ b/src/backend/distributed/commands/table.c @@ -75,7 +75,7 @@ static void DistributePartitionUsingParent(Oid parentRelationId, static void ErrorIfMultiLevelPartitioning(Oid parentRelationId, Oid partitionRelationId); static void ErrorIfAttachCitusTableToPgLocalTable(Oid parentRelationId, Oid partitionRelationId); -static bool AlterTableDefinesFKeyBetweenPostgresAndNonDistTable( +static bool ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef( AlterTableStmt *alterTableStatement); static bool ShouldMarkConnectedRelationsNotAutoConverted(Oid leftRelationId, Oid rightRelationId); @@ -1119,7 +1119,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, if (ShouldEnableLocalReferenceForeignKeys() && processUtilityContext != PROCESS_UTILITY_SUBCOMMAND && - AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(alterTableStatement)) + ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(alterTableStatement)) { /* * We don't process subcommands generated by postgres. @@ -1584,12 +1584,12 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, /* - * AlterTableDefinesFKeyBetweenPostgresAndNonDistTable returns true if given + * ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef returns true if given * alter table command defines foreign key between a postgres table and a * reference or citus local table. 
  */
 static bool
-AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableStatement)
+ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(AlterTableStmt *alterTableStatement)
 {
 	List *foreignKeyConstraintList =
 		GetAlterTableAddFKeyConstraintList(alterTableStatement);
@@ -1607,9 +1607,12 @@ AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableSt
 		if (!IsCitusTable(leftRelationId))
 		{
 			return RelationIdListContainsCitusTableType(rightRelationIdList,
-														CITUS_TABLE_WITH_NO_DIST_KEY);
+														CITUS_LOCAL_TABLE) ||
+				   RelationIdListContainsCitusTableType(rightRelationIdList,
+														REFERENCE_TABLE);
 		}
-		else if (IsCitusTableType(leftRelationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+		else if (IsCitusTableType(leftRelationId, CITUS_LOCAL_TABLE) ||
+				 IsCitusTableType(leftRelationId, REFERENCE_TABLE))
 		{
 			return RelationIdListContainsPostgresTable(rightRelationIdList);
 		}
@@ -3666,7 +3669,7 @@ SetupExecutionModeForAlterTable(Oid relationId, AlterTableCmd *command)
 	 * sequential mode.
 	 */
 	if (executeSequentially &&
-		!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) &&
+		HasDistributionKey(relationId) &&
 		ParallelQueryExecutedInTransaction())
 	{
 		char *relationName = get_rel_name(relationId);
diff --git a/src/backend/distributed/commands/truncate.c b/src/backend/distributed/commands/truncate.c
index 0993c287f..52f769a11 100644
--- a/src/backend/distributed/commands/truncate.c
+++ b/src/backend/distributed/commands/truncate.c
@@ -324,7 +324,7 @@ ExecuteTruncateStmtSequentialIfNecessary(TruncateStmt *command)
 		{
 			Oid relationId = RangeVarGetRelid(rangeVar, NoLock, failOK);
 
-			if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) &&
+			if (IsCitusTable(relationId) && !HasDistributionKey(relationId) &&
 				TableReferenced(relationId))
 			{
 				char *relationName = get_rel_name(relationId);
diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c
index 8fd4c5de6..b7753108c 100644
--- a/src/backend/distributed/metadata/metadata_cache.c
+++ b/src/backend/distributed/metadata/metadata_cache.c
@@ -311,7 +311,7 @@ static void InvalidateDistTableCache(void);
 static void InvalidateDistObjectCache(void);
 static bool InitializeTableCacheEntry(int64 shardId, bool missingOk);
 static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
-									 CitusTableType tableType);
+									 uint32 colocationId, CitusTableType tableType);
 static bool RefreshTableCacheEntryIfInvalid(ShardIdCacheEntry *shardEntry,
 											bool missingOk);
 
@@ -450,7 +450,36 @@ bool
 IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tableType)
 {
 	return IsCitusTableTypeInternal(tableEntry->partitionMethod,
-									tableEntry->replicationModel, tableType);
+									tableEntry->replicationModel,
+									tableEntry->colocationId, tableType);
+}
+
+
+/*
+ * HasDistributionKey returns true if the given Citus table has a
+ * distribution key.
+ */
+bool
+HasDistributionKey(Oid relationId)
+{
+	CitusTableCacheEntry *tableEntry = LookupCitusTableCacheEntry(relationId);
+	if (tableEntry == NULL)
+	{
+		ereport(ERROR, (errmsg("relation with oid %u is not a Citus table", relationId)));
+	}
+
+	return HasDistributionKeyCacheEntry(tableEntry);
+}
+
+
+/*
+ * HasDistributionKeyCacheEntry returns true if the given cache entry
+ * identifies a Citus table that has a distribution key.
+ */ +bool +HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry) +{ + return tableEntry->partitionMethod != DISTRIBUTE_BY_NONE; } @@ -460,7 +489,7 @@ IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tabl */ static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel, - CitusTableType tableType) + uint32 colocationId, CitusTableType tableType) { switch (tableType) { @@ -501,12 +530,8 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel, case CITUS_LOCAL_TABLE: { return partitionMethod == DISTRIBUTE_BY_NONE && - replicationModel != REPLICATION_MODEL_2PC; - } - - case CITUS_TABLE_WITH_NO_DIST_KEY: - { - return partitionMethod == DISTRIBUTE_BY_NONE; + replicationModel != REPLICATION_MODEL_2PC && + colocationId == INVALID_COLOCATION_ID; } case ANY_CITUS_TABLE_TYPE: @@ -529,33 +554,21 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel, char * GetTableTypeName(Oid tableId) { - bool regularTable = false; - char partitionMethod = ' '; - char replicationModel = ' '; - if (IsCitusTable(tableId)) - { - CitusTableCacheEntry *referencingCacheEntry = GetCitusTableCacheEntry(tableId); - partitionMethod = referencingCacheEntry->partitionMethod; - replicationModel = referencingCacheEntry->replicationModel; - } - else - { - regularTable = true; - } - - if (regularTable) + if (!IsCitusTable(tableId)) { return "regular table"; } - else if (partitionMethod == 'h') + + CitusTableCacheEntry *tableCacheEntry = GetCitusTableCacheEntry(tableId); + if (IsCitusTableTypeCacheEntry(tableCacheEntry, HASH_DISTRIBUTED)) { return "distributed table"; } - else if (partitionMethod == 'n' && replicationModel == 't') + else if (IsCitusTableTypeCacheEntry(tableCacheEntry, REFERENCE_TABLE)) { return "reference table"; } - else if (partitionMethod == 'n' && replicationModel != 't') + else if (IsCitusTableTypeCacheEntry(tableCacheEntry, CITUS_LOCAL_TABLE)) { return "citus local table"; } @@ -765,14 +778,28 @@ PgDistPartitionTupleViaCatalog(Oid relationId) /* - * IsCitusLocalTableByDistParams returns true if given partitionMethod and - * replicationModel would identify a citus local table. + * IsReferenceTableByDistParams returns true if given partitionMethod and + * replicationModel would identify a reference table. */ bool -IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel) +IsReferenceTableByDistParams(char partitionMethod, char replicationModel) { return partitionMethod == DISTRIBUTE_BY_NONE && - replicationModel != REPLICATION_MODEL_2PC; + replicationModel == REPLICATION_MODEL_2PC; +} + + +/* + * IsCitusLocalTableByDistParams returns true if given partitionMethod, + * replicationModel and colocationId would identify a citus local table. 
+ */ +bool +IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel, + uint32 colocationId) +{ + return partitionMethod == DISTRIBUTE_BY_NONE && + replicationModel != REPLICATION_MODEL_2PC && + colocationId == INVALID_COLOCATION_ID; } @@ -4837,11 +4864,14 @@ CitusTableTypeIdList(CitusTableType citusTableType) Datum partMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1]; Datum replicationModelDatum = datumArray[Anum_pg_dist_partition_repmodel - 1]; + Datum colocationIdDatum = datumArray[Anum_pg_dist_partition_colocationid - 1]; Oid partitionMethod = DatumGetChar(partMethodDatum); Oid replicationModel = DatumGetChar(replicationModelDatum); + uint32 colocationId = DatumGetUInt32(colocationIdDatum); - if (IsCitusTableTypeInternal(partitionMethod, replicationModel, citusTableType)) + if (IsCitusTableTypeInternal(partitionMethod, replicationModel, colocationId, + citusTableType)) { Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1]; diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c index 6a5840f78..df9104efd 100644 --- a/src/backend/distributed/metadata/metadata_sync.c +++ b/src/backend/distributed/metadata/metadata_sync.c @@ -535,7 +535,7 @@ ShouldSyncTableMetadata(Oid relationId) bool hashDistributed = IsCitusTableTypeCacheEntry(tableEntry, HASH_DISTRIBUTED); bool citusTableWithNoDistKey = - IsCitusTableTypeCacheEntry(tableEntry, CITUS_TABLE_WITH_NO_DIST_KEY); + !HasDistributionKeyCacheEntry(tableEntry); return ShouldSyncTableMetadataInternal(hashDistributed, citusTableWithNoDistKey); } @@ -1158,7 +1158,7 @@ DistributionCreateCommand(CitusTableCacheEntry *cacheEntry) char replicationModel = cacheEntry->replicationModel; StringInfo tablePartitionKeyNameString = makeStringInfo(); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { appendStringInfo(tablePartitionKeyNameString, "NULL"); } diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index 72103b9e1..91ffca4fe 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -1536,7 +1536,7 @@ get_shard_id_for_distribution_column(PG_FUNCTION_ARGS) errmsg("relation is not distributed"))); } - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { List *shardIntervalList = LoadShardIntervalList(relationId); if (shardIntervalList == NIL) diff --git a/src/backend/distributed/planner/multi_join_order.c b/src/backend/distributed/planner/multi_join_order.c index 9b2342b20..b1195c664 100644 --- a/src/backend/distributed/planner/multi_join_order.c +++ b/src/backend/distributed/planner/multi_join_order.c @@ -1383,7 +1383,7 @@ DistPartitionKey(Oid relationId) CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId); /* non-distributed tables do not have partition column */ - if (IsCitusTableTypeCacheEntry(partitionEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(partitionEntry)) { return NULL; } diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 7e665b567..d9322bf5e 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -228,7 +228,7 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList) 
* If the expression belongs to a non-distributed table continue searching for * other partition keys. */ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { continue; } diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 03206ea9b..be6caf0e2 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -2199,7 +2199,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId, Oid relationId = relationRestriction->relationId; CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { continue; } @@ -2377,7 +2377,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) nonReferenceRelations = lappend_oid(nonReferenceRelations, relationId); } - else if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { /* do not need to handle non-distributed tables */ continue; @@ -2482,7 +2482,7 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex, ShardInterval *shardInterval = NULL; CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* non-distributed tables have only one shard */ shardInterval = cacheEntry->sortedShardIntervalArray[0]; @@ -3697,7 +3697,7 @@ PartitionedOnColumn(Var *column, List *rangeTableList, List *dependentJobList) Var *partitionColumn = PartitionColumn(relationId, rangeTableId); /* non-distributed tables do not have partition columns */ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { return false; } diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 5fcb4dfea..f4591a770 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -2675,7 +2675,7 @@ TargetShardIntervalForFastPathQuery(Query *query, bool *isMultiShardQuery, { Oid relationId = ExtractFirstCitusTableId(query); - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { /* we don't need to do shard pruning for non-distributed tables */ return list_make1(LoadShardIntervalList(relationId)); @@ -2968,7 +2968,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError) Assert(query->commandType == CMD_INSERT); /* reference tables and citus local tables can only have one shard */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { List *shardIntervalList = LoadShardIntervalList(distributedTableId); @@ -3509,7 +3509,7 @@ ExtractInsertPartitionKeyValue(Query *query) uint32 rangeTableId = 1; Const *singlePartitionValueConst = NULL; - if (IsCitusTableType(distributedTableId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(distributedTableId)) { return NULL; } @@ -3829,8 +3829,7 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree) CitusTableCacheEntry *modificationTableCacheEntry = GetCitusTableCacheEntry(distributedTableId); - if 
(IsCitusTableTypeCacheEntry(modificationTableCacheEntry, - CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(modificationTableCacheEntry)) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, "cannot router plan modification of a non-distributed table", diff --git a/src/backend/distributed/planner/query_colocation_checker.c b/src/backend/distributed/planner/query_colocation_checker.c index b7cc41068..c5de0ef9e 100644 --- a/src/backend/distributed/planner/query_colocation_checker.c +++ b/src/backend/distributed/planner/query_colocation_checker.c @@ -168,7 +168,7 @@ AnchorRte(Query *subquery) { Oid relationId = currentRte->relid; - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { /* * Non-distributed tables should not be the anchor rte since they diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 713f1f4f2..4d131899a 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -703,8 +703,8 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList, int rteIdentity = GetRTEIdentity(relationRestriction->rte); /* we shouldn't check for the equality of non-distributed tables */ - if (IsCitusTableType(relationRestriction->relationId, - CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationRestriction->relationId) && + !HasDistributionKey(relationRestriction->relationId)) { continue; } @@ -1933,7 +1933,7 @@ AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictio { Oid relationId = relationRestriction->relationId; - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { continue; } diff --git a/src/backend/distributed/planner/shard_pruning.c b/src/backend/distributed/planner/shard_pruning.c index 665c9a75b..5375a70fa 100644 --- a/src/backend/distributed/planner/shard_pruning.c +++ b/src/backend/distributed/planner/shard_pruning.c @@ -333,7 +333,7 @@ PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList, } /* short circuit for non-distributed tables such as reference table */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { prunedList = ShardArrayToList(cacheEntry->sortedShardIntervalArray, cacheEntry->shardIntervalArrayLength); diff --git a/src/backend/distributed/transaction/relation_access_tracking.c b/src/backend/distributed/transaction/relation_access_tracking.c index a6a8ba5f6..2ecbba5b7 100644 --- a/src/backend/distributed/transaction/relation_access_tracking.c +++ b/src/backend/distributed/transaction/relation_access_tracking.c @@ -195,7 +195,7 @@ RecordRelationAccessIfNonDistTable(Oid relationId, ShardPlacementAccessType acce * recursively calling RecordRelationAccessBase(), so becareful about * removing this check. 
*/ - if (!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && HasDistributionKey(relationId)) { return; } @@ -732,8 +732,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (!(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY) && - cacheEntry->referencingRelationsViaForeignKey != NIL)) + if (HasDistributionKeyCacheEntry(cacheEntry) || + cacheEntry->referencingRelationsViaForeignKey == NIL) { return; } @@ -931,7 +931,7 @@ HoldsConflictingLockWithReferencedRelations(Oid relationId, ShardPlacementAccess * We're only interested in foreign keys to reference tables and citus * local tables. */ - if (!IsCitusTableType(referencedRelation, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(referencedRelation) && HasDistributionKey(referencedRelation)) { continue; } @@ -993,7 +993,7 @@ HoldsConflictingLockWithReferencingRelations(Oid relationId, ShardPlacementAcces CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); bool holdsConflictingLocks = false; - Assert(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)); + Assert(!HasDistributionKeyCacheEntry(cacheEntry)); Oid referencingRelation = InvalidOid; foreach_oid(referencingRelation, cacheEntry->referencingRelationsViaForeignKey) diff --git a/src/backend/distributed/utils/colocation_utils.c b/src/backend/distributed/utils/colocation_utils.c index aabfcdf62..985d4c38e 100644 --- a/src/backend/distributed/utils/colocation_utils.c +++ b/src/backend/distributed/utils/colocation_utils.c @@ -442,8 +442,7 @@ ShardsIntervalsEqual(ShardInterval *leftShardInterval, ShardInterval *rightShard { return HashPartitionedShardIntervalsEqual(leftShardInterval, rightShardInterval); } - else if (IsCitusTableType(leftShardInterval->relationId, - CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (!HasDistributionKey(leftShardInterval->relationId)) { /* * Reference tables has only a single shard and all reference tables diff --git a/src/backend/distributed/utils/shardinterval_utils.c b/src/backend/distributed/utils/shardinterval_utils.c index 2980d11a4..12635f9f4 100644 --- a/src/backend/distributed/utils/shardinterval_utils.c +++ b/src/backend/distributed/utils/shardinterval_utils.c @@ -223,8 +223,7 @@ ShardIndex(ShardInterval *shardInterval) * currently it is not required. 
*/ if (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) && - !IsCitusTableTypeCacheEntry( - cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + HasDistributionKeyCacheEntry(cacheEntry)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("finding index of a given shard is only supported for " @@ -233,7 +232,7 @@ ShardIndex(ShardInterval *shardInterval) } /* short-circuit for reference tables */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* * Reference tables and citus local tables have only a single shard, @@ -333,7 +332,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry) shardIndex = CalculateUniformHashRangeIndex(hashedValue, shardCount); } } - else if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* non-distributed tables have a single shard, all values mapped to that shard */ Assert(shardCount == 1); diff --git a/src/include/distributed/metadata_cache.h b/src/include/distributed/metadata_cache.h index 07fa50e64..e7cb2514d 100644 --- a/src/include/distributed/metadata_cache.h +++ b/src/include/distributed/metadata_cache.h @@ -133,9 +133,6 @@ typedef enum REFERENCE_TABLE, CITUS_LOCAL_TABLE, - /* table without a dist key such as reference table */ - CITUS_TABLE_WITH_NO_DIST_KEY, - ANY_CITUS_TABLE_TYPE } CitusTableType; @@ -143,6 +140,8 @@ extern List * AllCitusTableIds(void); extern bool IsCitusTableType(Oid relationId, CitusTableType tableType); extern bool IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEtnry, CitusTableType tableType); +bool HasDistributionKey(Oid relationId); +bool HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry); extern char * GetTableTypeName(Oid tableId); extern void SetCreateCitusTransactionLevel(int val); @@ -154,7 +153,9 @@ extern List * LookupDistShardTuples(Oid relationId); extern char PartitionMethodViaCatalog(Oid relationId); extern Var * PartitionColumnViaCatalog(Oid relationId); extern uint32 ColocationIdViaCatalog(Oid relationId); -extern bool IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel); +bool IsReferenceTableByDistParams(char partitionMethod, char replicationModel); +extern bool IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel, + uint32 colocationId); extern List * CitusTableList(void); extern ShardInterval * LoadShardInterval(uint64 shardId); extern bool ShardExists(uint64 shardId); From cc945fa331464d9f388da39dfd30c566243c94fa Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 14 Mar 2023 10:22:34 +0300 Subject: [PATCH 05/58] Add multi_create_fdw into minimal_schedule (#6759) So that we can run the tests that require fake_fdw by using minimal schedule too. Also move multi_create_fdw.sql up in multi_1_schedule to make it available to more tests. 
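For context: a regression schedule is a plain-text file in which each `test:` line names a group of tests that run together. As a rough illustration (not part of this patch; the helper below is hypothetical), tooling can discover which tests a schedule makes available like this:

```python
# Hypothetical sketch, not part of this patch: list the tests a schedule
# file provides, roughly the way run_test.py reads schedules.
from pathlib import Path


def tests_in_schedule(schedule_path: str) -> list[str]:
    """Return every test named on a 'test:' line of a schedule file."""
    tests = []
    for line in Path(schedule_path).read_text().splitlines():
        if line.startswith("test: "):
            # A single line can list several tests that run in parallel.
            tests.extend(line[len("test: "):].split())
    return tests


# After this change, multi_create_fdw shows up in the minimal schedule too:
print(tests_in_schedule("src/test/regress/minimal_schedule"))
```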
--- src/test/regress/minimal_schedule | 2 +- src/test/regress/multi_1_schedule | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/test/regress/minimal_schedule b/src/test/regress/minimal_schedule index ef2d3dc65..8b0cfff70 100644 --- a/src/test/regress/minimal_schedule +++ b/src/test/regress/minimal_schedule @@ -1,2 +1,2 @@ test: minimal_cluster_management -test: multi_test_helpers multi_test_helpers_superuser columnar_test_helpers multi_test_catalog_views tablespace +test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw columnar_test_helpers multi_test_catalog_views tablespace diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index 5e2cd17c1..ee81bde38 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -19,7 +19,7 @@ test: multi_extension test: single_node test: relation_access_tracking_single_node test: single_node_truncate -test: multi_test_helpers multi_test_helpers_superuser +test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw test: multi_cluster_management # below tests are placed right after multi_cluster_management as we do @@ -91,13 +91,6 @@ test: drop_partitioned_table test: multi_fix_partition_shard_index_names test: partition_wise_join -# ---------- -# Tests for foreign data wrapper support -# ---------- -test: multi_create_fdw - - - # ---------- # Tests for statistics propagation # ---------- From f68fc9e69ce51833bb94520f63b48a63f2f76e08 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 14 Mar 2023 14:24:52 +0300 Subject: [PATCH 06/58] Decide core distribution params in CreateCitusTable (#6760) Decide core distribution params in CreateCitusTable to reduce the chances of creating Citus tables based on incorrect combinations of distribution method and replication model params. Also introduce DistributedTableParams struct to encapsulate the parameters that are specific to distributed tables. --- .../commands/create_distributed_table.c | 283 +++++++++++++----- 1 file changed, 207 insertions(+), 76 deletions(-) diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 101d866f0..e38395296 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -94,6 +94,28 @@ #include "utils/syscache.h" #include "utils/inval.h" + +/* common params that apply to all Citus table types */ +typedef struct +{ + char distributionMethod; + char replicationModel; +} CitusTableParams; + + +/* + * Params that only apply to distributed tables, i.e., the ones that are + * known as DISTRIBUTED_TABLE by Citus metadata. + */ +typedef struct +{ + int shardCount; + bool shardCountIsStrict; + char *colocateWithTableName; + char *distributionColumnName; +} DistributedTableParams; + + /* * once every LOG_PER_TUPLE_AMOUNT, the copy will be logged. 
*/ @@ -112,17 +134,16 @@ static List * HashSplitPointsForShardList(List *shardList); static List * HashSplitPointsForShardCount(int shardCount); static List * WorkerNodesForShardList(List *shardList); static List * RoundRobinWorkerNodeList(List *workerNodeList, int listLength); -static void CreateCitusTable(Oid relationId, char *distributionColumnName, - char distributionMethod, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName, - char replicationModel); +static CitusTableParams DecideCitusTableParams(CitusTableType tableType, + DistributedTableParams * + distributedTableParams); +static void CreateCitusTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams); static void CreateHashDistributedTableShards(Oid relationId, int shardCount, Oid colocatedTableId, bool localTableEmpty); -static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn, - char distributionMethod, char replicationModel, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName); +static uint32 ColocationIdForNewTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams, + Var *distributionColumn); static void EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn, char distributionMethod, uint32 colocationId, char replicationModel); @@ -962,14 +983,42 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, int shardCount, bool shardCountIsStrict, char *colocateWithTableName) { - Assert(distributionMethod != DISTRIBUTE_BY_NONE); + CitusTableType tableType; + switch (distributionMethod) + { + case DISTRIBUTE_BY_HASH: + { + tableType = HASH_DISTRIBUTED; + break; + } - char replicationModel = DecideDistTableReplicationModel(distributionMethod, - colocateWithTableName); - CreateCitusTable(relationId, distributionColumnName, - distributionMethod, shardCount, - shardCountIsStrict, colocateWithTableName, - replicationModel); + case DISTRIBUTE_BY_APPEND: + { + tableType = APPEND_DISTRIBUTED; + break; + } + + case DISTRIBUTE_BY_RANGE: + { + tableType = RANGE_DISTRIBUTED; + break; + } + + default: + { + ereport(ERROR, (errmsg("unexpected distribution method when " + "deciding Citus table type"))); + break; + } + } + + DistributedTableParams distributedTableParams = { + .colocateWithTableName = colocateWithTableName, + .shardCount = shardCount, + .shardCountIsStrict = shardCountIsStrict, + .distributionColumnName = distributionColumnName + }; + CreateCitusTable(relationId, tableType, &distributedTableParams); } @@ -980,16 +1029,7 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, void CreateReferenceTable(Oid relationId) { - char *distributionColumnName = NULL; - char distributionMethod = DISTRIBUTE_BY_NONE; - int shardCount = 1; - bool shardCountIsStrict = true; - char *colocateWithTableName = NULL; - char replicationModel = REPLICATION_MODEL_2PC; - CreateCitusTable(relationId, distributionColumnName, - distributionMethod, shardCount, - shardCountIsStrict, colocateWithTableName, - replicationModel); + CreateCitusTable(relationId, REFERENCE_TABLE, NULL); } @@ -997,6 +1037,9 @@ CreateReferenceTable(Oid relationId) * CreateCitusTable is the internal method that creates a Citus table in * given configuration. * + * DistributedTableParams should be non-null only if we're creating a distributed + * table. + * * This functions contains all necessary logic to create distributed tables. 
It * performs necessary checks to ensure distributing the table is safe. If it is * safe to distribute the table, this function creates distributed table metadata, @@ -1004,11 +1047,17 @@ CreateReferenceTable(Oid relationId) * partitioned tables by distributing its partitions as well. */ static void -CreateCitusTable(Oid relationId, char *distributionColumnName, - char distributionMethod, int shardCount, - bool shardCountIsStrict, char *colocateWithTableName, - char replicationModel) +CreateCitusTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams) { + if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED || + tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL)) + { + ereport(ERROR, (errmsg("distributed table params must be provided " + "when creating a distributed table and must " + "not be otherwise"))); + } + /* * EnsureTableNotDistributed errors out when relation is a citus table but * we don't want to ask user to first undistribute their citus local tables @@ -1034,11 +1083,8 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, * that ALTER TABLE hook does the necessary job, which means converting * local tables to citus local tables to properly support such foreign * keys. - * - * This function does not expect to create Citus local table, so we blindly - * create reference table when the method is DISTRIBUTE_BY_NONE. */ - else if (distributionMethod == DISTRIBUTE_BY_NONE && + else if (tableType == REFERENCE_TABLE && ShouldEnableLocalReferenceForeignKeys() && HasForeignKeyWithLocalTable(relationId)) { @@ -1068,21 +1114,29 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, PropagatePrerequisiteObjectsForDistributedTable(relationId); - Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId, - distributionColumnName, - NoLock); + Var *distributionColumn = NULL; + if (distributedTableParams) + { + distributionColumn = BuildDistributionKeyFromColumnName(relationId, + distributedTableParams-> + distributionColumnName, + NoLock); + } + + CitusTableParams citusTableParams = DecideCitusTableParams(tableType, + distributedTableParams); /* * ColocationIdForNewTable assumes caller acquires lock on relationId. In our case, * our caller already acquired lock on relationId. 
*/ - uint32 colocationId = ColocationIdForNewTable(relationId, distributionColumn, - distributionMethod, replicationModel, - shardCount, shardCountIsStrict, - colocateWithTableName); + uint32 colocationId = ColocationIdForNewTable(relationId, tableType, + distributedTableParams, + distributionColumn); - EnsureRelationCanBeDistributed(relationId, distributionColumn, distributionMethod, - colocationId, replicationModel); + EnsureRelationCanBeDistributed(relationId, distributionColumn, + citusTableParams.distributionMethod, + colocationId, citusTableParams.replicationModel); /* * Make sure that existing reference tables have been replicated to all the nodes @@ -1111,8 +1165,10 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, bool autoConverted = false; /* create an entry for distributed table in pg_dist_partition */ - InsertIntoPgDistPartition(relationId, distributionMethod, distributionColumn, - colocationId, replicationModel, autoConverted); + InsertIntoPgDistPartition(relationId, citusTableParams.distributionMethod, + distributionColumn, + colocationId, citusTableParams.replicationModel, + autoConverted); /* foreign tables do not support TRUNCATE trigger */ if (RegularTable(relationId)) @@ -1121,17 +1177,14 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, } /* create shards for hash distributed and reference tables */ - if (distributionMethod == DISTRIBUTE_BY_HASH) + if (tableType == HASH_DISTRIBUTED) { - CreateHashDistributedTableShards(relationId, shardCount, colocatedTableId, + CreateHashDistributedTableShards(relationId, distributedTableParams->shardCount, + colocatedTableId, localTableEmpty); } - else if (distributionMethod == DISTRIBUTE_BY_NONE) + else if (tableType == REFERENCE_TABLE) { - /* - * This function does not expect to create Citus local table, so we blindly - * create reference table when the method is DISTRIBUTE_BY_NONE. - */ CreateReferenceTableShard(relationId); } @@ -1173,9 +1226,14 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, { MemoryContextReset(citusPartitionContext); - CreateDistributedTable(partitionRelationId, distributionColumnName, - distributionMethod, shardCount, false, - parentRelationName); + DistributedTableParams childDistributedTableParams = { + .colocateWithTableName = parentRelationName, + .shardCount = distributedTableParams->shardCount, + .shardCountIsStrict = false, + .distributionColumnName = distributedTableParams->distributionColumnName, + }; + CreateCitusTable(partitionRelationId, tableType, + &childDistributedTableParams); } MemoryContextSwitchTo(oldContext); @@ -1183,8 +1241,7 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, } /* copy over data for hash distributed and reference tables */ - if (distributionMethod == DISTRIBUTE_BY_HASH || - distributionMethod == DISTRIBUTE_BY_NONE) + if (tableType == HASH_DISTRIBUTED || tableType == REFERENCE_TABLE) { if (RegularTable(relationId)) { @@ -1203,6 +1260,70 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, } +/* + * DecideCitusTableParams decides CitusTableParams based on given CitusTableType + * and DistributedTableParams if it's a distributed table. + * + * DistributedTableParams should be non-null only if CitusTableType corresponds + * to a distributed table. 
+ */ +static +CitusTableParams +DecideCitusTableParams(CitusTableType tableType, + DistributedTableParams *distributedTableParams) +{ + CitusTableParams citusTableParams = { 0 }; + switch (tableType) + { + case HASH_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_HASH; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(DISTRIBUTE_BY_HASH, + distributedTableParams-> + colocateWithTableName); + break; + } + + case APPEND_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_APPEND; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(APPEND_DISTRIBUTED, + distributedTableParams-> + colocateWithTableName); + break; + } + + case RANGE_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_RANGE; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(RANGE_DISTRIBUTED, + distributedTableParams-> + colocateWithTableName); + break; + } + + case REFERENCE_TABLE: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE; + citusTableParams.replicationModel = REPLICATION_MODEL_2PC; + break; + } + + default: + { + ereport(ERROR, (errmsg("unexpected table type when deciding Citus " + "table params"))); + break; + } + } + + return citusTableParams; +} + + /* * PropagatePrerequisiteObjectsForDistributedTable ensures we can create shards * on all nodes by ensuring all dependent objects exist on all node. @@ -1547,28 +1668,34 @@ CreateHashDistributedTableShards(Oid relationId, int shardCount, /* - * ColocationIdForNewTable returns a colocation id for hash-distributed table + * ColocationIdForNewTable returns a colocation id for given table * according to given configuration. If there is no such configuration, it * creates one and returns colocation id of newly the created colocation group. + * Note that DistributedTableParams and the distribution column Var should be + * non-null only if CitusTableType corresponds to a distributed table. + * * For append and range distributed tables, this function errors out if * colocateWithTableName parameter is not NULL, otherwise directly returns * INVALID_COLOCATION_ID. * + * For reference tables, returns the common reference table colocation id. + * * This function assumes its caller take necessary lock on relationId to * prevent possible changes on it. 
*/ static uint32 -ColocationIdForNewTable(Oid relationId, Var *distributionColumn, - char distributionMethod, char replicationModel, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName) +ColocationIdForNewTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams, + Var *distributionColumn) { + CitusTableParams citusTableParams = DecideCitusTableParams(tableType, + distributedTableParams); + uint32 colocationId = INVALID_COLOCATION_ID; - if (distributionMethod == DISTRIBUTE_BY_APPEND || - distributionMethod == DISTRIBUTE_BY_RANGE) + if (tableType == APPEND_DISTRIBUTED || tableType == RANGE_DISTRIBUTED) { - if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0) + if (!IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot distribute relation"), @@ -1578,7 +1705,7 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, return colocationId; } - else if (distributionMethod == DISTRIBUTE_BY_NONE) + else if (tableType == REFERENCE_TABLE) { return CreateReferenceTableColocationId(); } @@ -1589,27 +1716,29 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, * can be sure that there will no modifications on the colocation table * until this transaction is committed. */ - Assert(distributionMethod == DISTRIBUTE_BY_HASH); + Assert(citusTableParams.distributionMethod == DISTRIBUTE_BY_HASH); Oid distributionColumnType = distributionColumn->vartype; Oid distributionColumnCollation = get_typcollation(distributionColumnType); /* get an advisory lock to serialize concurrent default group creations */ - if (IsColocateWithDefault(colocateWithTableName)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { AcquireColocationDefaultLock(); } colocationId = FindColocateWithColocationId(relationId, - replicationModel, + citusTableParams.replicationModel, distributionColumnType, distributionColumnCollation, - shardCount, + distributedTableParams->shardCount, + distributedTableParams-> shardCountIsStrict, + distributedTableParams-> colocateWithTableName); - if (IsColocateWithDefault(colocateWithTableName) && (colocationId != - INVALID_COLOCATION_ID)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName) && + (colocationId != INVALID_COLOCATION_ID)) { /* * we can release advisory lock if there is already a default entry for given params; @@ -1621,23 +1750,25 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, if (colocationId == INVALID_COLOCATION_ID) { - if (IsColocateWithDefault(colocateWithTableName)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { /* * Generate a new colocation ID and insert a pg_dist_colocation * record. */ - colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor, + colocationId = CreateColocationGroup(distributedTableParams->shardCount, + ShardReplicationFactor, distributionColumnType, distributionColumnCollation); } - else if (IsColocateWithNone(colocateWithTableName)) + else if (IsColocateWithNone(distributedTableParams->colocateWithTableName)) { /* * Generate a new colocation ID and insert a pg_dist_colocation * record. 
*/ - colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor, + colocationId = CreateColocationGroup(distributedTableParams->shardCount, + ShardReplicationFactor, distributionColumnType, distributionColumnCollation); } From 821f26cc743b04a7926384a3d294a69bda002936 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 16 Jan 2023 13:08:51 +0300 Subject: [PATCH 07/58] Fix flaky test detection for upgrade tests When run_test.py is run for an upgrade_.*_after.sql file, then automatically run the corresponding upgrade_.*_before.sql file first. This is because all those upgrade_.*_after.sql files depend on the objects created in upgrade_.*_before.sql files by definition. --- src/test/regress/citus_tests/run_test.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index a4b303e90..3daac8b6a 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -15,6 +15,16 @@ import common import config + + +# Returns true if given test_schedule_line is of the form: +# "test: upgrade_ ... _after .." +def schedule_line_is_upgrade_after(test_schedule_line: str) -> bool: + return ( + test_schedule_line.startswith("test: upgrade_") + and "_after" in test_schedule_line + ) + + if __name__ == "__main__": args = argparse.ArgumentParser() args.add_argument( @@ -172,6 +182,11 @@ if __name__ == "__main__": if test_file_name in deps: dependencies = deps[test_file_name] + elif schedule_line_is_upgrade_after(test_schedule_line): + dependencies = TestDeps( + default_base_schedule(test_schedule), + [test_file_name.replace("_after", "_before")], + ) else: dependencies = TestDeps(default_base_schedule(test_schedule)) From 994f67185f1eba1236ec6c78448328f2b50f7bff Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 16 Jan 2023 13:38:20 +0300 Subject: [PATCH 08/58] Make upgrade_columnar_after runnable multiple times This commit hides port numbers in upgrade_columnar_after because the port numbers assigned to nodes in the upgrade schedule differ from the ones that the flaky test detector assigns. --- .../expected/upgrade_columnar_after.out | 42 ++++++++++--------- .../regress/sql/upgrade_columnar_after.sql | 18 ++++---- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/test/regress/expected/upgrade_columnar_after.out b/src/test/regress/expected/upgrade_columnar_after.out index 0da9bb17f..8bb09d861 100644 --- a/src/test/regress/expected/upgrade_columnar_after.out +++ b/src/test/regress/expected/upgrade_columnar_after.out @@ -228,10 +228,12 @@ BEGIN; 22 (1 row) - -- make sure that serial is preserved - -- since we run "after schedule" twice and "rollback" wouldn't undo - -- sequence changes, it can be 22 or 33, not a different value - SELECT max(id) in (22, 33) FROM text_data; + -- Make sure that serial is preserved. + -- + -- Since we might run "after schedule" several times for flaky test + -- detection and "rollback" wouldn't undo sequence changes, "id" should + -- look like below: + SELECT max(id) >= 11 AND max(id) % 11 = 0 FROM text_data; ?column? --------------------------------------------------------------------- t (1 row) @@ -292,7 +294,7 @@ SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- Check the same for workers too.
-SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace @@ -308,44 +310,44 @@ WHERE classid = 'pg_am'::regclass::oid AND deptype = 'n'; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"SELECT 10") - (localhost,10202,t,"SELECT 10") + t | SELECT 10 + t | SELECT 10 (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ (TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) UNION (TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"") - (localhost,10202,t,"") + t | + t | (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,t) - (localhost,10202,t,t) + t | t + t | t (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"DROP TABLE") - (localhost,10202,t,"DROP TABLE") + t | DROP TABLE + t | DROP TABLE (2 rows) diff --git a/src/test/regress/sql/upgrade_columnar_after.sql b/src/test/regress/sql/upgrade_columnar_after.sql index f2839645c..cf54ec80a 100644 --- a/src/test/regress/sql/upgrade_columnar_after.sql +++ b/src/test/regress/sql/upgrade_columnar_after.sql @@ -101,10 +101,12 @@ BEGIN; INSERT INTO text_data (value) SELECT generate_random_string(1024 * 10) FROM generate_series(0,10); SELECT count(DISTINCT value) FROM text_data; - -- make sure that serial is preserved - -- since we run "after schedule" twice and "rollback" wouldn't undo - -- sequence changes, it can be 22 or 33, not a different value - SELECT max(id) in (22, 33) FROM text_data; + -- Make sure that serial is preserved. + -- + -- Since we might run "after schedule" several times for flaky test + -- detection and "rollback" wouldn't undo sequence changes, "id" should + -- look like below: + SELECT max(id) >= 11 AND max(id) % 11 = 0 FROM text_data; -- since we run "after schedule" twice, rollback the transaction -- to avoid getting "table already exists" errors @@ -160,7 +162,7 @@ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- Check the same for workers too. 
-SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace @@ -177,7 +179,7 @@ WHERE classid = 'pg_am'::regclass::oid AND $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ (TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) UNION @@ -185,13 +187,13 @@ UNION $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; $$ From 2b4be535de51749878d046d8a1db9659865a0dfa Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 16 Jan 2023 18:11:00 +0300 Subject: [PATCH 09/58] Do clean-up before upgrade_columnar_before to make it runnable multiple times So that the flaky test detector can run upgrade_columnar_before.sql multiple times. --- .../expected/upgrade_columnar_before.out | 22 +++++++++++++++++ .../regress/sql/upgrade_columnar_before.sql | 24 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/test/regress/expected/upgrade_columnar_before.out b/src/test/regress/expected/upgrade_columnar_before.out index 28c252e30..a4895c770 100644 --- a/src/test/regress/expected/upgrade_columnar_before.out +++ b/src/test/regress/expected/upgrade_columnar_before.out @@ -1,5 +1,27 @@ -- Test if relying on topological sort of the objects, not their names, works -- fine when re-creating objects during pg_upgrade. +DO +$$ +BEGIN +IF EXISTS (SELECT * FROM pg_namespace WHERE nspname = 'upgrade_columnar') +THEN + -- Drop the table leftover from the earlier run of + -- upgrade_columnar_before.sql. Similarly, drop the fake public schema + -- created before and rename the original one (renamed to citus_schema) + -- back to public. + -- + -- This can only happen if upgrade_columnar_before.sql is run multiple + -- times for flaky test detection. + DROP TABLE citus_schema.new_columnar_table; + DROP SCHEMA public CASCADE; + ALTER SCHEMA citus_schema RENAME TO public; + + SET LOCAL client_min_messages TO WARNING; + DROP SCHEMA upgrade_columnar CASCADE; +END IF; +END +$$ +LANGUAGE plpgsql; ALTER SCHEMA public RENAME TO citus_schema; SET search_path TO citus_schema; -- As mentioned in https://github.com/citusdata/citus/issues/5447, it diff --git a/src/test/regress/sql/upgrade_columnar_before.sql b/src/test/regress/sql/upgrade_columnar_before.sql index ea71dba02..6f39f4234 100644 --- a/src/test/regress/sql/upgrade_columnar_before.sql +++ b/src/test/regress/sql/upgrade_columnar_before.sql @@ -1,5 +1,29 @@ -- Test if relying on topological sort of the objects, not their names, works -- fine when re-creating objects during pg_upgrade. + +DO +$$ +BEGIN +IF EXISTS (SELECT * FROM pg_namespace WHERE nspname = 'upgrade_columnar') +THEN + -- Drop the table leftover from the earlier run of + -- upgrade_columnar_before.sql. Similarly, drop the fake public schema + -- created before and rename the original one (renamed to citus_schema) + -- back to public. + -- + -- This can only happen if upgrade_columnar_before.sql is run multiple + -- times for flaky test detection.
+ DROP TABLE citus_schema.new_columnar_table; + DROP SCHEMA public CASCADE; + ALTER SCHEMA citus_schema RENAME TO public; + + SET LOCAL client_min_messages TO WARNING; + DROP SCHEMA upgrade_columnar CASCADE; +END IF; +END +$$ +LANGUAGE plpgsql; + ALTER SCHEMA public RENAME TO citus_schema; SET search_path TO citus_schema; From be0735a329d599e50e60893e92f3aa4d494eb39b Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 19 Jan 2023 18:13:15 +0300 Subject: [PATCH 10/58] Use "cpp" to expand "#include" directives in columnar sql files --- src/backend/columnar/.gitignore | 3 +++ src/backend/columnar/Makefile | 47 +++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 src/backend/columnar/.gitignore diff --git a/src/backend/columnar/.gitignore b/src/backend/columnar/.gitignore new file mode 100644 index 000000000..b70410d1d --- /dev/null +++ b/src/backend/columnar/.gitignore @@ -0,0 +1,3 @@ +# The directory used to store columnar sql files after pre-processing them +# with 'cpp' in build-time, see src/backend/columnar/Makefile. +/build/ diff --git a/src/backend/columnar/Makefile b/src/backend/columnar/Makefile index f9fa09b7c..ded52a98d 100644 --- a/src/backend/columnar/Makefile +++ b/src/backend/columnar/Makefile @@ -10,14 +10,51 @@ OBJS += \ MODULE_big = citus_columnar EXTENSION = citus_columnar -columnar_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql)) -columnar_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql)) -DATA = $(columnar_sql_files) \ - $(columnar_downgrade_sql_files) +template_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql)) +template_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/sql/downgrades/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql)) +generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_files)) +generated_downgrade_sql_files += $(patsubst %,$(citus_abs_srcdir)/build/sql/%,$(template_downgrade_sql_files)) + +DATA_built = $(generated_sql_files) PG_CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include include $(citus_top_builddir)/Makefile.global -.PHONY: install-all +SQL_DEPDIR=.deps/sql +SQL_BUILDDIR=build/sql + +$(generated_sql_files): $(citus_abs_srcdir)/build/%: % + @mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR) + @# -MF is used to store dependency files(.Po) in another directory for separation + @# -MT is used to change the target of the rule emitted by dependency generation. + @# -P is used to inhibit generation of linemarkers in the output from the preprocessor. + @# -undef is used to not predefine any system-specific or GCC-specific macros. + @# `man cpp` for further information + cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@ + +$(generated_downgrade_sql_files): $(citus_abs_srcdir)/build/sql/%: sql/downgrades/% + @mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR) + @# -MF is used to store dependency files(.Po) in another directory for separation + @# -MT is used to change the target of the rule emitted by dependency generation. + @# -P is used to inhibit generation of linemarkers in the output from the preprocessor. + @# -undef is used to not predefine any system-specific or GCC-specific macros. 
+ @# `man cpp` for further information + cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@ + +.PHONY: install install-downgrades install-all + +cleanup-before-install: + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar.control + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/columnar--* + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar--* + +install: cleanup-before-install + +# install and install-downgrades should be run sequentially install-all: install + $(MAKE) install-downgrades + +install-downgrades: $(generated_downgrade_sql_files) + $(INSTALL_DATA) $(generated_downgrade_sql_files) '$(DESTDIR)$(datadir)/$(datamoduledir)/' + From 9550ebd118bf961bcd504cc8ff40a820d280f11f Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Wed, 18 Jan 2023 15:32:15 +0300 Subject: [PATCH 11/58] Remove pg_depend entries from columnar metadata indexes to columnar-am In the past, having columnar tables in the cluster was causing pg upgrades to fail when attempting to access columnar metadata. This is because pg_dump doesn't see the objects that we use for columnar-am related bookkeeping as dependencies of the tables using columnar-am. To fix that, in #5456 we inserted some "normal dependency" edges (from those objects to columnar-am) into pg_depend. This helped us ensure the existence of a class of metadata objects --such as columnar.storageid_seq-- and helped fix #5437. However, the normal-dependency edges that we added for indexes on columnar metadata tables --such as columnar.stripe_pkey-- didn't help at all because they were indeed causing dependency loops (#5510) and pg_dump was not able to take those dependency edges into account. For this reason, this commit deletes those dependency edges so that pg_dump stops complaining about them. Note that it's not critical to delete those edges from pg_depend since they don't break pg upgrades; they were only triggering some warning messages. And given that backporting a sql change into older versions is quite hard, we skip backporting this.
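To make the affected rows concrete, the sketch below (an illustration, not part of this patch) lists the "normal dependency" edges from the columnar access method to pg_class objects. Before this change the output includes metadata indexes such as columnar_internal.stripe_pkey; afterwards only the catalog tables and the storageid sequence should remain. It uses psycopg 3, and the connection string is an assumption:

```python
# Illustrative sketch only: inspect the pg_depend edges that this patch
# prunes. Assumes a database with citus_columnar installed.
import psycopg

EDGE_QUERY = """
SELECT refobjid::regclass::text
FROM pg_depend
WHERE classid = 'pg_am'::regclass
  AND objid = (SELECT oid FROM pg_am WHERE amname = 'columnar')
  AND refclassid = 'pg_class'::regclass
  AND deptype = 'n';
"""

# The connection string is an assumption for this sketch.
with psycopg.connect("dbname=postgres") as conn:
    for (dependent,) in conn.execute(EDGE_QUERY):
        print(dependent)
```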
--- .../sql/citus_columnar--11.1-1--11.2-1.sql | 18 ++++++++ .../citus_columnar--11.2-1--11.1-1.sql | 3 ++ .../11.2-1.sql | 43 +++++++++++++++++++ .../latest.sql | 17 +++----- src/test/regress/expected/multi_extension.out | 37 ++++++++++++++++ .../expected/upgrade_columnar_after.out | 24 ++++++++--- src/test/regress/sql/multi_extension.sql | 33 ++++++++++++++ .../regress/sql/upgrade_columnar_after.sql | 20 ++++++--- 8 files changed, 172 insertions(+), 23 deletions(-) create mode 100644 src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql diff --git a/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql b/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql index 60a0401d5..89ccd9e74 100644 --- a/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql +++ b/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql @@ -1 +1,19 @@ -- citus_columnar--11.1-1--11.2-1 + +#include "udfs/columnar_ensure_am_depends_catalog/11.2-1.sql" + +DELETE FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid + AND objid IN (select oid from pg_am where amname = 'columnar') + AND objsubid = 0 + AND refclassid = 'pg_class'::regclass::oid + AND refobjid IN ( + 'columnar_internal.stripe_first_row_number_idx'::regclass::oid, + 'columnar_internal.chunk_group_pkey'::regclass::oid, + 'columnar_internal.chunk_pkey'::regclass::oid, + 'columnar_internal.options_pkey'::regclass::oid, + 'columnar_internal.stripe_first_row_number_idx'::regclass::oid, + 'columnar_internal.stripe_pkey'::regclass::oid + ) + AND refobjsubid = 0 + AND deptype = 'n'; diff --git a/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql b/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql index 9acf68da3..c987bfa67 100644 --- a/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql +++ b/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql @@ -1 +1,4 @@ -- citus_columnar--11.2-1--11.1-1 + +-- Note that we intentionally do not re-insert the pg_depend records that we +-- deleted via citus_columnar--11.1-1--11.2-1.sql. diff --git a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql new file mode 100644 index 000000000..101db17fb --- /dev/null +++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql @@ -0,0 +1,43 @@ +CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog +AS $func$ +BEGIN + INSERT INTO pg_depend + WITH columnar_schema_members(relid) AS ( + SELECT pg_class.oid AS relid FROM pg_class + WHERE relnamespace = + COALESCE( + (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar_internal'), + (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar') + ) + AND relname IN ('chunk', + 'chunk_group', + 'options', + 'storageid_seq', + 'stripe') + ) + SELECT -- Define a dependency edge from "columnar table access method" .. + 'pg_am'::regclass::oid as classid, + (select oid from pg_am where amname = 'columnar') as objid, + 0 as objsubid, + -- ... to some objects registered as regclass and that lives in + -- "columnar" schema. That contains catalog tables and the sequences + -- created in "columnar" schema. + -- + -- Given the possibility of user might have created their own objects + -- in columnar schema, we explicitly specify list of objects that we + -- are interested in. 
+ 'pg_class'::regclass::oid as refclassid, + columnar_schema_members.relid as refobjid, + 0 as refobjsubid, + 'n' as deptype + FROM columnar_schema_members + -- Avoid inserting duplicate entries into pg_depend. + EXCEPT TABLE pg_depend; +END; +$func$; +COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() + IS 'internal function responsible for creating dependencies from columnar ' + 'table access method to the rel objects in columnar schema'; diff --git a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql index ade15390a..101db17fb 100644 --- a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql +++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog() +CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() RETURNS void LANGUAGE plpgsql SET search_path = pg_catalog @@ -14,22 +14,17 @@ BEGIN ) AND relname IN ('chunk', 'chunk_group', - 'chunk_group_pkey', - 'chunk_pkey', 'options', - 'options_pkey', 'storageid_seq', - 'stripe', - 'stripe_first_row_number_idx', - 'stripe_pkey') + 'stripe') ) SELECT -- Define a dependency edge from "columnar table access method" .. 'pg_am'::regclass::oid as classid, (select oid from pg_am where amname = 'columnar') as objid, 0 as objsubid, - -- ... to each object that is registered to pg_class and that lives - -- in "columnar" schema. That contains catalog tables, indexes - -- created on them and the sequences created in "columnar" schema. + -- ... to some objects registered as regclass and that lives in + -- "columnar" schema. That contains catalog tables and the sequences + -- created in "columnar" schema. -- -- Given the possibility of user might have created their own objects -- in columnar schema, we explicitly specify list of objects that we @@ -43,6 +38,6 @@ BEGIN EXCEPT TABLE pg_depend; END; $func$; -COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog() +COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() IS 'internal function responsible for creating dependencies from columnar ' 'table access method to the rel objects in columnar schema'; diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 092ec9e5c..ead2a5b85 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1258,6 +1258,43 @@ SELECT * FROM pg_dist_cleanup; 2 | 0 | 1 | table_with_orphaned_shards_102011 | 0 | 0 (2 rows) +ALTER EXTENSION citus_columnar UPDATE TO '11.2-1'; +-- Make sure that we defined dependencies from all rel objects (tables, +-- indexes, sequences ..) to columnar table access method ... +SELECT pg_class.oid INTO columnar_schema_members +FROM pg_class, pg_namespace +WHERE pg_namespace.oid=pg_class.relnamespace AND + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); +SELECT refobjid INTO columnar_schema_members_pg_depend +FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid AND + objid = (select oid from pg_am where amname = 'columnar') AND + objsubid = 0 AND + refclassid = 'pg_class'::regclass::oid AND + refobjsubid = 0 AND + deptype = 'n'; +-- ... 
, so this should be empty, +(TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) +UNION +(TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); + oid +--------------------------------------------------------------------- +(0 rows) + +-- ... , and both columnar_schema_members_pg_depend & columnar_schema_members +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- error out as cleanup records remain ALTER EXTENSION citus UPDATE TO '11.0-4'; ERROR: pg_dist_cleanup is introduced in Citus 11.1 diff --git a/src/test/regress/expected/upgrade_columnar_after.out b/src/test/regress/expected/upgrade_columnar_after.out index 8bb09d861..768a057f9 100644 --- a/src/test/regress/expected/upgrade_columnar_after.out +++ b/src/test/regress/expected/upgrade_columnar_after.out @@ -267,7 +267,12 @@ ROLLBACK; SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -285,8 +290,8 @@ UNION (0 rows) -- ... , and both columnar_schema_members_pg_depend & columnar_schema_members --- should have 10 entries. -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; ?column? --------------------------------------------------------------------- t @@ -299,7 +304,12 @@ $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -312,8 +322,8 @@ $$ ); success | result --------------------------------------------------------------------- - t | SELECT 10 - t | SELECT 10 + t | SELECT 5 + t | SELECT 5 (2 rows) SELECT success, result FROM run_command_on_workers( @@ -331,7 +341,7 @@ $$ SELECT success, result FROM run_command_on_workers( $$ -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; $$ ); success | result diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 8c8ade9d8..d202227ae 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -556,6 +556,39 @@ ALTER EXTENSION citus UPDATE TO '11.2-1'; SELECT * FROM pg_dist_placement ORDER BY shardid; SELECT * FROM pg_dist_cleanup; +ALTER EXTENSION citus_columnar UPDATE TO '11.2-1'; + +-- Make sure that we defined dependencies from all rel objects (tables, +-- indexes, sequences ..) to columnar table access method ... 
+SELECT pg_class.oid INTO columnar_schema_members +FROM pg_class, pg_namespace +WHERE pg_namespace.oid=pg_class.relnamespace AND + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); +SELECT refobjid INTO columnar_schema_members_pg_depend +FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid AND + objid = (select oid from pg_am where amname = 'columnar') AND + objsubid = 0 AND + refclassid = 'pg_class'::regclass::oid AND + refobjsubid = 0 AND + deptype = 'n'; + +-- ... , so this should be empty, +(TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) +UNION +(TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); + +-- ... , and both columnar_schema_members_pg_depend & columnar_schema_members +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; + +DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; + -- error out as cleanup records remain ALTER EXTENSION citus UPDATE TO '11.0-4'; diff --git a/src/test/regress/sql/upgrade_columnar_after.sql b/src/test/regress/sql/upgrade_columnar_after.sql index cf54ec80a..133fcfde0 100644 --- a/src/test/regress/sql/upgrade_columnar_after.sql +++ b/src/test/regress/sql/upgrade_columnar_after.sql @@ -139,7 +139,12 @@ ROLLBACK; SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -155,8 +160,8 @@ UNION (TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); -- ... , and both columnar_schema_members_pg_depend & columnar_schema_members --- should have 10 entries. -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; @@ -167,7 +172,12 @@ $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -189,7 +199,7 @@ $$ SELECT success, result FROM run_command_on_workers( $$ -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; $$ ); From b8b85072d6281f228425e40ddb1d2b4fe2f6c6ff Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Wed, 15 Mar 2023 14:53:14 +0100 Subject: [PATCH 12/58] Add pytest dependencies to Pipfile (#6767) In #6720 I'm adding a `pytest`-based testing framework. This adds the dependencies for it. They have already been [merged into our docker files][the-process-merge] in the the-process repo in preparation for #6720. But by not having them on our citus main branch it is impossible to make changes to the Pipfile, because our CI Dockerfiles and master are out of date.
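To give a rough idea of how these packages fit together, a test in such a framework could look like the sketch below. This is an illustration only, not code from #6720; the connection string and the assertion are assumptions:

```python
# Illustrative sketch exercising the new Pipfile dependencies:
# pytest, pytest-timeout and psycopg (3.x).
import psycopg
import pytest


@pytest.mark.timeout(60)  # provided by the pytest-timeout plugin
def test_coordinator_reports_citus_version():
    # The connection string is an assumption for this sketch.
    with psycopg.connect("host=localhost port=5432 dbname=postgres") as conn:
        row = conn.execute("SELECT citus_version()").fetchone()
        assert row is not None
        assert "Citus" in row[0]
```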
Since #6720 will need some more discussion and might take a few more weeks to be merged, this takes out the Pipfile changes. By merging this PR we can unblock new Pipfile changes. Unblocks and partially addresses #6766 [the-process-merge]: https://github.com/citusdata/the-process/pull/117 --- .circleci/config.yml | 2 +- src/test/regress/Pipfile | 6 ++ src/test/regress/Pipfile.lock | 176 ++++++++++++++++++++++++++++++++-- 3 files changed, 175 insertions(+), 9 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1d65ae59c..8f2d86f15 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-vb84a6c5' + default: '-v89059f9' pg13_version: type: string default: '13.10' diff --git a/src/test/regress/Pipfile b/src/test/regress/Pipfile index bb848c792..16da96f21 100644 --- a/src/test/regress/Pipfile +++ b/src/test/regress/Pipfile @@ -8,6 +8,12 @@ mitmproxy = {editable = true, ref = "main", git = "https://github.com/citusdata/ construct = "==2.9.45" docopt = "==0.6.2" cryptography = ">=39.0.1" +pytest = "*" +psycopg = "*" +filelock = "*" +pytest-asyncio = "*" +pytest-timeout = "*" +pytest-xdist = "*" [dev-packages] black = "*" diff --git a/src/test/regress/Pipfile.lock b/src/test/regress/Pipfile.lock index 0349032b2..8bf8715ea 100644 --- a/src/test/regress/Pipfile.lock +++ b/src/test/regress/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "dfc5545eeb592c0dd5ed002b7665d940288c5ead77d2f31a0aa08391569577fc" + "sha256": "456a43ce06df947ccbf02db7fcbfd654999acaae25911990d4d74fc04b10c77e" }, "pipfile-spec": 6, "requires": { @@ -24,6 +24,14 @@ "markers": "python_version >= '3.6'", "version": "==3.4.1" }, + "attrs": { + "hashes": [ + "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", + "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" + ], + "markers": "python_version >= '3.6'", + "version": "==22.2.0" + }, "blinker": { "hashes": [ "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6" @@ -245,6 +253,30 @@ "index": "pypi", "version": "==0.6.2" }, + "exceptiongroup": { + "hashes": [ + "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e", + "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23" + ], + "markers": "python_version < '3.11'", + "version": "==1.1.0" + }, + "execnet": { + "hashes": [ + "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5", + "sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.9.0" + }, + "filelock": { + "hashes": [ + "sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de", + "sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d" + ], + "index": "pypi", + "version": "==3.9.0" + }, "flask": { "hashes": [ "sha256:59da8a3170004800a2837844bfa84d49b022550616070f7cb1a659682b2e7c9f", @@ -285,6 +317,14 @@ "markers": "python_full_version >= '3.6.1'", "version": "==6.0.1" }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, "itsdangerous": { "hashes": [ "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44", @@ -435,6 +475,14 @@ ], "version": "==1.0.4" }, + 
"packaging": { + "hashes": [ + "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", + "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" + ], + "markers": "python_version >= '3.7'", + "version": "==23.0" + }, "passlib": { "hashes": [ "sha256:aa6bca462b8d8bda89c70b382f0c298a20b5560af6cbfa2dce410c0a2fb669f1", @@ -442,6 +490,14 @@ ], "version": "==1.7.4" }, + "pluggy": { + "hashes": [ + "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", + "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.0" + }, "protobuf": { "hashes": [ "sha256:0c44e01f74109decea196b5b313b08edb5316df77313995594a6981e95674259", @@ -469,6 +525,14 @@ "markers": "python_version >= '3.5'", "version": "==3.18.3" }, + "psycopg": { + "hashes": [ + "sha256:59b4a71536b146925513c0234dfd1dc42b81e65d56ce5335dff4813434dbc113", + "sha256:b1500c42063abaa01d30b056f0b300826b8dd8d586900586029a294ce74af327" + ], + "index": "pypi", + "version": "==3.1.8" + }, "publicsuffix2": { "hashes": [ "sha256:00f8cc31aa8d0d5592a5ced19cccba7de428ebca985db26ac852d920ddd6fe7b", @@ -514,7 +578,7 @@ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.4.7" }, "pyperclip": { @@ -523,6 +587,38 @@ ], "version": "==1.8.2" }, + "pytest": { + "hashes": [ + "sha256:c7c6ca206e93355074ae32f7403e8ea12163b1163c976fee7d4d84027c162be5", + "sha256:d45e0952f3727241918b8fd0f376f5ff6b301cc0777c6f9a556935c92d8a7d42" + ], + "index": "pypi", + "version": "==7.2.1" + }, + "pytest-asyncio": { + "hashes": [ + "sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36", + "sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442" + ], + "index": "pypi", + "version": "==0.20.3" + }, + "pytest-timeout": { + "hashes": [ + "sha256:c07ca07404c612f8abbe22294b23c368e2e5104b521c1790195561f37e1ac3d9", + "sha256:f6f50101443ce70ad325ceb4473c4255e9d74e3c7cd0ef827309dfa4c0d975c6" + ], + "index": "pypi", + "version": "==2.1.0" + }, + "pytest-xdist": { + "hashes": [ + "sha256:336098e3bbd8193276867cc87db8b22903c3927665dff9d1ac8684c02f597b68", + "sha256:fa10f95a2564cd91652f2d132725183c3b590d9fdcdec09d3677386ecf4c1ce9" + ], + "index": "pypi", + "version": "==3.2.0" + }, "ruamel.yaml": { "hashes": [ "sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33", @@ -531,6 +627,46 @@ "markers": "python_version >= '3'", "version": "==0.17.16" }, + "ruamel.yaml.clib": { + "hashes": [ + "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e", + "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3", + "sha256:184faeaec61dbaa3cace407cffc5819f7b977e75360e8d5ca19461cd851a5fc5", + "sha256:1f08fd5a2bea9c4180db71678e850b995d2a5f4537be0e94557668cf0f5f9497", + "sha256:2aa261c29a5545adfef9296b7e33941f46aa5bbd21164228e833412af4c9c75f", + "sha256:3110a99e0f94a4a3470ff67fc20d3f96c25b13d24c6980ff841e82bafe827cac", + "sha256:3243f48ecd450eddadc2d11b5feb08aca941b5cd98c9b1db14b2fd128be8c697", + "sha256:370445fd795706fd291ab00c9df38a0caed0f17a6fb46b0f607668ecb16ce763", + "sha256:40d030e2329ce5286d6b231b8726959ebbe0404c92f0a578c0e2482182e38282", + 
"sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94", + "sha256:4a4d8d417868d68b979076a9be6a38c676eca060785abaa6709c7b31593c35d1", + "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072", + "sha256:5bc0667c1eb8f83a3752b71b9c4ba55ef7c7058ae57022dd9b29065186a113d9", + "sha256:721bc4ba4525f53f6a611ec0967bdcee61b31df5a56801281027a3a6d1c2daf5", + "sha256:763d65baa3b952479c4e972669f679fe490eee058d5aa85da483ebae2009d231", + "sha256:7bdb4c06b063f6fd55e472e201317a3bb6cdeeee5d5a38512ea5c01e1acbdd93", + "sha256:8831a2cedcd0f0927f788c5bdf6567d9dc9cc235646a434986a852af1cb54b4b", + "sha256:91a789b4aa0097b78c93e3dc4b40040ba55bef518f84a40d4442f713b4094acb", + "sha256:92460ce908546ab69770b2e576e4f99fbb4ce6ab4b245345a3869a0a0410488f", + "sha256:99e77daab5d13a48a4054803d052ff40780278240a902b880dd37a51ba01a307", + "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8", + "sha256:a7b301ff08055d73223058b5c46c55638917f04d21577c95e00e0c4d79201a6b", + "sha256:be2a7ad8fd8f7442b24323d24ba0b56c51219513cfa45b9ada3b87b76c374d4b", + "sha256:bf9a6bc4a0221538b1a7de3ed7bca4c93c02346853f44e1cd764be0023cd3640", + "sha256:c3ca1fbba4ae962521e5eb66d72998b51f0f4d0f608d3c0347a48e1af262efa7", + "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a", + "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71", + "sha256:d5e51e2901ec2366b79f16c2299a03e74ba4531ddcfacc1416639c557aef0ad8", + "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7", + "sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80", + "sha256:ecdf1a604009bd35c674b9225a8fa609e0282d9b896c03dd441a91e5f53b534e", + "sha256:efa08d63ef03d079dcae1dfe334f6c8847ba8b645d08df286358b1f5293d24ab", + "sha256:f01da5790e95815eb5a8a138508c01c758e5f5bc0ce4286c4f7028b8dd7ac3d0", + "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646" + ], + "markers": "python_version < '3.10' and platform_python_implementation == 'CPython'", + "version": "==0.2.7" + }, "sortedcontainers": { "hashes": [ "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", @@ -538,6 +674,14 @@ ], "version": "==2.4.0" }, + "tomli": { + "hashes": [ + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version < '3.11'", + "version": "==2.0.1" + }, "tornado": { "hashes": [ "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", @@ -555,6 +699,14 @@ "markers": "python_version >= '3.7'", "version": "==6.2" }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" + }, "urwid": { "hashes": [ "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae" @@ -563,11 +715,11 @@ }, "werkzeug": { "hashes": [ - "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f", - "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5" + "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe", + "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612" ], "markers": "python_version >= '3.7'", - "version": "==2.2.2" + "version": "==2.2.3" }, "wsproto": { "hashes": [ @@ -690,11 +842,11 @@ }, "flake8-bugbear": { "hashes": [ - 
"sha256:04a115e5f9c8e87c38bdbbcdf9f58223ffe05469c07c9a7bd8633330bc4d078b", - "sha256:55902ab5a48c5ea53d8689ecd146eda548e72f2724192b9c1d68f6d975d13c06" + "sha256:39259814a83f33c8409417ee12dd4050c9c0bb4c8707c12fc18ae62b2f3ddee1", + "sha256:f136bd0ca2684f101168bba2310dec541e11aa6b252260c17dcf58d18069a740" ], "index": "pypi", - "version": "==23.1.20" + "version": "==23.2.13" }, "isort": { "hashes": [ @@ -767,6 +919,14 @@ ], "markers": "python_version < '3.11'", "version": "==2.0.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" } } } From 1e42cd3da03584d25aa0ace99574cbca05293189 Mon Sep 17 00:00:00 2001 From: Teja Mupparti Date: Wed, 21 Dec 2022 19:33:13 -0800 Subject: [PATCH 13/58] Support MERGE on distributed tables with restrictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements the phase - II of MERGE sql support Support routable query where all the tables in the merge-sql are distributed, co-located, and both the source and target relations are joined on the distribution column with a constant qual. This should be a Citus single-task query. Below is an example. SELECT create_distributed_table('t1', 'id'); SELECT create_distributed_table('s1', 'id', colocate_with => ‘t1’); MERGE INTO t1 USING s1 ON t1.id = s1.id AND t1.id = 100 WHEN MATCHED THEN UPDATE SET val = s1.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val, src) VALUES (s1.id, s1.val, s1.src) Basically, MERGE checks to see if There are a minimum of two distributed tables (source and a target). All the distributed tables are indeed colocated. MERGE relations are joined on the distribution column MERGE .. USING .. ON target.dist_key = source.dist_key The query should touch only a single shard i.e. JOIN AND with a constant qual MERGE .. USING .. ON target.dist_key = source.dist_key AND target.dist_key = <> If any of the conditions are not met, it raises an exception. (cherry picked from commit 44c387b978a51b0c0e87c7f9aec154cfc3041da1) This implements MERGE phase3 Support pushdown query where all the tables in the merge-sql are Citus-distributed, co-located, and both the source and target relations are joined on the distribution column. This will generate multiple tasks which execute independently after pushdown. SELECT create_distributed_table('t1', 'id'); SELECT create_distributed_table('s1', 'id', colocate_with => ‘t1’); MERGE INTO t1 USING s1 ON t1.id = s1.id WHEN MATCHED THEN UPDATE SET val = s1.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val, src) VALUES (s1.id, s1.val, s1.src) *The only exception for both the phases II and III is, UPDATEs and INSERTs must be done on the same shard-group as the joined key; for example, below scenarios are NOT supported as the key-value to be inserted/updated is not guaranteed to be on the same node as the id distribution-column. 
MERGE INTO target t
USING source s ON (t.customer_id = s.customer_id)
WHEN NOT MATCHED THEN
     INSERT (customer_id, …) VALUES (…, ……);

OR this scenario, where we update the distribution column itself:

MERGE INTO target t
USING source s ON (t.customer_id = s.customer_id)
WHEN MATCHED THEN
     UPDATE SET customer_id = 100;

(cherry picked from commit fa7b8949a88bf614d5a07fc33f6159d9efa5d087)
---
 .../distributed/planner/distributed_planner.c |  194 +--
 .../planner/fast_path_router_planner.c        |    4 +-
 .../planner/multi_physical_planner.c          |   17 +-
 .../planner/multi_router_planner.c            |  353 ++++-
 .../relation_restriction_equivalence.c        |   31 +-
 src/include/distributed/distributed_planner.h |    4 +
 .../relation_restriction_equivalence.h        |   11 +
 src/test/regress/bin/normalize.sed            |    4 +
 src/test/regress/expected/merge.out           | 1306 ++++++++++++++++-
 src/test/regress/expected/pg15.out            |   10 +-
 src/test/regress/expected/pgmerge.out         |   14 +-
 src/test/regress/sql/merge.sql                |  742 +++++++++-
 src/test/regress/sql/pgmerge.sql              |    8 +
 13 files changed, 2450 insertions(+), 248 deletions(-)

diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c
index 701ae4ff5..262258d7f 100644
--- a/src/backend/distributed/planner/distributed_planner.c
+++ b/src/backend/distributed/planner/distributed_planner.c
@@ -75,9 +75,6 @@ static uint64 NextPlanId = 1;
 /* keep track of planner call stack levels */
 int PlannerLevel = 0;
 
-static void ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree,
-                                                   List *rangeTableList);
-static bool ContainsMergeCommandWalker(Node *node);
 static bool ListContainsDistributedTableRTE(List *rangeTableList,
                                             bool *maybeHasForeignDistributedTable);
 static bool IsUpdateOrDelete(Query *query);
@@ -132,7 +129,7 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext
 static RTEListProperties * GetRTEListProperties(List *rangeTableList);
 static List * TranslatedVars(PlannerInfo *root, int relationIndex);
 static void WarnIfListHasForeignDistributedTable(List *rangeTableList);
-static void ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList);
+
 
 /* Distributed planner hook */
 PlannedStmt *
@@ -200,12 +197,6 @@ distributed_planner(Query *parse,
 
 	if (!fastPathRouterQuery)
 	{
-		/*
-		 * Fast path queries cannot have merge command, and we
-		 * prevent the remaining here.
-		 */
-		ErrorIfQueryHasUnsupportedMergeCommand(parse, rangeTableList);
-
 		/*
 		 * When there are partitioned tables (not applicable to fast path),
 		 * pretend that they are regular tables to avoid unnecessary work
@@ -304,44 +295,11 @@ distributed_planner(Query *parse,
 }
 
 
-/*
- * ErrorIfQueryHasUnsupportedMergeCommand walks over the query tree and bails out
- * if there is no Merge command (e.g., CMD_MERGE) in the query tree. For merge,
- * looks for all supported combinations, throws an exception if any violations
- * are seen.
- */
-static void
-ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, List *rangeTableList)
-{
-	/*
-	 * Postgres currently doesn't support Merge queries inside subqueries and
-	 * ctes, but lets be defensive and do query tree walk anyway.
-	 *
-	 * We do not call this path for fast-path queries to avoid this additional
-	 * overhead.
-	 */
-	if (!ContainsMergeCommandWalker((Node *) queryTree))
-	{
-		/* No MERGE found */
-		return;
-	}
-
-
-	/*
-	 * In Citus we have limited support for MERGE, it's allowed
-	 * only if all the tables(target, source or any CTE) tables
-	 * are are local i.e.
a combination of Citus local and Non-Citus - * tables (regular Postgres tables). - */ - ErrorIfMergeHasUnsupportedTables(queryTree, rangeTableList); -} - - /* * ContainsMergeCommandWalker walks over the node and finds if there are any * Merge command (e.g., CMD_MERGE) in the node. */ -static bool +bool ContainsMergeCommandWalker(Node *node) { #if PG_VERSION_NUM < PG_VERSION_15 @@ -676,7 +634,8 @@ bool IsUpdateOrDelete(Query *query) { return query->commandType == CMD_UPDATE || - query->commandType == CMD_DELETE; + query->commandType == CMD_DELETE || + query->commandType == CMD_MERGE; } @@ -2611,148 +2570,3 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList) } } } - - -/* - * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is - * permitted on special relations, such as materialized view, returns true only if - * it's a "source" relation. - */ -bool -IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) -{ - if (!IsMergeQuery(parse)) - { - return false; - } - - RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); - - /* Is it a target relation? */ - if (targetRte->relid == rte->relid) - { - return false; - } - - return true; -} - - -/* - * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE - * present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus - * tables (regular Postgres tables), raises an exception for all other combinations. - */ -static void -ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList) -{ - ListCell *tableCell = NULL; - - foreach(tableCell, rangeTableList) - { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(tableCell); - Oid relationId = rangeTableEntry->relid; - - switch (rangeTableEntry->rtekind) - { - case RTE_RELATION: - { - /* Check the relation type */ - break; - } - - case RTE_SUBQUERY: - case RTE_FUNCTION: - case RTE_TABLEFUNC: - case RTE_VALUES: - case RTE_JOIN: - case RTE_CTE: - { - /* Skip them as base table(s) will be checked */ - continue; - } - - /* - * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, - * such as, trigger data; until we find a genuine use case, raise an - * exception. - * RTE_RESULT is a node added by the planner and we shouldn't - * encounter it in the parse tree. 
- */ - case RTE_NAMEDTUPLESTORE: - case RTE_RESULT: - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not supported with " - "Tuplestores and results"))); - break; - } - - default: - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command: Unrecognized range table entry."))); - } - } - - /* RTE Relation can be of various types, check them now */ - - /* skip the regular views as they are replaced with subqueries */ - if (rangeTableEntry->relkind == RELKIND_VIEW) - { - continue; - } - - if (rangeTableEntry->relkind == RELKIND_MATVIEW || - rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) - { - /* Materialized view or Foreign table as target is not allowed */ - if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) - { - /* Non target relation is ok */ - continue; - } - else - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not allowed " - "on materialized view"))); - } - } - - if (rangeTableEntry->relkind != RELKIND_RELATION && - rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Unexpected relation type(relkind:%c) in MERGE command", - rangeTableEntry->relkind))); - } - - Assert(rangeTableEntry->relid != 0); - - /* Distributed tables and Reference tables are not supported yet */ - if (IsCitusTableType(relationId, REFERENCE_TABLE) || - IsCitusTableType(relationId, DISTRIBUTED_TABLE)) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not supported on " - "distributed/reference tables yet"))); - } - - /* Regular Postgres tables and Citus local tables are allowed */ - if (!IsCitusTable(relationId) || - IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) - { - continue; - } - - - /* Any other Citus table type missing ? */ - } - - /* All the tables are local, supported */ -} diff --git a/src/backend/distributed/planner/fast_path_router_planner.c b/src/backend/distributed/planner/fast_path_router_planner.c index aa029f3c0..e7d91a101 100644 --- a/src/backend/distributed/planner/fast_path_router_planner.c +++ b/src/backend/distributed/planner/fast_path_router_planner.c @@ -54,8 +54,6 @@ bool EnableFastPathRouterPlanner = true; static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey); -static bool ConjunctionContainsColumnFilter(Node *node, Var *column, - Node **distributionKeyValue); static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn, Node **distributionKeyValue); @@ -294,7 +292,7 @@ ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey) * * If the conjuction contains column filter which is const, distributionKeyValue is set. */ -static bool +bool ConjunctionContainsColumnFilter(Node *node, Var *column, Node **distributionKeyValue) { if (node == NULL) diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index be6caf0e2..b30dddeb7 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -2225,14 +2225,17 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId, } /* - * For left joins we don't care about the shards pruned for the right hand side. - * If the right hand side would prune to a smaller set we should still send it to - * all tables of the left hand side. 
However if the right hand side is bigger than
-	 * the left hand side we don't have to send the query to any shard that is not
-	 * matching anything on the left hand side.
+	 * For left joins we don't care about the shards pruned for
+	 * the right hand side. If the right hand side would prune
+	 * to a smaller set we should still send it to all tables
+	 * of the left hand side. However if the right hand side is
+	 * bigger than the left hand side we don't have to send the
+	 * query to any shard that is not matching anything on the
+	 * left hand side.
 	 *
-	 * Instead we will simply skip any RelationRestriction if it is an OUTER join and
-	 * the table is part of the non-outer side of the join.
+	 * Instead we will simply skip any RelationRestriction if it
+	 * is an OUTER join and the table is part of the non-outer
+	 * side of the join.
 	 */
 	if (IsInnerTableOfOuterJoin(relationRestriction))
 	{
diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c
index f4591a770..99beff2c8 100644
--- a/src/backend/distributed/planner/multi_router_planner.c
+++ b/src/backend/distributed/planner/multi_router_planner.c
@@ -185,7 +185,6 @@ static DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelation
                                                               List *targetList,
                                                               CmdType commandType,
                                                               List *returningList);
-
 /*
  * CreateRouterPlan attempts to create a router executor plan for the given
  * SELECT statement. ->planningError is set if planning fails.
@@ -905,6 +904,85 @@ NodeIsFieldStore(Node *node)
 }
 
 
+/*
+ * MergeQuerySupported checks for a MERGE command in the query; if it finds
+ * one, it verifies the criteria below:
+ * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables
+ * - Distributed tables requirements in ErrorIfDistTablesNotColocated
+ * - Checks target-lists and functions-in-quals in TargetlistAndFunctionsSupported
+ */
+static DeferredErrorMessage *
+MergeQuerySupported(Query *originalQuery,
+                    PlannerRestrictionContext *plannerRestrictionContext)
+{
+	/* For non-MERGE commands it's a no-op */
+	if (!QueryHasMergeCommand(originalQuery))
+	{
+		return NULL;
+	}
+
+	List *rangeTableList = ExtractRangeTableEntryList(originalQuery);
+	RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery);
+
+	/*
+	 * Fast path queries cannot have merge command, and we prevent the remaining here.
+	 * In Citus we have limited support for MERGE; it's allowed only if all
+	 * the tables (target, source or any CTE) are local, i.e. a combination
+	 * of Citus local and non-Citus tables (regular Postgres tables), or
+	 * distributed tables with some restrictions; please see the header of
+	 * the routine ErrorIfDistTablesNotColocated for details.
+	 */
+	DeferredErrorMessage *deferredError =
+		ErrorIfMergeHasUnsupportedTables(originalQuery,
+										 rangeTableList,
+										 plannerRestrictionContext);
+	if (deferredError)
+	{
+		return deferredError;
+	}
+
+	Oid resultRelationId = resultRte->relid;
+	deferredError =
+		TargetlistAndFunctionsSupported(resultRelationId,
+										originalQuery->jointree,
+										originalQuery->jointree->quals,
+										originalQuery->targetList,
+										originalQuery->commandType,
+										originalQuery->returningList);
+	if (deferredError)
+	{
+		return deferredError;
+	}
+
+	#if PG_VERSION_NUM >= PG_VERSION_15
+
+	/*
+	 * MERGE is a special case where we have multiple modify statements
+	 * within itself. Check each INSERT/UPDATE/DELETE individually.
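+	 *
+	 * For example, a single (hypothetical) command such as
+	 *
+	 *   MERGE INTO t1 USING s1 ON (t1.id = s1.id)
+	 *   WHEN MATCHED AND s1.val = 0 THEN DELETE
+	 *   WHEN MATCHED THEN UPDATE SET val = t1.val + 1
+	 *   WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val)
+	 *
+	 * carries three actions in mergeActionList, each with its own qual and
+	 * target list, so each one must pass the same checks as a standalone
+	 * modify statement.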
+	 */
+	MergeAction *action = NULL;
+	foreach_ptr(action, originalQuery->mergeActionList)
+	{
+		Assert(originalQuery->returningList == NULL);
+		deferredError =
+			TargetlistAndFunctionsSupported(resultRelationId,
+											originalQuery->jointree,
+											action->qual,
+											action->targetList,
+											action->commandType,
+											originalQuery->returningList);
+		if (deferredError)
+		{
+			return deferredError;
+		}
+	}
+
+	#endif
+
+	return NULL;
+}
+
+
 /*
  * ModifyQuerySupported returns NULL if the query only contains supported
  * features, otherwise it returns an error description.
@@ -920,8 +998,17 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
 					 PlannerRestrictionContext *plannerRestrictionContext)
 {
 	Oid distributedTableId = InvalidOid;
-	DeferredErrorMessage *error = ModifyPartialQuerySupported(queryTree, multiShardQuery,
-															  &distributedTableId);
+	DeferredErrorMessage *error = MergeQuerySupported(originalQuery,
+													  plannerRestrictionContext);
+	if (error)
+	{
+		/*
+		 * For MERGE, we do not do recursive planning; simply bail out.
+		 */
+		RaiseDeferredError(error, ERROR);
+	}
+
+	error = ModifyPartialQuerySupported(queryTree, multiShardQuery, &distributedTableId);
 	if (error)
 	{
 		return error;
@@ -3969,3 +4056,263 @@ CompareInsertValuesByShardId(const void *leftElement, const void *rightElement)
 		}
 	}
 }
+
+
+/*
+ * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is
+ * permitted on special relations, such as materialized views; returns true only if
+ * it's a "source" relation.
+ */
+bool
+IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte)
+{
+	if (!IsMergeQuery(parse))
+	{
+		return false;
+	}
+
+	RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable);
+
+	/* Is it a target relation? */
+	if (targetRte->relid == rte->relid)
+	{
+		return false;
+	}
+
+	return true;
+}
+
+
+/*
+ * ErrorIfDistTablesNotColocated checks to see if
+ *
+ * - There are a minimum of two distributed tables (source and a target).
+ * - All the distributed tables are indeed colocated.
+ * - MERGE relations are joined on the distribution column
+ *   MERGE .. USING .. ON target.dist_key = source.dist_key
+ *
+ * If any of the conditions are not met, it raises an exception.
+ */
+static DeferredErrorMessage *
+ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList,
+							  PlannerRestrictionContext *plannerRestrictionContext)
+{
+	/* All MERGE tables must be distributed */
+	if (list_length(distTablesList) < 2)
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "For MERGE command, both the source and target "
+							 "must be distributed", NULL, NULL);
+	}
+
+	/* All distributed tables must be colocated */
+	if (!AllRelationsInListColocated(distTablesList, RANGETABLE_ENTRY))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "For MERGE command, all the distributed tables "
+							 "must be colocated", NULL, NULL);
+	}
+
+	/* Are source and target tables joined on distribution column? */
+	if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "MERGE command is only supported when distributed "
+							 "tables are joined on their distribution column",
+							 NULL, NULL);
+	}
+
+	return NULL;
+}
+
+
+/*
+ * ErrorIfMergeHasUnsupportedTables checks if all the tables (target, source or
+ * any CTE present) in the MERGE command are local, i.e.
a combination of Citus
+ * local and non-Citus tables (regular Postgres tables), or distributed
+ * tables with some restrictions; please see the header of the routine
+ * ErrorIfDistTablesNotColocated for details. It raises an exception for
+ * all other combinations.
+ */
+static DeferredErrorMessage *
+ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList,
+								 PlannerRestrictionContext *restrictionContext)
+{
+	List *distTablesList = NIL;
+	bool foundLocalTables = false;
+
+	RangeTblEntry *rangeTableEntry = NULL;
+	foreach_ptr(rangeTableEntry, rangeTableList)
+	{
+		Oid relationId = rangeTableEntry->relid;
+
+		switch (rangeTableEntry->rtekind)
+		{
+			case RTE_RELATION:
+			{
+				/* Check the relation type */
+				break;
+			}
+
+			case RTE_SUBQUERY:
+			case RTE_FUNCTION:
+			case RTE_TABLEFUNC:
+			case RTE_VALUES:
+			case RTE_JOIN:
+			case RTE_CTE:
+			{
+				/* Skip them as base table(s) will be checked */
+				continue;
+			}
+
+			/*
+			 * RTE_NAMEDTUPLESTORE is typically used in ephemeral named relations,
+			 * such as trigger data; until we find a genuine use case, raise an
+			 * exception.
+			 * RTE_RESULT is a node added by the planner and we shouldn't
+			 * encounter it in the parse tree.
+			 */
+			case RTE_NAMEDTUPLESTORE:
+			case RTE_RESULT:
+			{
+				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+									 "MERGE command is not supported with "
+									 "Tuplestores and results",
+									 NULL, NULL);
+			}

+			default:
+			{
+				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+									 "MERGE command: Unrecognized range table entry.",
+									 NULL, NULL);
+			}
+		}
+
+		/* RTE Relation can be of various types, check them now */
+
+		/* skip the regular views as they are replaced with subqueries */
+		if (rangeTableEntry->relkind == RELKIND_VIEW)
+		{
+			continue;
+		}
+
+		if (rangeTableEntry->relkind == RELKIND_MATVIEW ||
+			rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			/* Materialized view or Foreign table as target is not allowed */
+			if (IsMergeAllowedOnRelation(parse, rangeTableEntry))
+			{
+				/* Non target relation is ok */
+				continue;
+			}
+			else
+			{
+				/* Usually we don't reach this exception as the Postgres parser catches it */
+				StringInfo errorMessage = makeStringInfo();
+				appendStringInfo(errorMessage,
+								 "MERGE command is not allowed on "
+								 "relation type(relkind:%c)", rangeTableEntry->relkind);
+				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorMessage->data,
+									 NULL, NULL);
+			}
+		}
+
+		if (rangeTableEntry->relkind != RELKIND_RELATION &&
+			rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE)
+		{
+			StringInfo errorMessage = makeStringInfo();
+			appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) "
+										   "in MERGE command", rangeTableEntry->relkind);
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorMessage->data,
+								 NULL, NULL);
+		}
+
+		Assert(rangeTableEntry->relid != 0);
+
+		/* Reference tables are not supported yet */
+		if (IsCitusTableType(relationId, REFERENCE_TABLE))
+		{
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+								 "MERGE command is not supported on reference "
+								 "tables yet", NULL, NULL);
+		}
+
+		/* Append/Range tables are not supported */
+		if (IsCitusTableType(relationId, APPEND_DISTRIBUTED) ||
+			IsCitusTableType(relationId, RANGE_DISTRIBUTED))
+		{
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+								 "For MERGE command, all the distributed tables "
+								 "must be colocated, for append/range distribution, "
+								 "colocation is not supported", NULL,
+								 "Consider using hash distribution instead");
+		}
+
+		/*
+		 * For now, save all distributed tables; later (below) we will
+		 * check for supported combination(s).
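+		 *
+		 * Roughly, the combinations accepted below are: either all
+		 * relations are regular Postgres or Citus local tables, or all
+		 * relations are colocated distributed tables; mixing distributed
+		 * and local relations is rejected.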
+		 */
+		if (IsCitusTableType(relationId, DISTRIBUTED_TABLE))
+		{
+			distTablesList = lappend(distTablesList, rangeTableEntry);
+			continue;
+		}
+
+		/* Regular Postgres tables and Citus local tables are allowed */
+		if (!IsCitusTable(relationId) ||
+			IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
+		{
+			foundLocalTables = true;
+			continue;
+		}
+
+		/* Any other Citus table type missing? */
+	}
+
+	/* Ensure all tables are indeed local */
+	if (foundLocalTables && list_length(distTablesList) == 0)
+	{
+		/* All the tables are local, supported */
+		return NULL;
+	}
+	else if (foundLocalTables && list_length(distTablesList) > 0)
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "MERGE command is not supported with "
+							 "combination of distributed/local tables yet",
+							 NULL, NULL);
+	}
+
+	/* Ensure all distributed tables are indeed co-located */
+	return ErrorIfDistTablesNotColocated(parse, distTablesList, restrictionContext);
+}
+
+
+/*
+ * QueryHasMergeCommand walks over the query tree and returns false if there
+ * is no Merge command (e.g., CMD_MERGE), true otherwise.
+ */
+static bool
+QueryHasMergeCommand(Query *queryTree)
+{
+	/* the function is a no-op for pre-15 versions of Postgres */
+	#if PG_VERSION_NUM < PG_VERSION_15
+	return false;
+	#else
+
+	/*
+	 * Postgres currently doesn't support Merge queries inside subqueries and
+	 * CTEs, but let's be defensive and do the query tree walk anyway.
+	 *
+	 * We do not call this path for fast-path queries to avoid this additional
+	 * overhead.
+	 */
+	if (!ContainsMergeCommandWalker((Node *) queryTree))
+	{
+		/* No MERGE found */
+		return false;
+	}
+
+	return true;
+	#endif
+}
diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c
index 4d131899a..f76a95d26 100644
--- a/src/backend/distributed/planner/relation_restriction_equivalence.c
+++ b/src/backend/distributed/planner/relation_restriction_equivalence.c
@@ -151,8 +151,6 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass
 															  secondClass);
 static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex,
 											   Index *partitionKeyIndex);
-static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext *
-													  restrictionContext);
 static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node);
 static JoinRestrictionContext * FilterJoinRestrictionContext(
 	JoinRestrictionContext *joinRestrictionContext, Relids
@@ -383,7 +381,8 @@ SafeToPushdownUnionSubquery(Query *originalQuery,
 		return false;
 	}
 
-	if (!AllRelationsInRestrictionContextColocated(restrictionContext))
+	if (!AllRelationsInListColocated(restrictionContext->relationRestrictionList,
+									 RESTRICTION_CONTEXT))
 	{
 		/* distribution columns are equal, but tables are not co-located */
 		return false;
@@ -1919,19 +1918,33 @@ FindQueryContainingRTEIdentityInternal(Node *node,
 
 
 /*
- * AllRelationsInRestrictionContextColocated determines whether all of the relations in the
- * given relation restrictions list are co-located.
+ * AllRelationsInListColocated determines whether all of the relations in the
+ * given list are co-located.
+ * Note: The list can be of different types, which is specified by ListEntryType.
  */
-static bool
-AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext)
+bool
+AllRelationsInListColocated(List *relationList, ListEntryType entryType)
 {
+	void *varPtr = NULL;
+	RangeTblEntry *rangeTableEntry = NULL;
 	RelationRestriction *relationRestriction = NULL;
 	int initialColocationId = INVALID_COLOCATION_ID;
 
 	/* check whether all relations exists in the main restriction list */
-	foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList)
+	foreach_ptr(varPtr, relationList)
 	{
-		Oid relationId = relationRestriction->relationId;
+		Oid relationId = InvalidOid;
+
+		if (entryType == RANGETABLE_ENTRY)
+		{
+			rangeTableEntry = (RangeTblEntry *) varPtr;
+			relationId = rangeTableEntry->relid;
+		}
+		else if (entryType == RESTRICTION_CONTEXT)
+		{
+			relationRestriction = (RelationRestriction *) varPtr;
+			relationId = relationRestriction->relationId;
+		}
 
 		if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
 		{
diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h
index 29c3c7154..19bd9f0c2 100644
--- a/src/include/distributed/distributed_planner.h
+++ b/src/include/distributed/distributed_planner.h
@@ -256,5 +256,9 @@ extern struct DistributedPlan * CreateDistributedPlan(uint64 planId,
 													  plannerRestrictionContext);
 
 extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte);
+extern bool ConjunctionContainsColumnFilter(Node *node,
+											Var *column,
+											Node **distributionKeyValue);
+extern bool ContainsMergeCommandWalker(Node *node);
 
 #endif /* DISTRIBUTED_PLANNER_H */
diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h
index ccd50a6db..4fd9c7015 100644
--- a/src/include/distributed/relation_restriction_equivalence.h
+++ b/src/include/distributed/relation_restriction_equivalence.h
@@ -17,6 +17,15 @@
 
 #define SINGLE_RTE_INDEX 1
 
+/*
+ * Represents the pointer type that's being passed in the list.
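+ *
+ * For example, ErrorIfDistTablesNotColocated passes a list of RangeTblEntry
+ * pointers with RANGETABLE_ENTRY, while SafeToPushdownUnionSubquery passes
+ * RelationRestriction pointers with RESTRICTION_CONTEXT.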
+ */ +typedef enum ListEntryType +{ + RANGETABLE_ENTRY, /* RangeTblEntry */ + RESTRICTION_CONTEXT /* RelationRestriction */ +} ListEntryType; + extern bool AllDistributionKeysInQueryAreEqual(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext); @@ -54,4 +63,6 @@ extern RelationRestrictionContext * FilterRelationRestrictionContext( RelationRestrictionContext *relationRestrictionContext, Relids queryRteIdentities); +extern bool AllRelationsInListColocated(List *relationList, ListEntryType entryType); + #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index df343a077..2ebb31f47 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -28,6 +28,10 @@ s/\(ref_id\)=\([0-9]+\)/(ref_id)=(X)/g # shard table names for multi_subtransactions s/"t2_[0-9]+"/"t2_xxxxxxx"/g +# shard table names for MERGE tests +s/merge_schema\.([_a-z0-9]+)_40[0-9]+ /merge_schema.\1_xxxxxxx /g +s/pgmerge_schema\.([_a-z0-9]+)_40[0-9]+ /pgmerge_schema.\1_xxxxxxx /g + # shard table names for multi_subquery s/ keyval(1|2|ref)_[0-9]+ / keyval\1_xxxxxxx /g diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 6fc472b70..02671acd0 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -18,6 +18,7 @@ SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; SET citus.explain_all_tasks to true; +SET citus.shard_replication_factor TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ?column? @@ -214,9 +215,45 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +-- Updates one of the row with customer_id = 30002 +SELECT * from target t WHERE t.customer_id = 30002; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30002 | 103 | AX | -1 | Sun Jan 17 19:53:00 2021 +(1 row) + +-- Turn on notice to print tasks sent to nodes +SET citus.log_remote_commands to true; MERGE INTO target t USING source s - ON (t.customer_id = s.customer_id) + ON (t.customer_id = s.customer_id) AND t.customer_id = 30002 + WHEN MATCHED AND t.order_center = 'XX' THEN + DELETE + WHEN MATCHED THEN + UPDATE SET -- Existing customer, update the order count and last_order_id + order_count = t.order_count + 1, + last_order_id = s.order_id + WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.target_xxxxxxx t USING merge_schema.source_xxxxxxx s ON ((t.customer_id OPERATOR(pg_catalog.=) s.customer_id) AND (t.customer_id OPERATOR(pg_catalog.=) 30002)) WHEN MATCHED AND ((t.order_center COLLATE "default") OPERATOR(pg_catalog.=) 'XX'::text) THEN DELETE WHEN MATCHED THEN UPDATE SET last_order_id = s.order_id, order_count = (t.order_count OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * from target t WHERE t.customer_id = 30002; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30002 | 103 | AX | 0 | Sun Jan 17 19:53:00 2021 +(1 row) + +-- Deletes one of the row with customer_id = 30004 +SELECT * from target t WHERE 
t.customer_id = 30004; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30004 | 99 | XX | -1 | Fri Sep 11 03:23:00 2020 +(1 row) + +MERGE INTO target t + USING source s + ON (t.customer_id = s.customer_id) AND t.customer_id = 30004 WHEN MATCHED AND t.order_center = 'XX' THEN DELETE WHEN MATCHED THEN @@ -226,7 +263,11 @@ MERGE INTO target t WHEN NOT MATCHED THEN -- New entry, record it. INSERT (customer_id, last_order_id, order_center, order_count, last_order) VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time); -ERROR: MERGE command is not supported on distributed/reference tables yet +SELECT * from target t WHERE t.customer_id = 30004; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- +(0 rows) + -- -- Test MERGE with CTE as source -- @@ -386,18 +427,61 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 1 | 0 + 2 | 0 + 5 | 0 +(3 rows) + +SET citus.log_remote_commands to true; WITH s1_res AS ( SELECT * FROM s1 ) MERGE INTO t1 - USING s1_res ON (s1_res.id = t1.id) + USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6 WHEN MATCHED AND s1_res.val = 0 THEN DELETE WHEN MATCHED THEN UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM 
merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +-- Other than id 6 everything else is a NO match, and should appear in target +SELECT * FROM t1 order by 1, 2; + id | val +--------------------------------------------------------------------- + 1 | 0 + 1 | 0 + 2 | 0 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(8 rows) + -- -- Test with multiple join conditions -- @@ -553,16 +637,38 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +SELECT * FROM t2 ORDER BY 1; + id | val | src +--------------------------------------------------------------------- + 1 | 0 | target + 2 | 0 | target + 3 | 1 | match + 4 | 0 | match +(4 rows) + +SET citus.log_remote_commands to true; MERGE INTO t2 USING s2 -ON t2.id = s2.id AND t2.src = s2.src +ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4 WHEN MATCHED AND t2.val = 1 THEN UPDATE SET val = s2.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN - INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src); -ERROR: MERGE command is not supported on distributed/reference tables yet + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.t2_xxxxxxx t2 USING merge_schema.s2_xxxxxxx s2 ON ((t2.id OPERATOR(pg_catalog.=) s2.id) AND (t2.src OPERATOR(pg_catalog.=) s2.src) AND (t2.id OPERATOR(pg_catalog.=) 4)) WHEN MATCHED AND (t2.val OPERATOR(pg_catalog.=) 1) THEN UPDATE SET val = (s2.val OPERATOR(pg_catalog.+) 10) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +-- Row with id = 4 is a match for delete clause, row should be deleted +-- Row with id = 3 is a NO match, row from source will be inserted +SELECT * FROM t2 ORDER BY 1; + id | val | src +--------------------------------------------------------------------- + 1 | 0 | target + 2 | 0 | target + 3 | 1 | match +(3 rows) + -- -- With sub-query as the MERGE source -- @@ -943,7 +1049,7 @@ WHEN MATCHED THEN UPDATE SET value = vl_source.value, id = vl_target.id + 1 WHEN NOT MATCHED THEN INSERT VALUES(vl_source.ID, vl_source.value); -DEBUG: +DEBUG: RESET client_min_messages; SELECT * INTO vl_local FROM vl_target ORDER BY 1 ; -- Should be equal @@ -996,7 +1102,7 @@ WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT VALUES(rs_source.id); -DEBUG: +DEBUG: RESET client_min_messages; SELECT * INTO rs_local FROM rs_target ORDER BY 1 ; -- Should be equal @@ -1132,7 +1238,7 @@ DEBUG: function does not have co-located tables DEBUG: generating subplan XXX_1 for subquery SELECT id, source FROM merge_schema.f_dist() f(id integer, source character varying) DEBUG: DEBUG: Plan XXX query 
after replacing subqueries and CTEs: MERGE INTO merge_schema.fn_target USING (SELECT intermediate_result.id, intermediate_result.source FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, source character varying)) fn_source ON (fn_source.id OPERATOR(pg_catalog.=) fn_target.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT (id, data) VALUES (fn_source.id, fn_source.source) -DEBUG: +DEBUG: RESET client_min_messages; SELECT * INTO fn_local FROM fn_target ORDER BY 1 ; -- Should be equal @@ -1204,7 +1310,7 @@ MERGE INTO ft_target DELETE WHEN NOT MATCHED THEN INSERT (id, user_val) VALUES (foreign_table.id, foreign_table.user_val); -DEBUG: +DEBUG: RESET client_min_messages; SELECT * FROM ft_target; id | user_val @@ -1213,9 +1319,866 @@ SELECT * FROM ft_target; 3 | source (2 rows) +-- +-- complex joins on the source side +-- +-- source(join of two relations) relation is an unaliased join +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET tid = sid2, src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test the same scenarios with distributed tables +SELECT create_distributed_table('target_cj', 'tid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.target_cj$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj1', 'sid1'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj2', 'sid2'); +NOTICE: Copying data from local table... 
+NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj2$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t USING (merge_schema.source_cj1_xxxxxxx s1 JOIN merge_schema.source_cj2_xxxxxxx s2 ON ((s1.sid1 OPERATOR(pg_catalog.=) s2.sid2))) ON ((t.tid OPERATOR(pg_catalog.=) s1.sid1) AND (t.tid OPERATOR(pg_catalog.=) 2)) WHEN MATCHED THEN UPDATE SET src = s2.src2 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- sub-query as a source +BEGIN; +MERGE INTO target_cj t +USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub +ON t.tid = sub.sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = sub.src1, val = val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 10 + 2 | source-1 | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test self-join +BEGIN; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 0 + 2 | target | 0 + 3 | target | 0 +(4 rows) + +set citus.log_remote_commands to true; +MERGE INTO target_cj t1 +USING (SELECT * FROM target_cj) sub +ON t1.tid = sub.tid AND t1.tid = 3 +WHEN MATCHED THEN + UPDATE SET src = sub.src, val = sub.val + 100 +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t1 USING (SELECT target_cj.tid, target_cj.src, target_cj.val FROM merge_schema.target_cj_xxxxxxx target_cj) sub ON ((t1.tid OPERATOR(pg_catalog.=) sub.tid) AND (t1.tid OPERATOR(pg_catalog.=) 3)) WHEN MATCHED THEN UPDATE SET src = sub.src, val = (sub.val OPERATOR(pg_catalog.+) 100) WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +set citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | 
target | 0 + 2 | target | 0 + 3 | target | 100 +(4 rows) + +ROLLBACK; +-- Test PREPARE +PREPARE foo(int) AS +MERGE INTO target_cj target +USING (SELECT * FROM source_cj1) sub +ON target.tid = sub.sid1 AND target.tid = $1 +WHEN MATCHED THEN + UPDATE SET val = sub.val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 0 + 2 | target | 0 + 3 | target | 0 +(4 rows) + +BEGIN; +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 10 + 2 | target | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +SET citus.log_remote_commands to true; +SET client_min_messages TO DEBUG1; +EXECUTE foo(2); +DEBUG: +DEBUG: +DEBUG: +DEBUG: +DEBUG: +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +RESET client_min_messages; +EXECUTE foo(2); +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 10 + 2 | target | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test distributed tables, must be co-located and joined on distribution column. +-- +-- We create two sets of source and target tables, one set is Postgres and the other +-- is Citus distributed. Run the _exact_ MERGE SQL on both the sets and compare the +-- final results of target tables of Postgres and Citus, the result should match. +-- This is repeated for various MERGE SQL combinations +-- +CREATE TABLE pg_target(id int, val varchar); +CREATE TABLE pg_source(id int, val varchar); +CREATE TABLE citus_target(id int, val varchar); +CREATE TABLE citus_source(id int, val varchar); +-- Half of the source rows do not match +INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i; +INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i; +SELECT create_distributed_table('citus_target', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_target$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('citus_source', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_source$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- +-- This routine compares the target tables of Postgres and Citus and +-- returns true if they match, false if the results do not match. +-- +CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_target + FULL OUTER JOIN citus_target + USING (id, val) + WHERE pg_target.id IS NULL + OR citus_target.id IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; +-- Make sure we start with exact data in Postgres and Citus +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +-- Run the MERGE on both Postgres and Citus, and compare the final target tables +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT 
MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- ON clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by 
Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- ON clause filter on target +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- NOT MATCHED clause filter on 
source +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- Test constant filter in ON clause to check if shards are pruned +-- with restriction information +-- +-- +-- Though constant filter is present, this won't prune shards as +-- NOT MATCHED clause is present +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET 
val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- This will prune shards with restriction information as NOT MATCHED is void +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test CTE with distributed tables +CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400; +WARNING: "view pg_source_view" has dependency to "table pg_source" that is not in Citus' metadata +DETAIL: "view pg_source_view" will be created only locally +HINT: Distribute "table pg_source" first to distribute "view pg_source_view" +CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400; +BEGIN; +SEt citus.log_remote_commands to true; +WITH cte AS ( + SELECT * FROM pg_source_view +) +MERGE INTO pg_target t 
+USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +WITH cte AS ( + SELECT * FROM citus_source_view +) +MERGE INTO citus_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT 
(id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test sub-query with distributed tables +BEGIN; +SEt citus.log_remote_commands to true; +MERGE INTO pg_target t +USING (SELECT * FROM pg_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +MERGE INTO citus_target t +USING (SELECT * FROM citus_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) 
WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test PREPARE +PREPARE pg_prep(int) AS +MERGE INTO pg_target +USING (SELECT * FROM pg_source) sub +ON pg_target.id = sub.id AND pg_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + DO NOTHING; +PREPARE citus_prep(int) AS +MERGE INTO citus_target +USING (SELECT * FROM citus_source) sub +ON citus_target.id = sub.id AND citus_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + DO NOTHING; +BEGIN; +SET citus.log_remote_commands to true; +SELECT * FROM pg_target WHERE id = 500; -- before merge + id | val +--------------------------------------------------------------------- + 500 | target +(1 row) + +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- non-cached + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- cached + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +SELECT * FROM citus_target WHERE id = 500; -- before merge +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx + id | val +--------------------------------------------------------------------- + 500 | target +(1 row) + +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SELECT * FROM citus_target WHERE id = 500; -- non-cached +NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server 
postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SELECT * FROM citus_target WHERE id = 500; -- cached +NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test partitions + distributed tables +CREATE TABLE pg_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE citus_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part6 PARTITION OF 
citus_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE pg_pa_source (sid integer, delta float); +CREATE TABLE citus_pa_source (sid integer, delta float); +-- insert many rows to the source table +INSERT INTO pg_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +-- insert a few rows in the target table (odd numbered tid) +INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +SELECT create_distributed_table('citus_pa_target', 'tid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part5$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part6$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part7$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part8$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('citus_pa_source', 'sid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_pa_source$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_pa_target + FULL OUTER JOIN citus_pa_target + USING (tid, balance, val) + WHERE pg_pa_target.tid IS NULL + OR citus_pa_target.tid IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; +-- try simple MERGE +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +SELECT pa_compare_tables(); + pa_compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- same with a constant qual +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +SELECT pa_compare_tables(); + pa_compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; -- -- Error and Unsupported scenarios -- +-- try updating the distribution key column +BEGIN; +MERGE INTO target_cj t + USING source_cj1 s + ON t.tid = s.sid1 AND t.tid = 2 + WHEN MATCHED THEN + UPDATE SET tid = tid + 9, src = src || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid1, 'inserted by merge', val1); +ERROR: modifying the partition value of rows is not allowed +ROLLBACK; -- Foreign table as target MERGE INTO foreign_table USING ft_target ON (foreign_table.id = ft_target.id) @@ -1274,7 +2237,54 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet +-- Now both s1 and t1 are distributed tables +SELECT undistribute_table('t1'); +NOTICE: creating a new table for merge_schema.t1 +NOTICE: moving the data of merge_schema.t1 +NOTICE: dropping the old merge_schema.t1 +NOTICE: renaming the new table to merge_schema.t1 + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t1', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- We have a potential pitfall where a function can be invoked in +-- the MERGE conditions which can insert/update to a random shard +CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN +LANGUAGE PLPGSQL AS +$$ +BEGIN + INSERT INTO t1 VALUES (100, 100); + RETURN TRUE; +END; +$$; +-- Test preventing "ON" join condition from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET val = t1.val + s1.val; +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ROLLBACK; +-- Test preventing WHEN clause(s) from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 +WHEN MATCHED AND (merge_when_and_write()) THEN + UPDATE SET val = t1.val + s1.val; +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ROLLBACK; -- Joining on partition columns with sub-query MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.val = t1.id) -- sub.val is not a distribution column @@ -1284,7 +2294,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- Joining on partition columns with CTE WITH s1_res AS ( SELECT * FROM s1 @@ -1297,7 +2307,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- Constant Join condition WITH s1_res AS ( SELECT * FROM s1 @@ -1310,7 +2320,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- With a single WHEN clause, which causes a non-left join WITH s1_res AS ( SELECT * FROM s1 @@ -1319,7 +2329,7 @@ WITH s1_res AS ( WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- -- Reference tables -- @@ -1371,7 +2381,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- -- Postgres + Citus-Distributed table -- @@ -1413,7 +2423,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.id = t1.id) WHEN MATCHED AND sub.val = 0 
THEN @@ -1422,7 +2432,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet CREATE TABLE pg(val int); SELECT create_distributed_table('s1', 'id'); NOTICE: Copying data from local table... @@ -1443,7 +2453,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet -- Mix Postgres table in CTE WITH pg_res AS ( SELECT * FROM pg @@ -1456,7 +2466,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet -- Match more than one source row should fail same as Postgres behavior SELECT undistribute_table('t1'); NOTICE: creating a new table for merge_schema.t1 @@ -1511,6 +2521,234 @@ WHEN NOT MATCHED THEN INSERT VALUES(mv_source.id, mv_source.val); ERROR: cannot execute MERGE on relation "mv_source" DETAIL: This operation is not supported for materialized views. +-- Distributed tables *must* be colocated +CREATE TABLE dist_target(id int, val varchar); +SELECT create_distributed_table('dist_target', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_source(id int, val varchar); +SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated +-- Distributed tables *must* be joined on distribution column +CREATE TABLE dist_colocated(id int, val int); +SELECT create_distributed_table('dist_colocated', 'id', colocate_with => 'dist_target'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +MERGE INTO dist_target +USING dist_colocated +ON dist_target.id = dist_colocated.val -- val is not the distribution column +WHEN MATCHED THEN +UPDATE SET val = dist_colocated.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_colocated.id, dist_colocated.val); +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +-- Both the source and target must be distributed +MERGE INTO dist_target +USING (SELECT 100 id) AS source +ON dist_target.id = source.id AND dist_target.val = 'const' +WHEN MATCHED THEN +UPDATE SET val = 'source' +WHEN NOT MATCHED THEN +INSERT VALUES(source.id, 'source'); +ERROR: For MERGE command, both the source and target must be distributed +-- Non-hash distributed tables (append/range). 
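(Editorial aside, not part of the captured patch: the three errors above enumerate the MERGE routing rules in this version: the distributed tables must be colocated, joined on their distribution column, and both source and target must be distributed. For contrast, here is a minimal sketch of a statement the planner should accept, reusing the hash-distributed, colocated `dist_target` and `dist_colocated` tables created earlier; the casts are only needed because `dist_colocated.val` is an `int` while `dist_target.val` is a `varchar`.)

```sql
-- Sketch only: both tables are hash-distributed in the same colocation group,
-- and the ON clause equates their distribution columns, so Citus can push the
-- MERGE down to each colocated shard pair.
MERGE INTO dist_target t
USING dist_colocated s
ON t.id = s.id          -- join on the distribution column
WHEN MATCHED THEN
    UPDATE SET val = s.val::varchar
WHEN NOT MATCHED THEN
    INSERT VALUES (s.id, s.val::varchar);
```

The section below then switches to append- and range-distributed tables, for which colocation is undefined, so the same statement is rejected with a hint to use hash distribution instead.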
+CREATE VIEW show_tables AS +SELECT logicalrelid, partmethod +FROM pg_dist_partition +WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass) +ORDER BY 1; +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | h + dist_source | a +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | h + dist_source | r +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +-- Both are append tables +SELECT undistribute_table('dist_target'); +NOTICE: creating a new table for merge_schema.dist_target +NOTICE: moving the data of merge_schema.dist_target +NOTICE: dropping the old merge_schema.dist_target +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE" +NOTICE: renaming the new table to merge_schema.dist_target + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table 
+--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_target', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | a + dist_source | a +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +-- Both are range tables +SELECT undistribute_table('dist_target'); +NOTICE: creating a new table for merge_schema.dist_target +NOTICE: moving the data of merge_schema.dist_target +NOTICE: dropping the old merge_schema.dist_target +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE" +NOTICE: renaming the new table to merge_schema.dist_target + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_target', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | r + dist_source | r +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead DROP SERVER foreign_server CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to user mapping for postgres on server foreign_server @@ -1519,8 +2757,9 @@ drop cascades to foreign table foreign_table NOTICE: foreign table "foreign_table_4000046" does not exist, skipping CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name, drop_shards_metadata_only := false)" PL/pgSQL function citus_drop_trigger() line XX at PERFORM +DROP FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; -NOTICE: drop cascades to 56 other objects +NOTICE: drop cascades to 75 other objects DETAIL: 
drop cascades to function insert_data() drop cascades to table pg_result drop cascades to table local_local @@ -1572,11 +2811,30 @@ drop cascades to table ft_target drop cascades to table ft_source_4000045 drop cascades to table ft_source drop cascades to extension postgres_fdw +drop cascades to table target_cj +drop cascades to table source_cj1 +drop cascades to table source_cj2 +drop cascades to table pg_target +drop cascades to table pg_source +drop cascades to table citus_target +drop cascades to table citus_source +drop cascades to function compare_tables() +drop cascades to view pg_source_view +drop cascades to view citus_source_view +drop cascades to table pg_pa_target +drop cascades to table citus_pa_target +drop cascades to table pg_pa_source +drop cascades to table citus_pa_source +drop cascades to function pa_compare_tables() drop cascades to table pg -drop cascades to table t1_4000062 -drop cascades to table s1_4000063 +drop cascades to table t1_4000110 +drop cascades to table s1_4000111 drop cascades to table t1 drop cascades to table s1 +drop cascades to table dist_colocated +drop cascades to table dist_target +drop cascades to table dist_source +drop cascades to view show_tables SELECT 1 FROM master_remove_node('localhost', :master_port); ?column? --------------------------------------------------------------------- diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 7a41b25ec..d92686b93 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -315,7 +315,7 @@ SELECT create_reference_table('tbl2'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- now, both are reference, still not supported SELECT create_reference_table('tbl1'); create_reference_table @@ -325,7 +325,7 @@ SELECT create_reference_table('tbl1'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- now, both distributed, not works SELECT undistribute_table('tbl1'); NOTICE: creating a new table for pg15.tbl1 @@ -419,14 +419,14 @@ SELECT create_distributed_table('tbl2', 'x'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- also, not inside subqueries & ctes WITH targq AS ( SELECT * FROM tbl2 ) MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- crashes on beta3, fixed on 15 stable --WITH foo AS ( -- MERGE INTO tbl1 USING tbl2 ON (true) @@ -441,7 +441,7 @@ USING tbl2 ON (true) WHEN MATCHED THEN UPDATE SET x = (SELECT count(*) FROM tbl2); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- test numeric types with negative scale CREATE TABLE numeric_negative_scale(numeric_column numeric(3,-1), orig_value int); INSERT into numeric_negative_scale SELECT x,x FROM generate_series(111, 115) x; diff --git 
a/src/test/regress/expected/pgmerge.out b/src/test/regress/expected/pgmerge.out
index b90760691..0bedf356f 100644
--- a/src/test/regress/expected/pgmerge.out
+++ b/src/test/regress/expected/pgmerge.out
@@ -910,7 +910,15 @@ MERGE INTO wq_target t
USING wq_source s ON t.tid = s.sid
WHEN MATCHED AND (merge_when_and_write()) THEN
	UPDATE SET balance = t.balance + s.balance;
-ERROR: functions used in UPDATE queries on distributed tables must not be VOLATILE
+ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE
+ROLLBACK;
+-- Test preventing ON condition from writing to the database
+BEGIN;
+MERGE INTO wq_target t
+USING wq_source s ON t.tid = s.sid AND (merge_when_and_write())
+WHEN MATCHED THEN
+	UPDATE SET balance = t.balance + s.balance;
+ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE
ROLLBACK;
drop function merge_when_and_write();
DROP TABLE wq_target, wq_source;
@@ -1891,7 +1899,7 @@ MERGE INTO pa_target t
	UPDATE SET balance = balance + delta, val = val || ' updated by merge'
  WHEN NOT MATCHED THEN
	INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge');
-DEBUG:
+DEBUG:
SELECT * FROM pa_target ORDER BY tid;
 logts | tid | balance | val
---------------------------------------------------------------------
@@ -2083,7 +2091,7 @@ WHEN MATCHED THEN UPDATE
WHEN NOT MATCHED THEN
	INSERT (city_id, logdate, peaktemp, unitsales)
		VALUES (city_id, logdate, peaktemp, unitsales);
-DEBUG:
+DEBUG:
RESET client_min_messages;
SELECT tableoid::regclass, * FROM measurement ORDER BY city_id, logdate;
 tableoid | city_id | logdate | peaktemp | unitsales
diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql
index c266b5333..12294b2c9 100644
--- a/src/test/regress/sql/merge.sql
+++ b/src/test/regress/sql/merge.sql
@@ -19,6 +19,7 @@ SET search_path TO merge_schema;
SET citus.shard_count TO 4;
SET citus.next_shard_id TO 4000000;
SET citus.explain_all_tasks to true;
+SET citus.shard_replication_factor TO 1;
SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0);
CREATE TABLE source
@@ -143,9 +144,33 @@ SELECT undistribute_table('source');
SELECT create_distributed_table('target', 'customer_id');
SELECT create_distributed_table('source', 'customer_id');
+-- Updates one of the rows with customer_id = 30002
+SELECT * from target t WHERE t.customer_id = 30002;
+-- Turn on notices to print tasks sent to nodes
+SET citus.log_remote_commands to true;
MERGE INTO target t
  USING source s
-  ON (t.customer_id = s.customer_id)
+  ON (t.customer_id = s.customer_id) AND t.customer_id = 30002
+
+  WHEN MATCHED AND t.order_center = 'XX' THEN
+    DELETE
+
+  WHEN MATCHED THEN
+    UPDATE SET -- Existing customer, update the order count and last_order_id
+      order_count = t.order_count + 1,
+      last_order_id = s.order_id
+
+  WHEN NOT MATCHED THEN
+    DO NOTHING;
+
+SET citus.log_remote_commands to false;
+SELECT * from target t WHERE t.customer_id = 30002;
+
+-- Deletes one of the rows with customer_id = 30004
+SELECT * from target t WHERE t.customer_id = 30004;
+MERGE INTO target t
+  USING source s
+  ON (t.customer_id = s.customer_id) AND t.customer_id = 30004
WHEN MATCHED AND t.order_center = 'XX' THEN
	DELETE
@@ -158,6 +183,7 @@ MERGE INTO target t
WHEN NOT MATCHED THEN -- New entry, record it.
INSERT (customer_id, last_order_id, order_center, order_count, last_order)
	VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time);
+SELECT * from target t WHERE t.customer_id = 30004;

--
-- Test MERGE with CTE as source
@@ -243,11 +269,13 @@ SELECT create_distributed_table('t1', 'id');
SELECT create_distributed_table('s1', 'id');
+SELECT * FROM t1 order by id;
+SET citus.log_remote_commands to true;
WITH s1_res AS (
	SELECT * FROM s1
)
MERGE INTO t1
-	USING s1_res ON (s1_res.id = t1.id)
+	USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6
WHEN MATCHED AND s1_res.val = 0 THEN
	DELETE
@@ -255,6 +283,9 @@ MERGE INTO t1
	UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
	INSERT (id, val) VALUES (s1_res.id, s1_res.val);
+SET citus.log_remote_commands to false;
+-- Other than id 6, everything else is a NO match and should appear in the target
+SELECT * FROM t1 order by 1, 2;

--
-- Test with multiple join conditions
@@ -325,15 +356,21 @@ SELECT undistribute_table('s2');
SELECT create_distributed_table('t2', 'id');
SELECT create_distributed_table('s2', 'id');
+SELECT * FROM t2 ORDER BY 1;
+SET citus.log_remote_commands to true;
MERGE INTO t2
USING s2
-ON t2.id = s2.id AND t2.src = s2.src
+ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4
WHEN MATCHED AND t2.val = 1 THEN
	UPDATE SET val = s2.val + 10
WHEN MATCHED THEN
	DELETE
WHEN NOT MATCHED THEN
-	INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src);
+	DO NOTHING;
+SET citus.log_remote_commands to false;
+-- Row with id = 4 is a match for the delete clause, so the row should be deleted
+-- Row with id = 3 is a NO match; with DO NOTHING, the source row is not inserted
+SELECT * FROM t2 ORDER BY 1;

--
-- With sub-query as the MERGE source
@@ -824,10 +861,577 @@ RESET client_min_messages;
SELECT * FROM ft_target;
+--
+-- complex joins on the source side
+--
+
+-- the source relation is an unaliased join of two relations
+
+CREATE TABLE target_cj(tid int, src text, val int);
+CREATE TABLE source_cj1(sid1 int, src1 text, val1 int);
+CREATE TABLE source_cj2(sid2 int, src2 text, val2 int);
+
+INSERT INTO target_cj VALUES (1, 'target', 0);
+INSERT INTO target_cj VALUES (2, 'target', 0);
+INSERT INTO target_cj VALUES (2, 'target', 0);
+INSERT INTO target_cj VALUES (3, 'target', 0);
+
+INSERT INTO source_cj1 VALUES (2, 'source-1', 10);
+INSERT INTO source_cj2 VALUES (2, 'source-2', 20);
+
+BEGIN;
+MERGE INTO target_cj t
+USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2
+ON t.tid = sid1 AND t.tid = 2
+WHEN MATCHED THEN
+	UPDATE SET src = src2
+WHEN NOT MATCHED THEN
+	DO NOTHING;
+-- Gold result to compare against
+SELECT * FROM target_cj ORDER BY 1;
+ROLLBACK;
+
+BEGIN;
+-- try accessing columns from either side of the source join
+MERGE INTO target_cj t
+USING source_cj1 s2
+	INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10
+ON t.tid = sid1 AND t.tid = 2
+WHEN MATCHED THEN
+	UPDATE SET tid = sid2, src = src1, val = val2
+WHEN NOT MATCHED THEN
+	DO NOTHING;
+-- Gold result to compare against
+SELECT * FROM target_cj ORDER BY 1;
+ROLLBACK;
+
+-- Test the same scenarios with distributed tables
+
+SELECT create_distributed_table('target_cj', 'tid');
+SELECT create_distributed_table('source_cj1', 'sid1');
+SELECT create_distributed_table('source_cj2', 'sid2');
+
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO target_cj t
+USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2
+ON t.tid = sid1 AND t.tid = 2
+WHEN MATCHED THEN
+	UPDATE SET src = src2
+WHEN NOT MATCHED THEN
+	DO NOTHING;
+SET citus.log_remote_commands to false;
+SELECT * FROM target_cj
ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- sub-query as a source +BEGIN; +MERGE INTO target_cj t +USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub +ON t.tid = sub.sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = sub.src1, val = val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test self-join +BEGIN; +SELECT * FROM target_cj ORDER BY 1; +set citus.log_remote_commands to true; +MERGE INTO target_cj t1 +USING (SELECT * FROM target_cj) sub +ON t1.tid = sub.tid AND t1.tid = 3 +WHEN MATCHED THEN + UPDATE SET src = sub.src, val = sub.val + 100 +WHEN NOT MATCHED THEN + DO NOTHING; +set citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + + +-- Test PREPARE +PREPARE foo(int) AS +MERGE INTO target_cj target +USING (SELECT * FROM source_cj1) sub +ON target.tid = sub.sid1 AND target.tid = $1 +WHEN MATCHED THEN + UPDATE SET val = sub.val1 +WHEN NOT MATCHED THEN + DO NOTHING; + +SELECT * FROM target_cj ORDER BY 1; + +BEGIN; +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; + +SET citus.log_remote_commands to true; +SET client_min_messages TO DEBUG1; +EXECUTE foo(2); +RESET client_min_messages; + +EXECUTE foo(2); +SET citus.log_remote_commands to false; + +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test distributed tables, must be co-located and joined on distribution column. + +-- +-- We create two sets of source and target tables, one set is Postgres and the other +-- is Citus distributed. Run the _exact_ MERGE SQL on both the sets and compare the +-- final results of target tables of Postgres and Citus, the result should match. +-- This is repeated for various MERGE SQL combinations +-- +CREATE TABLE pg_target(id int, val varchar); +CREATE TABLE pg_source(id int, val varchar); +CREATE TABLE citus_target(id int, val varchar); +CREATE TABLE citus_source(id int, val varchar); + +-- Half of the source rows do not match +INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i; + +INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i; + +SELECT create_distributed_table('citus_target', 'id'); +SELECT create_distributed_table('citus_source', 'id'); + +-- +-- This routine compares the target tables of Postgres and Citus and +-- returns true if they match, false if the results do not match. 
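+-- (A zero count from the FULL OUTER JOIN below means neither table has a
+-- row that is missing from the other, i.e. the contents are identical.)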
+-- +CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_target + FULL OUTER JOIN citus_target + USING (id, val) + WHERE pg_target.id IS NULL + OR citus_target.id IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; + +-- Make sure we start with exact data in Postgres and Citus +SELECT compare_tables(); + +-- Run the MERGE on both Postgres and Citus, and compare the final target tables + +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- ON clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- ON clause filter on target +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- NOT MATCHED clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- Test constant filter in ON clause to check if shards are pruned +-- with restriction information +-- + +-- +-- Though constant filter is present, this won't prune shards as +-- NOT MATCHED clause is present +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands 
to false; +SELECT compare_tables(); +ROLLBACK; + +-- This will prune shards with restriction information as NOT MATCHED is void +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test CTE with distributed tables +CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400; +CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400; + +BEGIN; +SEt citus.log_remote_commands to true; + +WITH cte AS ( + SELECT * FROM pg_source_view +) +MERGE INTO pg_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +WITH cte AS ( + SELECT * FROM citus_source_view +) +MERGE INTO citus_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + + +-- Test sub-query with distributed tables +BEGIN; +SEt citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING (SELECT * FROM pg_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +MERGE INTO citus_target t +USING (SELECT * FROM citus_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test PREPARE +PREPARE pg_prep(int) AS +MERGE INTO pg_target +USING (SELECT * FROM pg_source) sub +ON pg_target.id = sub.id AND pg_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + DO NOTHING; + +PREPARE citus_prep(int) AS +MERGE INTO citus_target +USING (SELECT * FROM citus_source) sub +ON citus_target.id = sub.id AND citus_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + DO NOTHING; + +BEGIN; +SET citus.log_remote_commands to true; + +SELECT * FROM pg_target WHERE id = 500; -- before merge +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- non-cached +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- cached + +SELECT * FROM citus_target WHERE id = 500; -- before merge +EXECUTE citus_prep(500); +SELECT * FROM citus_target WHERE id = 500; -- non-cached +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +SELECT * FROM citus_target WHERE id = 500; -- cached + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test partitions + 
distributed tables + +CREATE TABLE pg_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE citus_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); + +CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part6 PARTITION OF citus_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT + WITH (autovacuum_enabled=off); + +CREATE TABLE pg_pa_source (sid integer, delta float); +CREATE TABLE citus_pa_source (sid integer, delta float); + +-- insert many rows to the source table +INSERT INTO pg_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +-- insert a few rows in the target table (odd numbered tid) +INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; + +SELECT create_distributed_table('citus_pa_target', 'tid'); +SELECT create_distributed_table('citus_pa_source', 'sid'); + +CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_pa_target + FULL OUTER JOIN citus_pa_target + USING (tid, balance, val) + WHERE pg_pa_target.tid IS NULL + OR citus_pa_target.tid IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; + +-- try simple MERGE +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +SELECT pa_compare_tables(); +ROLLBACK; + +-- same with a constant qual +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +SELECT pa_compare_tables(); +ROLLBACK; + -- -- Error and Unsupported scenarios -- +-- try updating the distribution key column +BEGIN; +MERGE INTO target_cj t + USING source_cj1 s + ON t.tid = s.sid1 AND t.tid = 2 + WHEN MATCHED THEN + UPDATE SET tid = tid + 9, src = src || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid1, 'inserted by merge', val1); +ROLLBACK; + -- Foreign table as target MERGE INTO foreign_table USING 
ft_target ON (foreign_table.id = ft_target.id) @@ -854,6 +1458,38 @@ MERGE INTO t1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); +-- Now both s1 and t1 are distributed tables +SELECT undistribute_table('t1'); +SELECT create_distributed_table('t1', 'id'); + +-- We have a potential pitfall where a function can be invoked in +-- the MERGE conditions which can insert/update to a random shard +CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN +LANGUAGE PLPGSQL AS +$$ +BEGIN + INSERT INTO t1 VALUES (100, 100); + RETURN TRUE; +END; +$$; + +-- Test preventing "ON" join condition from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET val = t1.val + s1.val; +ROLLBACK; + +-- Test preventing WHEN clause(s) from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 +WHEN MATCHED AND (merge_when_and_write()) THEN + UPDATE SET val = t1.val + s1.val; +ROLLBACK; + + -- Joining on partition columns with sub-query MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.val = t1.id) -- sub.val is not a distribution column @@ -997,6 +1633,104 @@ WHEN MATCHED THEN WHEN NOT MATCHED THEN INSERT VALUES(mv_source.id, mv_source.val); +-- Distributed tables *must* be colocated +CREATE TABLE dist_target(id int, val varchar); +SELECT create_distributed_table('dist_target', 'id'); +CREATE TABLE dist_source(id int, val varchar); +SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none'); + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Distributed tables *must* be joined on distribution column +CREATE TABLE dist_colocated(id int, val int); +SELECT create_distributed_table('dist_colocated', 'id', colocate_with => 'dist_target'); + +MERGE INTO dist_target +USING dist_colocated +ON dist_target.id = dist_colocated.val -- val is not the distribution column +WHEN MATCHED THEN +UPDATE SET val = dist_colocated.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_colocated.id, dist_colocated.val); + + +-- Both the source and target must be distributed +MERGE INTO dist_target +USING (SELECT 100 id) AS source +ON dist_target.id = source.id AND dist_target.val = 'const' +WHEN MATCHED THEN +UPDATE SET val = 'source' +WHEN NOT MATCHED THEN +INSERT VALUES(source.id, 'source'); + +-- Non-hash distributed tables (append/range). 
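+-- (MERGE requires colocated hash-distributed tables, so each of the
+-- append/range variations below is expected to error out.)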
+CREATE VIEW show_tables AS +SELECT logicalrelid, partmethod +FROM pg_dist_partition +WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass) +ORDER BY 1; + +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_source', 'id', 'append'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_source', 'id', 'range'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Both are append tables +SELECT undistribute_table('dist_target'); +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_target', 'id', 'append'); +SELECT create_distributed_table('dist_source', 'id', 'append'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Both are range tables +SELECT undistribute_table('dist_target'); +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_target', 'id', 'range'); +SELECT create_distributed_table('dist_source', 'id', 'range'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + DROP SERVER foreign_server CASCADE; +DROP FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; SELECT 1 FROM master_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql/pgmerge.sql b/src/test/regress/sql/pgmerge.sql index 6842f516a..83bf01a68 100644 --- a/src/test/regress/sql/pgmerge.sql +++ b/src/test/regress/sql/pgmerge.sql @@ -608,6 +608,14 @@ USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; ROLLBACK; + +-- Test preventing ON condition from writing to the database +BEGIN; +MERGE INTO wq_target t +USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET balance = t.balance + s.balance; +ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; From cf5513628151410d839a469f9fcafc1ead68f912 Mon Sep 17 00:00:00 2001 From: Teja Mupparti Date: Tue, 31 Jan 2023 18:23:44 -0800 Subject: [PATCH 14/58] 1) Restrict MERGE command INSERT to the source's distribution column Fixes #6672 2) Move all MERGE related routines to a new file merge_planner.c 3) Make ConjunctionContainsColumnFilter() static again, and rearrange the code in MergeQuerySupported() 4) Restore the original format in the comments section. 5) Add big serial test. 
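To illustrate restriction 1), assuming hypothetical tables t and s, both
distributed on column id:

```
-- allowed: the target's distribution column receives the source's
MERGE INTO t USING s ON (t.id = s.id)
WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);

-- rejected: the inserted distribution column value does not come from s.id
MERGE INTO t USING s ON (t.id = s.id)
WHEN NOT MATCHED THEN INSERT (id, val) VALUES (42, s.val);
```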
Implement latest set of comments --- .../distributed/planner/distributed_planner.c | 41 +- .../planner/fast_path_router_planner.c | 5 +- .../distributed/planner/merge_planner.c | 701 ++++++++++++++++++ .../planner/multi_physical_planner.c | 17 +- .../planner/multi_router_planner.c | 398 +--------- .../planner/query_pushdown_planning.c | 12 +- .../relation_restriction_equivalence.c | 68 +- src/include/distributed/distributed_planner.h | 6 - src/include/distributed/merge_planner.h | 26 + .../distributed/multi_router_planner.h | 13 +- .../relation_restriction_equivalence.h | 12 +- src/test/regress/create_schedule | 1 + src/test/regress/expected/merge.out | 444 +++++++++-- src/test/regress/expected/merge_arbitrary.out | 150 ++++ .../regress/expected/merge_arbitrary_0.out | 6 + .../expected/merge_arbitrary_create.out | 72 ++ .../expected/merge_arbitrary_create_0.out | 6 + src/test/regress/expected/pg15.out | 31 +- src/test/regress/expected/pgmerge.out | 12 +- src/test/regress/sql/merge.sql | 235 +++++- src/test/regress/sql/merge_arbitrary.sql | 133 ++++ .../regress/sql/merge_arbitrary_create.sql | 50 ++ src/test/regress/sql/pg15.sql | 23 +- src/test/regress/sql/pgmerge.sql | 6 +- src/test/regress/sql_schedule | 1 + 25 files changed, 1920 insertions(+), 549 deletions(-) create mode 100644 src/backend/distributed/planner/merge_planner.c create mode 100644 src/include/distributed/merge_planner.h create mode 100644 src/test/regress/expected/merge_arbitrary.out create mode 100644 src/test/regress/expected/merge_arbitrary_0.out create mode 100644 src/test/regress/expected/merge_arbitrary_create.out create mode 100644 src/test/regress/expected/merge_arbitrary_create_0.out create mode 100644 src/test/regress/sql/merge_arbitrary.sql create mode 100644 src/test/regress/sql/merge_arbitrary_create.sql diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 262258d7f..17b63ee0a 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -77,7 +77,7 @@ int PlannerLevel = 0; static bool ListContainsDistributedTableRTE(List *rangeTableList, bool *maybeHasForeignDistributedTable); -static bool IsUpdateOrDelete(Query *query); +static bool IsUpdateOrDeleteOrMerge(Query *query); static PlannedStmt * CreateDistributedPlannedStmt( DistributedPlanningContext *planContext); static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, @@ -153,7 +153,7 @@ distributed_planner(Query *parse, * We cannot have merge command for this path as well because * there cannot be recursively planned merge command. */ - Assert(!ContainsMergeCommandWalker((Node *) parse)); + Assert(!IsMergeQuery(parse)); needsDistributedPlanning = true; } @@ -295,39 +295,6 @@ distributed_planner(Query *parse, } -/* - * ContainsMergeCommandWalker walks over the node and finds if there are any - * Merge command (e.g., CMD_MERGE) in the node. - */ -bool -ContainsMergeCommandWalker(Node *node) -{ - #if PG_VERSION_NUM < PG_VERSION_15 - return false; - #endif - - if (node == NULL) - { - return false; - } - - if (IsA(node, Query)) - { - Query *query = (Query *) node; - if (IsMergeQuery(query)) - { - return true; - } - - return query_tree_walker((Query *) node, ContainsMergeCommandWalker, NULL, 0); - } - - return expression_tree_walker(node, ContainsMergeCommandWalker, NULL); - - return false; -} - - /* * ExtractRangeTableEntryList is a wrapper around ExtractRangeTableEntryWalker. 
* The function traverses the input query and returns all the range table @@ -631,7 +598,7 @@ IsMultiTaskPlan(DistributedPlan *distributedPlan) * IsUpdateOrDelete returns true if the query performs an update or delete. */ bool -IsUpdateOrDelete(Query *query) +IsUpdateOrDeleteOrMerge(Query *query) { return query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE || @@ -809,7 +776,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) * if it is planned as a multi shard modify query. */ if ((distributedPlan->planningError || - (IsUpdateOrDelete(planContext->originalQuery) && IsMultiTaskPlan( + (IsUpdateOrDeleteOrMerge(planContext->originalQuery) && IsMultiTaskPlan( distributedPlan))) && hasUnresolvedParams) { diff --git a/src/backend/distributed/planner/fast_path_router_planner.c b/src/backend/distributed/planner/fast_path_router_planner.c index e7d91a101..ecb62478a 100644 --- a/src/backend/distributed/planner/fast_path_router_planner.c +++ b/src/backend/distributed/planner/fast_path_router_planner.c @@ -56,6 +56,9 @@ bool EnableFastPathRouterPlanner = true; static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey); static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn, Node **distributionKeyValue); +static bool ConjunctionContainsColumnFilter(Node *node, + Var *column, + Node **distributionKeyValue); /* @@ -292,7 +295,7 @@ ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey) * * If the conjuction contains column filter which is const, distributionKeyValue is set. */ -bool +static bool ConjunctionContainsColumnFilter(Node *node, Var *column, Node **distributionKeyValue) { if (node == NULL) diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c new file mode 100644 index 000000000..03fd9e00d --- /dev/null +++ b/src/backend/distributed/planner/merge_planner.c @@ -0,0 +1,701 @@ +/*------------------------------------------------------------------------- + * + * merge_planner.c + * + * This file contains functions to help plan MERGE queries. + * + * Copyright (c) Citus Data, Inc. 
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include <stddef.h>
+
+#include "postgres.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/optimizer.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+
+#include "distributed/citus_clauses.h"
+#include "distributed/listutils.h"
+#include "distributed/merge_planner.h"
+#include "distributed/multi_logical_optimizer.h"
+#include "distributed/multi_router_planner.h"
+#include "distributed/pg_version_constants.h"
+#include "distributed/query_pushdown_planning.h"
+
+#if PG_VERSION_NUM >= PG_VERSION_15
+
+static DeferredErrorMessage * CheckIfRTETypeIsUnsupported(Query *parse,
+                                                          RangeTblEntry *rangeTableEntry);
+static DeferredErrorMessage * ErrorIfDistTablesNotColocated(Query *parse,
+                                                            List *distTablesList,
+                                                            PlannerRestrictionContext *
+                                                            plannerRestrictionContext);
+static DeferredErrorMessage * ErrorIfMergeHasUnsupportedTables(Query *parse,
+                                                               List *rangeTableList,
+                                                               PlannerRestrictionContext *
+                                                               restrictionContext);
+static bool IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query,
+                                              bool skipOuterVars);
+static DeferredErrorMessage * InsertDistributionColumnMatchesSource(Query *query,
+                                                                    RangeTblEntry *
+                                                                    resultRte);
+
+static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid
+                                                                       resultRelationId,
+                                                                       FromExpr *joinTree,
+                                                                       Node *quals,
+                                                                       List *targetList,
+                                                                       CmdType commandType);
+#endif
+
+
+/*
+ * MergeQuerySupported checks for a MERGE command in the query and, if it
+ * finds one, verifies the criteria below:
+ * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables
+ * - Distributed tables requirements in ErrorIfDistTablesNotColocated
+ * - Target lists and functions-in-quals in MergeQualAndTargetListFunctionsSupported
+ */
+DeferredErrorMessage *
+MergeQuerySupported(Query *originalQuery, bool multiShardQuery,
+                    PlannerRestrictionContext *plannerRestrictionContext)
+{
+    /* function is void for pre-15 versions of Postgres */
+    #if PG_VERSION_NUM < PG_VERSION_15
+
+    return NULL;
+
+    #else
+
+    /* For non-MERGE commands it's a no-op */
+    if (!IsMergeQuery(originalQuery))
+    {
+        return NULL;
+    }
+
+    /*
+     * TODO: For now, we are adding an exception where any volatile or stable
+     * functions are not allowed in the MERGE query, but this will become too
+     * restrictive as this will prevent many useful and simple cases, such as
+     * INSERT VALUES(ts::timestamp), bigserial column inserts etc. But without
+     * this restriction, we have a potential danger of some of the function(s)
+     * getting executed at the worker which will result in incorrect behavior.
+     */
+    if (contain_mutable_functions((Node *) originalQuery))
+    {
+        return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                             "non-IMMUTABLE functions are not yet supported "
+                             "in MERGE sql with distributed tables ",
+                             NULL, NULL);
+    }
+
+    List *rangeTableList = ExtractRangeTableEntryList(originalQuery);
+    RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery);
+
+    /*
+     * Fast path queries cannot contain a MERGE command; the remaining cases
+     * are checked here. In Citus we have limited support for MERGE: it is
+     * allowed only if all the tables (target, source, or any CTE) are local,
+     * i.e. a combination of Citus local and non-Citus tables (regular
+     * Postgres tables), or distributed tables with some restrictions; please
+     * see the header of routine ErrorIfDistTablesNotColocated for details.
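+     *
+     * For illustration only (hypothetical tables, both hash-distributed on
+     * their first column and colocated), the supported distributed shape is:
+     *
+     *   MERGE INTO target t USING source s ON (t.id = s.id)
+     *   WHEN MATCHED THEN UPDATE SET val = s.val
+     *   WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);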
+ */ + DeferredErrorMessage *deferredError = + ErrorIfMergeHasUnsupportedTables(originalQuery, + rangeTableList, + plannerRestrictionContext); + if (deferredError) + { + /* MERGE's unsupported combination, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + + Oid resultRelationId = resultRte->relid; + deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId, + originalQuery->jointree, + originalQuery->jointree-> + quals, + originalQuery->targetList, + originalQuery->commandType); + if (deferredError) + { + return deferredError; + } + + /* + * MERGE is a special case where we have multiple modify statements + * within itself. Check each INSERT/UPDATE/DELETE individually. + */ + MergeAction *action = NULL; + foreach_ptr(action, originalQuery->mergeActionList) + { + Assert(originalQuery->returningList == NULL); + deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId, + originalQuery->jointree, + action->qual, + action->targetList, + action->commandType); + if (deferredError) + { + /* MERGE's unsupported scenario, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + } + + deferredError = + InsertDistributionColumnMatchesSource(originalQuery, resultRte); + if (deferredError) + { + /* MERGE's unsupported scenario, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + + if (multiShardQuery) + { + deferredError = + DeferErrorIfUnsupportedSubqueryPushdown(originalQuery, + plannerRestrictionContext); + if (deferredError) + { + return deferredError; + } + } + + if (HasDangerousJoinUsing(originalQuery->rtable, (Node *) originalQuery->jointree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "a join with USING causes an internal naming " + "conflict, use ON instead", NULL, NULL); + } + + return NULL; + + #endif +} + + +/* + * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is + * permitted on special relations, such as materialized view, returns true only if + * it's a "source" relation. + */ +bool +IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) +{ + if (!IsMergeQuery(parse)) + { + return false; + } + + /* Fetch the MERGE target relation */ + RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); + + /* Is it a target relation? */ + if (targetRte->relid == rte->relid) + { + return false; + } + + return true; +} + + +#if PG_VERSION_NUM >= PG_VERSION_15 + +/* + * ErrorIfDistTablesNotColocated Checks to see if + * + * - There are a minimum of two distributed tables (source and a target). + * - All the distributed tables are indeed colocated. + * + * If any of the conditions are not met, it raises an exception. 
+ */
+static DeferredErrorMessage *
+ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList,
+                              PlannerRestrictionContext *
+                              plannerRestrictionContext)
+{
+    /* All MERGE tables must be distributed */
+    if (list_length(distTablesList) < 2)
+    {
+        return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                             "For MERGE command, both the source and target "
+                             "must be distributed", NULL, NULL);
+    }
+
+    /* All distributed tables must be colocated */
+    if (!AllRelationsInRTEListColocated(distTablesList))
+    {
+        return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                             "For MERGE command, all the distributed tables "
+                             "must be colocated", NULL, NULL);
+    }
+
+    return NULL;
+}
+
+
+/*
+ * CheckIfRTETypeIsUnsupported checks for table types that are not supported,
+ * such as reference tables, append-distributed tables, and materialized views
+ * as the target relation. The routine returns NULL for the supported types and
+ * an error message for everything else.
+ */
+static DeferredErrorMessage *
+CheckIfRTETypeIsUnsupported(Query *parse, RangeTblEntry *rangeTableEntry)
+{
+    if (rangeTableEntry->relkind == RELKIND_MATVIEW ||
+        rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE)
+    {
+        /* Materialized view or Foreign table as target is not allowed */
+        if (IsMergeAllowedOnRelation(parse, rangeTableEntry))
+        {
+            /* Non target relation is ok */
+            return NULL;
+        }
+        else
+        {
+            /* Usually we don't reach this exception as the Postgres parser catches it */
+            StringInfo errorMessage = makeStringInfo();
+            appendStringInfo(errorMessage, "MERGE command is not allowed on "
+                                           "relation type(relkind:%c)",
+                             rangeTableEntry->relkind);
+            return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                                 errorMessage->data, NULL, NULL);
+        }
+    }
+
+    if (rangeTableEntry->relkind != RELKIND_RELATION &&
+        rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE)
+    {
+        StringInfo errorMessage = makeStringInfo();
+        appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) "
+                                       "in MERGE command", rangeTableEntry->relkind);
+        return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                             errorMessage->data, NULL, NULL);
+    }
+
+    Assert(rangeTableEntry->relid != 0);
+
+    /* Reference tables are not supported yet */
+    if (IsCitusTableType(rangeTableEntry->relid, REFERENCE_TABLE))
+    {
+        return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                             "MERGE command is not supported on reference "
+                             "tables yet", NULL, NULL);
+    }
+
+    /* Append/Range tables are not supported */
+    if (IsCitusTableType(rangeTableEntry->relid, APPEND_DISTRIBUTED) ||
+        IsCitusTableType(rangeTableEntry->relid, RANGE_DISTRIBUTED))
+    {
+        return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                             "For MERGE command, all the distributed tables "
+                             "must be colocated, for append/range distribution, "
+                             "colocation is not supported", NULL,
+                             "Consider using hash distribution instead");
+    }
+
+    return NULL;
+}
+
+
+/*
+ * ErrorIfMergeHasUnsupportedTables checks whether all the tables (target,
+ * source, or any CTE present) in the MERGE command are local, i.e. a
+ * combination of Citus local and non-Citus tables (regular Postgres tables),
+ * or distributed tables with some restrictions (please see the header of
+ * routine ErrorIfDistTablesNotColocated for details), and raises an exception
+ * for all other combinations.
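+ *
+ * Illustrative summary of the combinations handled below (not exhaustive):
+ *
+ *   all tables regular Postgres and/or Citus local tables  -> supported
+ *   all tables hash-distributed and colocated              -> supported
+ *   mix of distributed and local tables                    -> not supported yet
+ *   reference, append- or range-distributed tables         -> not supported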
+ */
+static DeferredErrorMessage *
+ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList,
+                                 PlannerRestrictionContext *restrictionContext)
+{
+    List *distTablesList = NIL;
+    bool foundLocalTables = false;
+
+    RangeTblEntry *rangeTableEntry = NULL;
+    foreach_ptr(rangeTableEntry, rangeTableList)
+    {
+        Oid relationId = rangeTableEntry->relid;
+
+        switch (rangeTableEntry->rtekind)
+        {
+            case RTE_RELATION:
+            {
+                /* Check the relation type */
+                break;
+            }
+
+            case RTE_SUBQUERY:
+            case RTE_FUNCTION:
+            case RTE_TABLEFUNC:
+            case RTE_VALUES:
+            case RTE_JOIN:
+            case RTE_CTE:
+            {
+                /* Skip them as base table(s) will be checked */
+                continue;
+            }
+
+            /*
+             * RTE_NAMEDTUPLESTORE is typically used in ephemeral named relations,
+             * such as trigger data; until we find a genuine use case, raise an
+             * exception.
+             * RTE_RESULT is a node added by the planner and we shouldn't
+             * encounter it in the parse tree.
+             */
+            case RTE_NAMEDTUPLESTORE:
+            case RTE_RESULT:
+            {
+                return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                                     "MERGE command is not supported with "
+                                     "Tuplestores and results",
+                                     NULL, NULL);
+            }
+
+            default:
+            {
+                return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                                     "MERGE command: Unrecognized range table entry.",
+                                     NULL, NULL);
+            }
+        }
+
+        /* RTE Relation can be of various types, check them now */
+
+        /* skip the regular views as they are replaced with subqueries */
+        if (rangeTableEntry->relkind == RELKIND_VIEW)
+        {
+            continue;
+        }
+
+        DeferredErrorMessage *errorMessage =
+            CheckIfRTETypeIsUnsupported(parse, rangeTableEntry);
+        if (errorMessage)
+        {
+            return errorMessage;
+        }
+
+        /*
+         * For now, save all distributed tables, later (below) we will
+         * check for supported combination(s).
+         */
+        if (IsCitusTableType(relationId, DISTRIBUTED_TABLE))
+        {
+            distTablesList = lappend(distTablesList, rangeTableEntry);
+            continue;
+        }
+
+        /* Regular Postgres tables and Citus local tables are allowed */
+        if (!IsCitusTable(relationId) ||
+            IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
+        {
+            foundLocalTables = true;
+            continue;
+        }
+
+        /* Any other Citus table type missing? */
+    }
+
+    /* Ensure all tables are indeed local */
+    if (foundLocalTables && list_length(distTablesList) == 0)
+    {
+        /* All the tables are local, supported */
+        return NULL;
+    }
+    else if (foundLocalTables && list_length(distTablesList) > 0)
+    {
+        return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                             "MERGE command is not supported with "
+                             "combination of distributed/local tables yet",
+                             NULL, NULL);
+    }
+
+    /* Ensure all distributed tables are indeed co-located */
+    return ErrorIfDistTablesNotColocated(parse,
+                                         distTablesList,
+                                         restrictionContext);
+}
+
+
+/*
+ * IsDistributionColumnInMergeSource returns true if the given column is the
+ * distribution column. The function uses FindReferencedTableColumn to find
+ * the original relation id and column that the column expression refers to.
+ * It then checks whether that column is the distribution column of the relation.
+ *
+ * Also, the function always returns false for reference tables given that
+ * reference tables do not have a distribution column.
+ *
+ * If skipOuterVars is true, then it doesn't process the outer Vars.
+ */
+bool
+IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query,
+                                  bool skipOuterVars)
+{
+    bool isDistributionColumn = false;
+    Var *column = NULL;
+    RangeTblEntry *relationRTE = NULL;
+
+    /* ParentQueryList is same as the original query for MERGE */
+    FindReferencedTableColumn(columnExpression, list_make1(query), query, &column,
+                              &relationRTE,
+                              skipOuterVars);
+    Oid relationId = relationRTE ? relationRTE->relid : InvalidOid;
+    if (relationId != InvalidOid && column != NULL)
+    {
+        Var *distributionColumn = DistPartitionKey(relationId);
+
+        /* not all distributed tables have a partition column */
+        if (distributionColumn != NULL && column->varattno ==
+            distributionColumn->varattno)
+        {
+            isDistributionColumn = true;
+        }
+    }
+
+    return isDistributionColumn;
+}
+
+
+/*
+ * InsertDistributionColumnMatchesSource checks whether MERGE inserts a value
+ * into the target's distribution column that does not come from the source
+ * table; if so, it raises an exception.
+ * Note: inserting arbitrary values other than the joined column values would
+ * cause rows to end up in incorrect shards; to prevent such mishaps, we
+ * disallow such inserts here.
+ */
+static DeferredErrorMessage *
+InsertDistributionColumnMatchesSource(Query *query, RangeTblEntry *resultRte)
+{
+    Assert(IsMergeQuery(query));
+
+    if (!IsCitusTableType(resultRte->relid, DISTRIBUTED_TABLE))
+    {
+        return NULL;
+    }
+
+    bool foundDistributionColumn = false;
+    MergeAction *action = NULL;
+    foreach_ptr(action, query->mergeActionList)
+    {
+        /* Skip MATCHED clause as INSERTs are not allowed in it */
+        if (action->matched)
+        {
+            continue;
+        }
+
+        /* NOT MATCHED can have either INSERT or DO NOTHING */
+        if (action->commandType == CMD_NOTHING)
+        {
+            return NULL;
+        }
+
+        if (action->targetList == NIL)
+        {
+            /* INSERT DEFAULT VALUES is not allowed */
+            return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                                 "cannot perform MERGE INSERT with DEFAULTS",
+                                 NULL, NULL);
+        }
+
+        Assert(action->commandType == CMD_INSERT);
+        Var *targetKey = PartitionColumn(resultRte->relid, 1);
+
+        TargetEntry *targetEntry = NULL;
+        foreach_ptr(targetEntry, action->targetList)
+        {
+            AttrNumber originalAttrNo = targetEntry->resno;
+
+            /* skip processing of target table non-partition columns */
+            if (originalAttrNo != targetKey->varattno)
+            {
+                continue;
+            }
+
+            foundDistributionColumn = true;
+
+            if (IsA(targetEntry->expr, Var))
+            {
+                if (IsDistributionColumnInMergeSource(targetEntry->expr, query, true))
+                {
+                    return NULL;
+                }
+                else
+                {
+                    return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                                         "MERGE INSERT must use the source table "
+                                         "distribution column value",
+                                         NULL, NULL);
+                }
+            }
+            else
+            {
+                return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                                     "MERGE INSERT must refer a source column "
+                                     "for distribution column ",
+                                     NULL, NULL);
+            }
+        }
+
+        if (!foundDistributionColumn)
+        {
+            return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+                                 "MERGE INSERT must have distribution column as value",
+                                 NULL, NULL);
+        }
+    }
+
+    return NULL;
+}
+
+
+/*
+ * MergeQualAndTargetListFunctionsSupported checks the WHEN/ON clause actions
+ * to see which functions are allowed, whether we are updating the distribution
+ * column, etc.
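+ *
+ * For example, a hypothetical action qual such as
+ *
+ *   WHEN MATCHED AND random() < 0.5 THEN UPDATE SET ...
+ *
+ * is rejected below because random() is VOLATILE.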
+ */ +static DeferredErrorMessage * +MergeQualAndTargetListFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, + Node *quals, + List *targetList, CmdType commandType) +{ + uint32 rangeTableId = 1; + Var *distributionColumn = NULL; + if (IsCitusTable(resultRelationId) && HasDistributionKey(resultRelationId)) + { + distributionColumn = PartitionColumn(resultRelationId, rangeTableId); + } + + ListCell *targetEntryCell = NULL; + bool hasVarArgument = false; /* A STABLE function is passed a Var argument */ + bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */ + foreach(targetEntryCell, targetList) + { + TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); + + /* skip resjunk entries: UPDATE adds some for ctid, etc. */ + if (targetEntry->resjunk) + { + continue; + } + + bool targetEntryDistributionColumn = false; + AttrNumber targetColumnAttrNumber = InvalidAttrNumber; + + if (distributionColumn) + { + if (commandType == CMD_UPDATE) + { + /* + * Note that it is not possible to give an alias to + * UPDATE table SET ... + */ + if (targetEntry->resname) + { + targetColumnAttrNumber = get_attnum(resultRelationId, + targetEntry->resname); + if (targetColumnAttrNumber == distributionColumn->varattno) + { + targetEntryDistributionColumn = true; + } + } + } + } + + if (targetEntryDistributionColumn && + TargetEntryChangesValue(targetEntry, distributionColumn, joinTree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "updating the distribution column is not " + "allowed in MERGE actions", + NULL, NULL); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + CitusIsVolatileFunction)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "functions used in MERGE actions on distributed " + "tables must not be VOLATILE", + NULL, NULL); + } + + if (MasterIrreducibleExpression((Node *) targetEntry->expr, + &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + NodeIsFieldStore)) + { + /* DELETE cannot do field indirection already */ + Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "inserting or modifying composite type fields is not " + "supported", NULL, + "Use the column name to insert or update the composite " + "type as a single value"); + } + } + + + /* + * Check the condition, convert list of expressions into expression tree for further processing + */ + if (quals) + { + if (IsA(quals, List)) + { + quals = (Node *) make_ands_explicit((List *) quals); + } + + if (FindNodeMatchingCheckFunction((Node *) quals, CitusIsVolatileFunction)) + { + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "functions used in the %s clause of MERGE " + "queries on distributed tables must not be VOLATILE", + (commandType == CMD_MERGE) ? 
"ON" : "WHEN"); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + else if (MasterIrreducibleExpression(quals, &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + } + + if (hasVarArgument) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "STABLE functions used in MERGE queries " + "cannot be called with column references", + NULL, NULL); + } + + if (hasBadCoalesce) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not allowed in CASE or " + "COALESCE statements", + NULL, NULL); + } + + if (quals != NULL && nodeTag(quals) == T_CurrentOfExpr) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot run MERGE actions with cursors", + NULL, NULL); + } + + return NULL; +} + + +#endif diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index b30dddeb7..be6caf0e2 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -2225,17 +2225,14 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId, } /* - * For left joins we don't care about the shards pruned for - * the right hand side. If the right hand side would prune - * to a smaller set we should still send it to all tables - * of the left hand side. However if the right hand side is - * bigger than the left hand side we don't have to send the - * query to any shard that is not matching anything on the - * left hand side. + * For left joins we don't care about the shards pruned for the right hand side. + * If the right hand side would prune to a smaller set we should still send it to + * all tables of the left hand side. However if the right hand side is bigger than + * the left hand side we don't have to send the query to any shard that is not + * matching anything on the left hand side. * - * Instead we will simply skip any RelationRestriction if it - * is an OUTER join and the table is part of the non-outer - * side of the join. + * Instead we will simply skip any RelationRestriction if it is an OUTER join and + * the table is part of the non-outer side of the join. 
*/ if (IsInnerTableOfOuterJoin(relationRestriction)) { diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 99beff2c8..407aeaf65 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -33,6 +33,7 @@ #include "distributed/intermediate_result_pruning.h" #include "distributed/metadata_utility.h" #include "distributed/coordinator_protocol.h" +#include "distributed/merge_planner.h" #include "distributed/metadata_cache.h" #include "distributed/multi_executor.h" #include "distributed/multi_join_order.h" @@ -125,21 +126,15 @@ static bool IsTidColumn(Node *node); static DeferredErrorMessage * ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery, Oid *distributedTableId); -static bool NodeIsFieldStore(Node *node); -static DeferredErrorMessage * MultiShardUpdateDeleteMergeSupported(Query *originalQuery, - PlannerRestrictionContext - * - plannerRestrictionContext); +static DeferredErrorMessage * MultiShardUpdateDeleteSupported(Query *originalQuery, + PlannerRestrictionContext + * + plannerRestrictionContext); static DeferredErrorMessage * SingleShardUpdateDeleteSupported(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext); -static bool HasDangerousJoinUsing(List *rtableList, Node *jtnode); -static bool MasterIrreducibleExpression(Node *expression, bool *varArgument, - bool *badCoalesce); static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state); static bool MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context); -static bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, - FromExpr *joinTree); static Job * RouterInsertJob(Query *originalQuery); static void ErrorIfNoShardsExist(CitusTableCacheEntry *cacheEntry); static DeferredErrorMessage * DeferErrorIfModifyView(Query *queryTree); @@ -179,12 +174,8 @@ static void ReorderTaskPlacementsByTaskAssignmentPolicy(Job *job, static bool ModifiesLocalTableWithRemoteCitusLocalTable(List *rangeTableList); static DeferredErrorMessage * DeferErrorIfUnsupportedLocalTableJoin(List *rangeTableList); static bool IsLocallyAccessibleCitusLocalTable(Oid relationId); -static DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, - FromExpr *joinTree, - Node *quals, - List *targetList, - CmdType commandType, - List *returningList); + + /* * CreateRouterPlan attempts to create a router executor plan for the given * SELECT statement. ->planningError is set if planning fails. @@ -521,7 +512,7 @@ IsTidColumn(Node *node) * updating distribution column, etc. * Note: This subset of checks are repeated for each MERGE modify action. */ -static DeferredErrorMessage * +DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, Node *quals, List *targetList, CmdType commandType, List *returningList) @@ -897,92 +888,13 @@ IsLocallyAccessibleCitusLocalTable(Oid relationId) /* * NodeIsFieldStore returns true if given Node is a FieldStore object. 
*/ -static bool +bool NodeIsFieldStore(Node *node) { return node && IsA(node, FieldStore); } -/* - * MergeQuerySupported does check for a MERGE command in the query, if it finds - * one, it will verify the below criteria - * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables - * - Distributed tables requirements in ErrorIfDistTablesNotColocated - * - Checks target-lists and functions-in-quals in TargetlistAndFunctionsSupported - */ -static DeferredErrorMessage * -MergeQuerySupported(Query *originalQuery, - PlannerRestrictionContext *plannerRestrictionContext) -{ - /* For non-MERGE commands it's a no-op */ - if (!QueryHasMergeCommand(originalQuery)) - { - return NULL; - } - - List *rangeTableList = ExtractRangeTableEntryList(originalQuery); - RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery); - - /* - * Fast path queries cannot have merge command, and we prevent the remaining here. - * In Citus we have limited support for MERGE, it's allowed only if all - * the tables(target, source or any CTE) tables are are local i.e. a - * combination of Citus local and Non-Citus tables (regular Postgres tables) - * or distributed tables with some restrictions, please see header of routine - * ErrorIfDistTablesNotColocated for details. - */ - DeferredErrorMessage *deferredError = - ErrorIfMergeHasUnsupportedTables(originalQuery, - rangeTableList, - plannerRestrictionContext); - if (deferredError) - { - return deferredError; - } - - Oid resultRelationId = resultRte->relid; - deferredError = - TargetlistAndFunctionsSupported(resultRelationId, - originalQuery->jointree, - originalQuery->jointree->quals, - originalQuery->targetList, - originalQuery->commandType, - originalQuery->returningList); - if (deferredError) - { - return deferredError; - } - - #if PG_VERSION_NUM >= PG_VERSION_15 - - /* - * MERGE is a special case where we have multiple modify statements - * within itself. Check each INSERT/UPDATE/DELETE individually. - */ - MergeAction *action = NULL; - foreach_ptr(action, originalQuery->mergeActionList) - { - Assert(originalQuery->returningList == NULL); - deferredError = - TargetlistAndFunctionsSupported(resultRelationId, - originalQuery->jointree, - action->qual, - action->targetList, - action->commandType, - originalQuery->returningList); - if (deferredError) - { - return deferredError; - } - } - - #endif - - return NULL; -} - - /* * ModifyQuerySupported returns NULL if the query only contains supported * features, otherwise it returns an error description. @@ -998,14 +910,11 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer PlannerRestrictionContext *plannerRestrictionContext) { Oid distributedTableId = InvalidOid; - DeferredErrorMessage *error = MergeQuerySupported(originalQuery, + DeferredErrorMessage *error = MergeQuerySupported(originalQuery, multiShardQuery, plannerRestrictionContext); if (error) { - /* - * For MERGE, we do not do recursive plannning, simply bail out. 
- */ - RaiseDeferredError(error, ERROR); + return error; } error = ModifyPartialQuerySupported(queryTree, multiShardQuery, &distributedTableId); @@ -1178,13 +1087,13 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer } } - if (commandType != CMD_INSERT) + if (commandType != CMD_INSERT && commandType != CMD_MERGE) { DeferredErrorMessage *errorMessage = NULL; if (multiShardQuery) { - errorMessage = MultiShardUpdateDeleteMergeSupported( + errorMessage = MultiShardUpdateDeleteSupported( originalQuery, plannerRestrictionContext); } @@ -1365,12 +1274,12 @@ ErrorIfOnConflictNotSupported(Query *queryTree) /* - * MultiShardUpdateDeleteMergeSupported returns the error message if the update/delete is + * MultiShardUpdateDeleteSupported returns the error message if the update/delete is * not pushdownable, otherwise it returns NULL. */ static DeferredErrorMessage * -MultiShardUpdateDeleteMergeSupported(Query *originalQuery, - PlannerRestrictionContext *plannerRestrictionContext) +MultiShardUpdateDeleteSupported(Query *originalQuery, + PlannerRestrictionContext *plannerRestrictionContext) { DeferredErrorMessage *errorMessage = NULL; RangeTblEntry *resultRangeTable = ExtractResultRelationRTE(originalQuery); @@ -1401,8 +1310,9 @@ MultiShardUpdateDeleteMergeSupported(Query *originalQuery, } else { - errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(originalQuery, - plannerRestrictionContext); + errorMessage = DeferErrorIfUnsupportedSubqueryPushdown( + originalQuery, + plannerRestrictionContext); } return errorMessage; @@ -1442,7 +1352,7 @@ SingleShardUpdateDeleteSupported(Query *originalQuery, * HasDangerousJoinUsing search jointree for unnamed JOIN USING. Check the * implementation of has_dangerous_join_using in ruleutils. */ -static bool +bool HasDangerousJoinUsing(List *rtableList, Node *joinTreeNode) { if (IsA(joinTreeNode, RangeTblRef)) @@ -1546,7 +1456,7 @@ IsMergeQuery(Query *query) * which do, but for now we just error out. That makes both the code and user-education * easier. */ -static bool +bool MasterIrreducibleExpression(Node *expression, bool *varArgument, bool *badCoalesce) { WalkerState data; @@ -1694,7 +1604,7 @@ MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context) * expression is a value that is implied by the qualifiers of the join * tree, or the target entry sets a different column. */ -static bool +bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, FromExpr *joinTree) { bool isColumnValueChanged = true; @@ -1965,8 +1875,8 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon if (*planningError) { /* - * For MERGE, we do _not_ plan anything other than Router job, let's - * not continue further down the lane in distributed planning, simply + * For MERGE, we do _not_ plan any other router job than the MERGE job itself, + * let's not continue further down the lane in distributed planning, simply * bail out. */ if (IsMergeQuery(originalQuery)) @@ -4056,263 +3966,3 @@ CompareInsertValuesByShardId(const void *leftElement, const void *rightElement) } } } - - -/* - * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is - * permitted on special relations, such as materialized view, returns true only if - * it's a "source" relation. - */ -bool -IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) -{ - if (!IsMergeQuery(parse)) - { - return false; - } - - RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); - - /* Is it a target relation? 
*/ - if (targetRte->relid == rte->relid) - { - return false; - } - - return true; -} - - -/* - * ErrorIfDistTablesNotColocated Checks to see if - * - * - There are a minimum of two distributed tables (source and a target). - * - All the distributed tables are indeed colocated. - * - MERGE relations are joined on the distribution column - * MERGE .. USING .. ON target.dist_key = source.dist_key - * - * If any of the conditions are not met, it raises an exception. - */ -static DeferredErrorMessage * -ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList, - PlannerRestrictionContext *plannerRestrictionContext) -{ - /* All MERGE tables must be distributed */ - if (list_length(distTablesList) < 2) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "For MERGE command, both the source and target " - "must be distributed", NULL, NULL); - } - - /* All distributed tables must be colocated */ - if (!AllRelationsInListColocated(distTablesList, RANGETABLE_ENTRY)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "For MERGE command, all the distributed tables " - "must be colocated", NULL, NULL); - } - - /* Are source and target tables joined on distribution column? */ - if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command is only supported when distributed " - "tables are joined on their distribution column", - NULL, NULL); - } - - return NULL; -} - - -/* - * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE - * present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus - * tables (regular Postgres tables), or distributed tables with some restrictions, please - * see header of routine ErrorIfDistTablesNotColocated for details, raises an exception - * for all other combinations. - */ -static DeferredErrorMessage * -ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList, - PlannerRestrictionContext *restrictionContext) -{ - List *distTablesList = NIL; - bool foundLocalTables = false; - - RangeTblEntry *rangeTableEntry = NULL; - foreach_ptr(rangeTableEntry, rangeTableList) - { - Oid relationId = rangeTableEntry->relid; - - switch (rangeTableEntry->rtekind) - { - case RTE_RELATION: - { - /* Check the relation type */ - break; - } - - case RTE_SUBQUERY: - case RTE_FUNCTION: - case RTE_TABLEFUNC: - case RTE_VALUES: - case RTE_JOIN: - case RTE_CTE: - { - /* Skip them as base table(s) will be checked */ - continue; - } - - /* - * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, - * such as, trigger data; until we find a genuine use case, raise an - * exception. - * RTE_RESULT is a node added by the planner and we shouldn't - * encounter it in the parse tree. 
- */ - case RTE_NAMEDTUPLESTORE: - case RTE_RESULT: - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command is not supported with " - "Tuplestores and results", - NULL, NULL); - } - - default: - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command: Unrecognized range table entry.", - NULL, NULL); - } - } - - /* RTE Relation can be of various types, check them now */ - - /* skip the regular views as they are replaced with subqueries */ - if (rangeTableEntry->relkind == RELKIND_VIEW) - { - continue; - } - - if (rangeTableEntry->relkind == RELKIND_MATVIEW || - rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) - { - /* Materialized view or Foreign table as target is not allowed */ - if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) - { - /* Non target relation is ok */ - continue; - } - else - { - /* Usually we don't reach this exception as the Postgres parser catches it */ - StringInfo errorMessage = makeStringInfo(); - appendStringInfo(errorMessage, - "MERGE command is not allowed on " - "relation type(relkind:%c)", rangeTableEntry->relkind); - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorMessage->data, - NULL, NULL); - } - } - - if (rangeTableEntry->relkind != RELKIND_RELATION && - rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) - { - StringInfo errorMessage = makeStringInfo(); - appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) " - "in MERGE command", rangeTableEntry->relkind); - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorMessage->data, - NULL, NULL); - } - - Assert(rangeTableEntry->relid != 0); - - /* Reference tables are not supported yet */ - if (IsCitusTableType(relationId, REFERENCE_TABLE)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command is not supported on reference " - "tables yet", NULL, NULL); - } - - /* Append/Range tables are not supported */ - if (IsCitusTableType(relationId, APPEND_DISTRIBUTED) || - IsCitusTableType(relationId, RANGE_DISTRIBUTED)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "For MERGE command, all the distributed tables " - "must be colocated, for append/range distribution, " - "colocation is not supported", NULL, - "Consider using hash distribution instead"); - } - - /* - * For now, save all distributed tables, later (below) we will - * check for supported combination(s). - */ - if (IsCitusTableType(relationId, DISTRIBUTED_TABLE)) - { - distTablesList = lappend(distTablesList, rangeTableEntry); - continue; - } - - /* Regular Postgres tables and Citus local tables are allowed */ - if (!IsCitusTable(relationId) || - IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) - { - foundLocalTables = true; - continue; - } - - /* Any other Citus table type missing ? */ - } - - /* Ensure all tables are indeed local */ - if (foundLocalTables && list_length(distTablesList) == 0) - { - /* All the tables are local, supported */ - return NULL; - } - else if (foundLocalTables && list_length(distTablesList) > 0) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command is not supported with " - "combination of distributed/local tables yet", - NULL, NULL); - } - - /* Ensure all distributed tables are indeed co-located */ - return ErrorIfDistTablesNotColocated(parse, distTablesList, restrictionContext); -} - - -/* - * QueryHasMergeCommand walks over the query tree and returns false if there - * is no Merge command (e.g., CMD_MERGE), true otherwise. 
- */ -static bool -QueryHasMergeCommand(Query *queryTree) -{ - /* function is void for pre-15 versions of Postgres */ - #if PG_VERSION_NUM < PG_VERSION_15 - return false; - #else - - /* - * Postgres currently doesn't support Merge queries inside subqueries and - * ctes, but lets be defensive and do query tree walk anyway. - * - * We do not call this path for fast-path queries to avoid this additional - * overhead. - */ - if (!ContainsMergeCommandWalker((Node *) queryTree)) - { - /* No MERGE found */ - return false; - } - - return true; - #endif -} diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 5cae19497..cbe6a3606 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -591,10 +591,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, } else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) { + StringInfo errorMessage = makeStringInfo(); + bool isMergeCmd = IsMergeQuery(originalQuery); + appendStringInfo(errorMessage, + "%s" + "only supported when all distributed tables are " + "co-located and joined on their distribution columns", + isMergeCmd ? "MERGE command is " : "complex joins are "); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "complex joins are only supported when all distributed tables are " - "co-located and joined on their distribution columns", - NULL, NULL); + errorMessage->data, NULL, NULL); } /* we shouldn't allow reference tables in the FROM clause when the query has sublinks */ diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index f76a95d26..5c91ee79c 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -151,6 +151,9 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass secondClass); static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex, Index *partitionKeyIndex); +static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext * + restrictionContext); +static bool AllRelationsInListColocated(List *relationList); static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node); static JoinRestrictionContext * FilterJoinRestrictionContext( JoinRestrictionContext *joinRestrictionContext, Relids @@ -381,8 +384,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery, return false; } - if (!AllRelationsInListColocated(restrictionContext->relationRestrictionList, - RESTRICTION_CONTEXT)) + if (!AllRelationsInRestrictionContextColocated(restrictionContext)) { /* distribution columns are equal, but tables are not co-located */ return false; @@ -1918,34 +1920,56 @@ FindQueryContainingRTEIdentityInternal(Node *node, /* - * AllRelationsInListColocated determines whether all of the relations in the - * given list are co-located. - * Note: The list can be of dofferent types, which is specified by ListEntryType + * AllRelationsInRestrictionContextColocated determines whether all of the relations in the + * given relation restrictions list are co-located. 
 */
-bool
-AllRelationsInListColocated(List *relationList, ListEntryType entryType)
+static bool
+AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext)
 {
-	void *varPtr = NULL;
-	RangeTblEntry *rangeTableEntry = NULL;
 	RelationRestriction *relationRestriction = NULL;
-	int initialColocationId = INVALID_COLOCATION_ID;
+	List *relationIdList = NIL;
 
 	/* check whether all relations exists in the main restriction list */
-	foreach_ptr(varPtr, relationList)
+	foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList)
 	{
-		Oid relationId = InvalidOid;
+		relationIdList = lappend_oid(relationIdList, relationRestriction->relationId);
+	}
 
-		if (entryType == RANGETABLE_ENTRY)
-		{
-			rangeTableEntry = (RangeTblEntry *) varPtr;
-			relationId = rangeTableEntry->relid;
-		}
-		else if (entryType == RESTRICTION_CONTEXT)
-		{
-			relationRestriction = (RelationRestriction *) varPtr;
-			relationId = relationRestriction->relationId;
-		}
+	return AllRelationsInListColocated(relationIdList);
+}
+
+/*
+ * AllRelationsInRTEListColocated determines whether all of the relations in the
+ * given RangeTableEntry list are co-located.
+ */
+bool
+AllRelationsInRTEListColocated(List *rangeTableEntryList)
+{
+	RangeTblEntry *rangeTableEntry = NULL;
+	List *relationIdList = NIL;
+
+	foreach_ptr(rangeTableEntry, rangeTableEntryList)
+	{
+		relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid);
+	}
+
+	return AllRelationsInListColocated(relationIdList);
+}
+
+
+/*
+ * AllRelationsInListColocated determines whether all of the relations in the
+ * given list are co-located.
+ */
+static bool
+AllRelationsInListColocated(List *relationList)
+{
+	int initialColocationId = INVALID_COLOCATION_ID;
+	Oid relationId = InvalidOid;
+
+	foreach_oid(relationId, relationList)
+	{
 		if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
 		{
 			continue;
diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h
index 19bd9f0c2..412859449 100644
--- a/src/include/distributed/distributed_planner.h
+++ b/src/include/distributed/distributed_planner.h
@@ -255,10 +255,4 @@ extern struct DistributedPlan * CreateDistributedPlan(uint64 planId,
 													  PlannerRestrictionContext *
 													  plannerRestrictionContext);
 
-extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte);
-extern bool ConjunctionContainsColumnFilter(Node *node,
-											Var *column,
-											Node **distributionKeyValue);
-extern bool ContainsMergeCommandWalker(Node *node);
-
 #endif   /* DISTRIBUTED_PLANNER_H */
diff --git a/src/include/distributed/merge_planner.h b/src/include/distributed/merge_planner.h
new file mode 100644
index 000000000..243be14d0
--- /dev/null
+++ b/src/include/distributed/merge_planner.h
@@ -0,0 +1,26 @@
+/*-------------------------------------------------------------------------
+ *
+ * merge_planner.h
+ *
+ * Declarations for public functions and types related to merge planning.
+ *
+ * Copyright (c) Citus Data, Inc.
+ * + *------------------------------------------------------------------------- + */ + +#ifndef MERGE_PLANNER_H +#define MERGE_PLANNER_H + +#include "c.h" + +#include "nodes/parsenodes.h" +#include "distributed/distributed_planner.h" +#include "distributed/errormessage.h" + +extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte); +extern DeferredErrorMessage * MergeQuerySupported(Query *originalQuery, + bool multiShardQuery, + PlannerRestrictionContext * + plannerRestrictionContext); +#endif /* MERGE_PLANNER_H */ diff --git a/src/include/distributed/multi_router_planner.h b/src/include/distributed/multi_router_planner.h index 07d160865..698a0fd60 100644 --- a/src/include/distributed/multi_router_planner.h +++ b/src/include/distributed/multi_router_planner.h @@ -100,6 +100,17 @@ extern PlannedStmt * FastPathPlanner(Query *originalQuery, Query *parse, ParamLi extern bool FastPathRouterQuery(Query *query, Node **distributionKeyValue); extern bool JoinConditionIsOnFalse(List *relOptInfo); extern Oid ResultRelationOidForQuery(Query *query); - +extern DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, + FromExpr *joinTree, + Node *quals, + List *targetList, + CmdType commandType, + List *returningList); +extern bool NodeIsFieldStore(Node *node); +extern bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, + FromExpr *joinTree); +extern bool MasterIrreducibleExpression(Node *expression, bool *varArgument, + bool *badCoalesce); +extern bool HasDangerousJoinUsing(List *rtableList, Node *jtnode); #endif /* MULTI_ROUTER_PLANNER_H */ diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index 4fd9c7015..e0e716c7e 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -17,15 +17,6 @@ #define SINGLE_RTE_INDEX 1 -/* - * Represents the pointer type that's being passed in the list. 
- */ -typedef enum ListEntryType -{ - RANGETABLE_ENTRY, /* RangeTblEntry */ - RESTRICTION_CONTEXT /* RelationRestriction */ -} ListEntryType; - extern bool AllDistributionKeysInQueryAreEqual(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext); @@ -63,6 +54,5 @@ extern RelationRestrictionContext * FilterRelationRestrictionContext( RelationRestrictionContext *relationRestrictionContext, Relids queryRteIdentities); -extern bool AllRelationsInListColocated(List *relationList, ListEntryType entryType); - +extern bool AllRelationsInRTEListColocated(List *rangeTableEntryList); #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ diff --git a/src/test/regress/create_schedule b/src/test/regress/create_schedule index 82dfa2475..db2ae92be 100644 --- a/src/test/regress/create_schedule +++ b/src/test/regress/create_schedule @@ -13,3 +13,4 @@ test: arbitrary_configs_truncate_create test: arbitrary_configs_truncate_cascade_create test: arbitrary_configs_truncate_partition_create test: arbitrary_configs_alter_table_add_constraint_without_name_create +test: merge_arbitrary_create diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 02671acd0..e2b3aea65 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -17,8 +17,9 @@ CREATE SCHEMA merge_schema; SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; -SET citus.explain_all_tasks to true; +SET citus.explain_all_tasks TO true; SET citus.shard_replication_factor TO 1; +SET citus.max_adaptive_executor_pool_size TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ?column? 
@@ -268,6 +269,29 @@ SELECT * from target t WHERE t.customer_id = 30004; --------------------------------------------------------------------- (0 rows) +-- Updating distribution column is allowed if the operation is a no-op +SELECT * from target t WHERE t.customer_id = 30000; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022 +(1 row) + +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = 30000; +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = t.customer_id; +SELECT * from target t WHERE t.customer_id = 30000; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022 +(1 row) + -- -- Test MERGE with CTE as source -- @@ -310,7 +334,6 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (pg_res.id, pg_res.val); --- Two rows with id 2 and val incremented, id 3, and id 1 is deleted SELECT * FROM t1 order by id; id | val --------------------------------------------------------------------- @@ -1200,7 +1223,8 @@ END; $$ language plpgsql volatile; CREATE TABLE fn_target(id int, data varchar); MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -1216,29 +1240,22 @@ SELECT citus_add_local_table_to_metadata('fn_target'); (1 row) -SELECT create_distributed_table('dist_table', 'id'); -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. 
-HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.dist_table$$) - create_distributed_table +SELECT citus_add_local_table_to_metadata('dist_table'); + citus_add_local_table_to_metadata --------------------------------------------------------------------- (1 row) SET client_min_messages TO DEBUG1; MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT VALUES(fn_source.id, fn_source.source); -DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT id, source FROM merge_schema.f_dist() f(id integer, source character varying) -DEBUG: -DEBUG: Plan XXX query after replacing subqueries and CTEs: MERGE INTO merge_schema.fn_target USING (SELECT intermediate_result.id, intermediate_result.source FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, source character varying)) fn_source ON (fn_source.id OPERATOR(pg_catalog.=) fn_target.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT (id, data) VALUES (fn_source.id, fn_source.source) -DEBUG: +DEBUG: RESET client_min_messages; SELECT * INTO fn_local FROM fn_target ORDER BY 1 ; -- Should be equal @@ -1959,7 +1976,7 @@ ON pg_target.id = sub.id AND pg_target.id = $1 WHEN MATCHED THEN UPDATE SET val = 'Updated by prepare using ' || sub.val WHEN NOT MATCHED THEN - DO NOTHING; + INSERT VALUES (sub.id, sub.val); PREPARE citus_prep(int) AS MERGE INTO citus_target USING (SELECT * FROM citus_source) sub @@ -1967,15 +1984,20 @@ ON citus_target.id = sub.id AND citus_target.id = $1 WHEN MATCHED THEN UPDATE SET val = 'Updated by prepare using ' || sub.val WHEN NOT MATCHED THEN - DO NOTHING; + INSERT VALUES (sub.id, sub.val); BEGIN; -SET citus.log_remote_commands to true; SELECT * FROM pg_target WHERE id = 500; -- before merge id | val --------------------------------------------------------------------- 500 | target (1 row) +SELECT count(*) FROM pg_target; -- before merge + count +--------------------------------------------------------------------- + 251 +(1 row) + EXECUTE pg_prep(500); SELECT * FROM pg_target WHERE id = 500; -- non-cached id | val @@ -1994,18 +2016,33 @@ SELECT * FROM pg_target WHERE id = 500; -- cached 500 | Updated by prepare using source (1 row) +SELECT count(*) FROM pg_target; -- cached + count +--------------------------------------------------------------------- + 3245 +(1 row) + SELECT * FROM citus_target WHERE id = 500; -- before merge -NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | val --------------------------------------------------------------------- 500 | target (1 row) +SELECT count(*) FROM citus_target; -- before merge + count +--------------------------------------------------------------------- + 251 +(1 row) + +SET citus.log_remote_commands to true; EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, 
citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx SELECT * FROM citus_target WHERE id = 500; -- non-cached NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) @@ -2016,29 +2053,63 @@ DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx (1 row) EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) 
AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx 
connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) 
WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT 
citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; SELECT * FROM citus_target WHERE id = 500; -- cached -NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | val --------------------------------------------------------------------- 500 | Updated by prepare using source (1 row) -SET citus.log_remote_commands to false; +SELECT count(*) FROM citus_target; -- cached + count +--------------------------------------------------------------------- + 3245 +(1 row) + SELECT compare_tables(); compare_tables --------------------------------------------------------------------- @@ -2165,9 +2236,263 @@ SELECT pa_compare_tables(); (1 row) ROLLBACK; +CREATE TABLE source_json( id integer, z int, d jsonb); +CREATE TABLE target_json( id integer, z int, d jsonb); +INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i; +SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_json$$) + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +-- single shard query given source_json is filtered and Postgres is smart to pushdown +-- filter to the target_json as well +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING (SELECT * FROM source_json WHERE id = 1) sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 1 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 1 | 5 | +(1 row) + +-- zero shard query as filters do not match +--SELECT public.coordinator_plan($Q$ +--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +--USING (SELECT * FROM source_json WHERE id = 1) sdn +--ON sda.id = sdn.id AND sda.id = 2 +--WHEN NOT matched THEN +-- INSERT (id, z) VALUES (sdn.id, 5); +--$Q$); +--SELECT * FROM target_json ORDER BY 1; +-- join for source_json is happening at a different place +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z) +ON sda.id = s1.id AND s1.id = s2.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (s2.id, 5); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 4 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 0 | 5 | + 1 | 5 | + 2 | 5 | + 3 | 5 | + 4 | 5 | + 5 | 5 | +(6 rows) + +-- update JSON column +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET d = '{"a" : 5}'; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 4 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 0 | 5 | {"a": 5} + 1 | 5 | {"a": 5} + 2 | 5 | {"a": 5} + 3 | 5 | {"a": 5} + 4 | 5 | {"a": 5} + 5 | 5 | {"a": 5} +(6 rows) + +CREATE FUNCTION immutable_hash(int) RETURNS int +AS 'SELECT hashtext( ($1 + $1)::text);' +LANGUAGE SQL +IMMUTABLE +RETURNS NULL ON NULL INPUT; +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET z = immutable_hash(sdn.z); +-- Test bigserial +CREATE TABLE source_serial (id integer, z int, d bigserial); +CREATE TABLE target_serial (id integer, z int, d bigserial); +INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i; +SELECT create_distributed_table('source_serial', 'id'), + create_distributed_table('target_serial', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_serial$$) + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +MERGE INTO target_serial sda +USING source_serial sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (id, z); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +SELECT count(*) from source_serial; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(*) from target_serial; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(distinct d) from source_serial; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(distinct d) from target_serial; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Test set operations +CREATE TABLE target_set(t1 int, t2 int); +CREATE TABLE source_set(s1 int, s2 int); +SELECT create_distributed_table('target_set', 't1'), + create_distributed_table('source_set', 's1'); + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +INSERT INTO target_set VALUES(1, 0); +INSERT INTO source_set VALUES(1, 1); +INSERT INTO source_set VALUES(2, 2); +MERGE INTO target_set +USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 100 +WHEN NOT MATCHED THEN + INSERT VALUES(foo.s1); +SELECT * FROM target_set ORDER BY 1, 2; + t1 | t2 +--------------------------------------------------------------------- + 1 | 100 + 2 | +(2 rows) + -- -- Error and Unsupported scenarios -- +MERGE INTO target_set +USING (SELECT s1,s2 FROM source_set UNION SELECT s2,s1 FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +MERGE INTO target_set +USING (SELECT 2 as s3, source_set.* FROM (SELECT * FROM source_set LIMIT 1) as foo LEFT JOIN source_set USING( s1)) AS foo +ON target_set.t1 = foo.s1 +WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 +WHEN NOT MATCHED THEN INSERT VALUES(s1, s3); +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query +-- modifying CTE not supported +EXPLAIN +WITH cte_1 AS (DELETE FROM target_json) +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +-- Grouping sets not supported +MERGE INTO citus_target t +USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +ERROR: cannot push down this subquery +DETAIL: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP +WITH subq AS +( +SELECT 
count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)
+)
+MERGE INTO citus_target t
+USING subq
+ON subq.id = t.id
+WHEN MATCHED AND t.id > 350 THEN
+    UPDATE SET val = t.val || 'Updated'
+WHEN NOT MATCHED THEN
+    INSERT VALUES (subq.id, 99)
+WHEN MATCHED AND t.id < 350 THEN
+    DELETE;
+ERROR: cannot push down this subquery
+DETAIL: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP
+-- try inserting unmatched distribution column value
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+    INSERT DEFAULT VALUES;
+ERROR: cannot perform MERGE INSERT with DEFAULTS
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+    INSERT VALUES(10000);
+ERROR: MERGE INSERT must refer a source column for distribution column
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+    INSERT (id) VALUES(1000);
+ERROR: MERGE INSERT must refer a source column for distribution column
+MERGE INTO t1 t
+USING s1 s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+    INSERT (id) VALUES(s.val);
+ERROR: MERGE INSERT must use the source table distribution column value
+MERGE INTO t1 t
+USING s1 s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+    INSERT (val) VALUES(s.val);
+ERROR: MERGE INSERT must have distribution column as value
 -- try updating the distribution key column
 BEGIN;
 MERGE INTO target_cj t
@@ -2177,7 +2502,7 @@ MERGE INTO target_cj t
     UPDATE SET tid = tid + 9, src = src || ' updated by merge'
   WHEN NOT MATCHED THEN
     INSERT VALUES (sid1, 'inserted by merge', val1);
-ERROR: modifying the partition value of rows is not allowed
+ERROR: updating the distribution column is not allowed in MERGE actions
 ROLLBACK;
 -- Foreign table as target
 MERGE INTO foreign_table
@@ -2269,13 +2594,31 @@ BEGIN
 	RETURN TRUE;
 END;
 $$;
+-- Test functions executing in MERGE statement. This is to prevent the functions from
+-- doing random SQL, which may be executed on a remote node or modify the target
+-- relation, which would have unexpected/surprising results.
+MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON + t1.id = s1.id AND s1.id = 2 + WHEN matched THEN + UPDATE SET id = s1.id, val = random(); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +-- Test STABLE function +CREATE FUNCTION add_s(integer, integer) RETURNS integer +AS 'select $1 + $2;' +LANGUAGE SQL +STABLE RETURNS NULL ON NULL INPUT; +MERGE INTO t1 +USING s1 ON t1.id = s1.id +WHEN NOT MATCHED THEN + INSERT VALUES(s1.id, add_s(s1.val, 2)); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables -- Test preventing "ON" join condition from writing to the database BEGIN; MERGE INTO t1 USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) WHEN MATCHED THEN UPDATE SET val = t1.val + s1.val; -ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; -- Test preventing WHEN clause(s) from writing to the database BEGIN; @@ -2283,7 +2626,7 @@ MERGE INTO t1 USING s1 ON t1.id = s1.id AND t1.id = 2 WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET val = t1.val + s1.val; -ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; -- Joining on partition columns with sub-query MERGE INTO t1 @@ -2294,7 +2637,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- Joining on partition columns with CTE WITH s1_res AS ( SELECT * FROM s1 @@ -2307,7 +2650,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- Constant Join condition WITH s1_res AS ( SELECT * FROM s1 @@ -2320,7 +2663,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- With a single WHEN clause, which causes a non-left join WITH s1_res AS ( SELECT * FROM s1 @@ -2329,7 +2672,7 @@ WITH s1_res AS ( WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- -- Reference tables -- @@ -2559,7 +2902,7 @@ WHEN MATCHED THEN UPDATE SET val = dist_colocated.val WHEN NOT MATCHED THEN INSERT VALUES(dist_colocated.id, dist_colocated.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables 
are co-located and joined on their distribution columns -- Both the source and target must be distributed MERGE INTO dist_target USING (SELECT 100 id) AS source @@ -2752,14 +3095,14 @@ HINT: Consider using hash distribution instead DROP SERVER foreign_server CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to user mapping for postgres on server foreign_server -drop cascades to foreign table foreign_table_4000046 +drop cascades to foreign table foreign_table_4000043 drop cascades to foreign table foreign_table -NOTICE: foreign table "foreign_table_4000046" does not exist, skipping +NOTICE: foreign table "foreign_table_4000043" does not exist, skipping CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name, drop_shards_metadata_only := false)" PL/pgSQL function citus_drop_trigger() line XX at PERFORM DROP FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; -NOTICE: drop cascades to 75 other objects +NOTICE: drop cascades to 84 other objects DETAIL: drop cascades to function insert_data() drop cascades to table pg_result drop cascades to table local_local @@ -2801,14 +3144,15 @@ drop cascades to table mv_target drop cascades to table mv_source_table drop cascades to materialized view mv_source drop cascades to table mv_local -drop cascades to table dist_table +drop cascades to table dist_table_4000041 drop cascades to function f_dist() drop cascades to table fn_target_4000040 drop cascades to table fn_result drop cascades to table fn_target +drop cascades to table dist_table drop cascades to table fn_local drop cascades to table ft_target -drop cascades to table ft_source_4000045 +drop cascades to table ft_source_4000042 drop cascades to table ft_source drop cascades to extension postgres_fdw drop cascades to table target_cj @@ -2826,9 +3170,17 @@ drop cascades to table citus_pa_target drop cascades to table pg_pa_source drop cascades to table citus_pa_source drop cascades to function pa_compare_tables() +drop cascades to table source_json +drop cascades to table target_json +drop cascades to function immutable_hash(integer) +drop cascades to table source_serial +drop cascades to table target_serial +drop cascades to table target_set +drop cascades to table source_set +drop cascades to function add_s(integer,integer) drop cascades to table pg -drop cascades to table t1_4000110 -drop cascades to table s1_4000111 +drop cascades to table t1_4000131 +drop cascades to table s1_4000132 drop cascades to table t1 drop cascades to table s1 drop cascades to table dist_colocated diff --git a/src/test/regress/expected/merge_arbitrary.out b/src/test/regress/expected/merge_arbitrary.out new file mode 100644 index 000000000..345ac1410 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary.out @@ -0,0 +1,150 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif +SET search_path TO merge_arbitrary_schema; +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM 
target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test PREPARE +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +INSERT INTO prept VALUES(100, 0); +INSERT INTO preps VALUES(100, 0); +INSERT INTO preps VALUES(200, 0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +-- sixth time +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +-- Should have the counter as 14 (7 * 2) +SELECT * FROM prept; + t1 | t2 +--------------------------------------------------------------------- + 100 | 14 +(1 row) + +-- Test local tables +INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause +INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause +INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause +INSERT INTO t1 VALUES(1, 0); -- Will be deleted +INSERT INTO t1 VALUES(2, 0); -- Will be updated +INSERT INTO t1 VALUES(5, 0); -- Will be intact +PREPARE local(int, int) AS +WITH s1_res AS ( + SELECT * FROM s1 +) +MERGE INTO t1 + USING s1_res ON (s1_res.id = t1.id) + WHEN MATCHED AND s1_res.val = $1 THEN + DELETE + WHEN MATCHED THEN + UPDATE SET val = t1.val + $2 + WHEN NOT MATCHED THEN + INSERT (id, val) VALUES (s1_res.id, s1_res.val); +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(5 rows) + +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +-- sixth time +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(5 rows) + +ROLLBACK; diff --git a/src/test/regress/expected/merge_arbitrary_0.out b/src/test/regress/expected/merge_arbitrary_0.out new file mode 100644 index 000000000..a7e3fbf20 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_0.out @@ -0,0 +1,6 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q diff --git 
a/src/test/regress/expected/merge_arbitrary_create.out b/src/test/regress/expected/merge_arbitrary_create.out new file mode 100644 index 000000000..9b2444f17 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_create.out @@ -0,0 +1,72 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif +DROP SCHEMA IF EXISTS merge_arbitrary_schema CASCADE; +CREATE SCHEMA merge_arbitrary_schema; +SET search_path TO merge_arbitrary_schema; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 6000000; +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); +SELECT create_distributed_table('target_cj', 'tid'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj1', 'sid1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj2', 'sid2'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE prept(t1 int, t2 int); +CREATE TABLE preps(s1 int, s2 int); +SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +-- Citus local tables +CREATE TABLE t1(id int, val int); +CREATE TABLE s1(id int, val int); +SELECT citus_add_local_table_to_metadata('t1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_local_table_to_metadata('s1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/merge_arbitrary_create_0.out b/src/test/regress/expected/merge_arbitrary_create_0.out new file mode 100644 index 000000000..a7e3fbf20 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_create_0.out @@ -0,0 +1,6 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index d92686b93..7fc102dbb 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -419,29 +419,36 @@ SELECT create_distributed_table('tbl2', 'x'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- also, not inside subqueries & ctes WITH targq AS ( SELECT * FROM tbl2 ) MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is only 
supported when distributed tables are joined on their distribution column --- crashes on beta3, fixed on 15 stable ---WITH foo AS ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) SELECT * FROM foo; ---COPY ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) TO stdout; +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +WITH foo AS ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) SELECT * FROM foo; +ERROR: MERGE not supported in WITH query +COPY ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) TO stdout; +ERROR: MERGE not supported in COPY +MERGE INTO tbl1 t +USING tbl2 +ON (true) +WHEN MATCHED THEN + DO NOTHING; +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns MERGE INTO tbl1 t USING tbl2 ON (true) WHEN MATCHED THEN UPDATE SET x = (SELECT count(*) FROM tbl2); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: updating the distribution column is not allowed in MERGE actions -- test numeric types with negative scale CREATE TABLE numeric_negative_scale(numeric_column numeric(3,-1), orig_value int); INSERT into numeric_negative_scale SELECT x,x FROM generate_series(111, 115) x; diff --git a/src/test/regress/expected/pgmerge.out b/src/test/regress/expected/pgmerge.out index 0bedf356f..8a74336a0 100644 --- a/src/test/regress/expected/pgmerge.out +++ b/src/test/regress/expected/pgmerge.out @@ -910,7 +910,7 @@ MERGE INTO wq_target t USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; -ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; -- Test preventing ON condition from writing to the database BEGIN; @@ -918,7 +918,7 @@ MERGE INTO wq_target t USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) WHEN MATCHED THEN UPDATE SET balance = t.balance + s.balance; -ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; @@ -1893,13 +1893,15 @@ INSERT INTO pa_target SELECT '2017-02-28', id, id * 100, 'initial' FROM generate SET client_min_messages TO DEBUG1; BEGIN; MERGE INTO pa_target t - USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s + USING (SELECT * FROM pa_source WHERE sid < 10) s + --USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s ON t.tid = s.sid WHEN MATCHED THEN UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT MATCHED THEN - INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); -DEBUG: + INSERT VALUES ('2017-01-15', sid, delta, 'inserted by merge'); +DEBUG: + --INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); SELECT * FROM pa_target ORDER BY tid; logts | tid | balance | val --------------------------------------------------------------------- diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql index 12294b2c9..ded90b69c 100644 --- a/src/test/regress/sql/merge.sql +++ 
b/src/test/regress/sql/merge.sql @@ -18,8 +18,9 @@ CREATE SCHEMA merge_schema; SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; -SET citus.explain_all_tasks to true; +SET citus.explain_all_tasks TO true; SET citus.shard_replication_factor TO 1; +SET citus.max_adaptive_executor_pool_size TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); CREATE TABLE source @@ -185,6 +186,21 @@ MERGE INTO target t VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time); SELECT * from target t WHERE t.customer_id = 30004; +-- Updating distribution column is allowed if the operation is a no-op +SELECT * from target t WHERE t.customer_id = 30000; +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = 30000; + +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = t.customer_id; +SELECT * from target t WHERE t.customer_id = 30000; + -- -- Test MERGE with CTE as source -- @@ -223,7 +239,6 @@ MERGE INTO t1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (pg_res.id, pg_res.val); --- Two rows with id 2 and val incremented, id 3, and id 1 is deleted SELECT * FROM t1 order by id; SELECT * INTO merge_result FROM t1 order by id; @@ -777,7 +792,8 @@ $$ language plpgsql volatile; CREATE TABLE fn_target(id int, data varchar); MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -790,11 +806,12 @@ SELECT * INTO fn_result FROM fn_target ORDER BY 1 ; -- Clean the slate TRUNCATE TABLE fn_target; SELECT citus_add_local_table_to_metadata('fn_target'); -SELECT create_distributed_table('dist_table', 'id'); +SELECT citus_add_local_table_to_metadata('dist_table'); SET client_min_messages TO DEBUG1; MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -1287,7 +1304,7 @@ ON pg_target.id = sub.id AND pg_target.id = $1 WHEN MATCHED THEN UPDATE SET val = 'Updated by prepare using ' || sub.val WHEN NOT MATCHED THEN - DO NOTHING; + INSERT VALUES (sub.id, sub.val); PREPARE citus_prep(int) AS MERGE INTO citus_target @@ -1296,12 +1313,12 @@ ON citus_target.id = sub.id AND citus_target.id = $1 WHEN MATCHED THEN UPDATE SET val = 'Updated by prepare using ' || sub.val WHEN NOT MATCHED THEN - DO NOTHING; + INSERT VALUES (sub.id, sub.val); BEGIN; -SET citus.log_remote_commands to true; SELECT * FROM pg_target WHERE id = 500; -- before merge +SELECT count(*) FROM pg_target; -- before merge EXECUTE pg_prep(500); SELECT * FROM pg_target WHERE id = 500; -- non-cached EXECUTE pg_prep(500); @@ -1310,8 +1327,11 @@ EXECUTE pg_prep(500); EXECUTE pg_prep(500); EXECUTE pg_prep(500); SELECT * FROM pg_target WHERE id = 500; -- cached +SELECT count(*) FROM pg_target; -- cached SELECT * FROM citus_target WHERE id = 500; -- before merge +SELECT count(*) FROM citus_target; -- before merge +SET citus.log_remote_commands to true; EXECUTE citus_prep(500); SELECT * FROM citus_target WHERE id = 500; -- non-cached EXECUTE citus_prep(500); 
@@ -1319,9 +1339,10 @@ EXECUTE citus_prep(500); EXECUTE citus_prep(500); EXECUTE citus_prep(500); EXECUTE citus_prep(500); -SELECT * FROM citus_target WHERE id = 500; -- cached - SET citus.log_remote_commands to false; +SELECT * FROM citus_target WHERE id = 500; -- cached +SELECT count(*) FROM citus_target; -- cached + SELECT compare_tables(); ROLLBACK; @@ -1417,10 +1438,185 @@ MERGE INTO citus_pa_target t SELECT pa_compare_tables(); ROLLBACK; +CREATE TABLE source_json( id integer, z int, d jsonb); +CREATE TABLE target_json( id integer, z int, d jsonb); + +INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i; + +SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id'); + +-- single shard query given source_json is filtered and Postgres is smart to pushdown +-- filter to the target_json as well +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING (SELECT * FROM source_json WHERE id = 1) sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +$Q$); +SELECT * FROM target_json ORDER BY 1; + +-- zero shard query as filters do not match +--SELECT public.coordinator_plan($Q$ +--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +--USING (SELECT * FROM source_json WHERE id = 1) sdn +--ON sda.id = sdn.id AND sda.id = 2 +--WHEN NOT matched THEN +-- INSERT (id, z) VALUES (sdn.id, 5); +--$Q$); +--SELECT * FROM target_json ORDER BY 1; + +-- join for source_json is happening at a different place +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z) +ON sda.id = s1.id AND s1.id = s2.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (s2.id, 5); +$Q$); +SELECT * FROM target_json ORDER BY 1; + +-- update JSON column +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET d = '{"a" : 5}'; +$Q$); +SELECT * FROM target_json ORDER BY 1; + +CREATE FUNCTION immutable_hash(int) RETURNS int +AS 'SELECT hashtext( ($1 + $1)::text);' +LANGUAGE SQL +IMMUTABLE +RETURNS NULL ON NULL INPUT; + +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET z = immutable_hash(sdn.z); + +-- Test bigserial +CREATE TABLE source_serial (id integer, z int, d bigserial); +CREATE TABLE target_serial (id integer, z int, d bigserial); +INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i; +SELECT create_distributed_table('source_serial', 'id'), + create_distributed_table('target_serial', 'id'); + +MERGE INTO target_serial sda +USING source_serial sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (id, z); + +SELECT count(*) from source_serial; +SELECT count(*) from target_serial; + +SELECT count(distinct d) from source_serial; +SELECT count(distinct d) from target_serial; + +-- Test set operations +CREATE TABLE target_set(t1 int, t2 int); +CREATE TABLE source_set(s1 int, s2 int); + +SELECT create_distributed_table('target_set', 't1'), + create_distributed_table('source_set', 's1'); + +INSERT INTO target_set VALUES(1, 0); +INSERT INTO source_set VALUES(1, 1); +INSERT INTO source_set VALUES(2, 2); + +MERGE INTO target_set +USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 100 +WHEN NOT MATCHED THEN + INSERT 
VALUES(foo.s1);
+SELECT * FROM target_set ORDER BY 1, 2;
+
 --
 -- Error and Unsupported scenarios
 --
+MERGE INTO target_set
+USING (SELECT s1,s2 FROM source_set UNION SELECT s2,s1 FROM source_set) AS foo ON target_set.t1 = foo.s1
+WHEN MATCHED THEN
+	UPDATE SET t2 = t2 + 1;
+
+MERGE INTO target_set
+USING (SELECT 2 as s3, source_set.* FROM (SELECT * FROM source_set LIMIT 1) as foo LEFT JOIN source_set USING( s1)) AS foo
+ON target_set.t1 = foo.s1
+WHEN MATCHED THEN UPDATE SET t2 = t2 + 1
+WHEN NOT MATCHED THEN INSERT VALUES(s1, s3);
+
+
+-- modifying CTE not supported
+EXPLAIN
+WITH cte_1 AS (DELETE FROM target_json)
+MERGE INTO target_json sda
+USING source_json sdn
+ON sda.id = sdn.id
+WHEN NOT matched THEN
+	INSERT (id, z) VALUES (sdn.id, 5);
+
+-- Grouping sets not supported
+MERGE INTO citus_target t
+USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq
+ON subq.id = t.id
+WHEN MATCHED AND t.id > 350 THEN
+    UPDATE SET val = t.val || 'Updated'
+WHEN NOT MATCHED THEN
+	INSERT VALUES (subq.id, 99)
+WHEN MATCHED AND t.id < 350 THEN
+    DELETE;
+
+WITH subq AS
+(
+SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)
+)
+MERGE INTO citus_target t
+USING subq
+ON subq.id = t.id
+WHEN MATCHED AND t.id > 350 THEN
+    UPDATE SET val = t.val || 'Updated'
+WHEN NOT MATCHED THEN
+	INSERT VALUES (subq.id, 99)
+WHEN MATCHED AND t.id < 350 THEN
+    DELETE;
+
+-- try inserting unmatched distribution column value
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+  INSERT DEFAULT VALUES;
+
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+  INSERT VALUES(10000);
+
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+  INSERT (id) VALUES(1000);
+
+MERGE INTO t1 t
+USING s1 s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+  INSERT (id) VALUES(s.val);
+
+MERGE INTO t1 t
+USING s1 s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+  INSERT (val) VALUES(s.val);
+
 -- try updating the distribution key column
 BEGIN;
 MERGE INTO target_cj t
@@ -1473,6 +1669,25 @@ BEGIN
 END;
 $$;

+-- Test functions executing in a MERGE statement. This is to prevent the functions from
+-- running arbitrary SQL, which may be executed on a remote node or modify the target
+-- relation, leading to unexpected/surprising results.
+MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON + t1.id = s1.id AND s1.id = 2 + WHEN matched THEN + UPDATE SET id = s1.id, val = random(); + +-- Test STABLE function +CREATE FUNCTION add_s(integer, integer) RETURNS integer +AS 'select $1 + $2;' +LANGUAGE SQL +STABLE RETURNS NULL ON NULL INPUT; + +MERGE INTO t1 +USING s1 ON t1.id = s1.id +WHEN NOT MATCHED THEN + INSERT VALUES(s1.id, add_s(s1.val, 2)); + -- Test preventing "ON" join condition from writing to the database BEGIN; MERGE INTO t1 diff --git a/src/test/regress/sql/merge_arbitrary.sql b/src/test/regress/sql/merge_arbitrary.sql new file mode 100644 index 000000000..17b7d4f90 --- /dev/null +++ b/src/test/regress/sql/merge_arbitrary.sql @@ -0,0 +1,133 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif + +SET search_path TO merge_arbitrary_schema; +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); + +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); + +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test PREPARE +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); + +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +INSERT INTO prept VALUES(100, 0); + +INSERT INTO preps VALUES(100, 0); +INSERT INTO preps VALUES(200, 0); + +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); + +-- sixth time +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); + +-- Should have the counter as 14 (7 * 2) +SELECT * FROM prept; + +-- Test local tables +INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause +INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause +INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause + +INSERT INTO t1 VALUES(1, 0); -- Will be deleted +INSERT INTO t1 VALUES(2, 0); -- Will be updated +INSERT INTO t1 VALUES(5, 0); -- Will be intact + +PREPARE local(int, int) AS +WITH s1_res AS ( + SELECT * FROM s1 +) +MERGE INTO t1 + USING s1_res ON (s1_res.id = t1.id) + + WHEN MATCHED AND s1_res.val = $1 THEN + DELETE + WHEN MATCHED THEN + UPDATE SET val = t1.val + $2 + WHEN NOT MATCHED THEN + INSERT (id, val) VALUES (s1_res.id, s1_res.val); + +BEGIN; +EXECUTE local(0, 1); +SELECT * 
FROM t1 order by id; +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +-- sixth time +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; +ROLLBACK; diff --git a/src/test/regress/sql/merge_arbitrary_create.sql b/src/test/regress/sql/merge_arbitrary_create.sql new file mode 100644 index 000000000..edf9b0d9d --- /dev/null +++ b/src/test/regress/sql/merge_arbitrary_create.sql @@ -0,0 +1,50 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif + +DROP SCHEMA IF EXISTS merge_arbitrary_schema CASCADE; +CREATE SCHEMA merge_arbitrary_schema; +SET search_path TO merge_arbitrary_schema; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 6000000; +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); + +SELECT create_distributed_table('target_cj', 'tid'); +SELECT create_distributed_table('source_cj1', 'sid1'); +SELECT create_distributed_table('source_cj2', 'sid2'); + +CREATE TABLE prept(t1 int, t2 int); +CREATE TABLE preps(s1 int, s2 int); + +SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); + +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +-- Citus local tables +CREATE TABLE t1(id int, val int); +CREATE TABLE s1(id int, val int); + +SELECT citus_add_local_table_to_metadata('t1'); +SELECT citus_add_local_table_to_metadata('s1'); diff --git a/src/test/regress/sql/pg15.sql b/src/test/regress/sql/pg15.sql index 121b41f86..ac8062c65 100644 --- a/src/test/regress/sql/pg15.sql +++ b/src/test/regress/sql/pg15.sql @@ -269,16 +269,21 @@ WITH targq AS ( MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; --- crashes on beta3, fixed on 15 stable ---WITH foo AS ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) SELECT * FROM foo; +WITH foo AS ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) SELECT * FROM foo; ---COPY ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) TO stdout; +COPY ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) TO stdout; + +MERGE INTO tbl1 t +USING tbl2 +ON (true) +WHEN MATCHED THEN + DO NOTHING; MERGE INTO tbl1 t USING tbl2 diff --git a/src/test/regress/sql/pgmerge.sql b/src/test/regress/sql/pgmerge.sql index 83bf01a68..9b828f27e 100644 --- a/src/test/regress/sql/pgmerge.sql +++ b/src/test/regress/sql/pgmerge.sql @@ -1172,12 +1172,14 @@ INSERT INTO pa_target SELECT '2017-02-28', id, id * 100, 'initial' FROM generate SET client_min_messages TO DEBUG1; BEGIN; MERGE INTO pa_target t - USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s + USING (SELECT * FROM pa_source WHERE sid < 10) s + --USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s ON t.tid = s.sid WHEN MATCHED THEN UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT 
MATCHED THEN - INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); + INSERT VALUES ('2017-01-15', sid, delta, 'inserted by merge'); + --INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); SELECT * FROM pa_target ORDER BY tid; ROLLBACK; RESET client_min_messages; diff --git a/src/test/regress/sql_schedule b/src/test/regress/sql_schedule index f07f7af9a..272a84eff 100644 --- a/src/test/regress/sql_schedule +++ b/src/test/regress/sql_schedule @@ -14,3 +14,4 @@ test: arbitrary_configs_truncate test: arbitrary_configs_truncate_cascade test: arbitrary_configs_truncate_partition test: arbitrary_configs_alter_table_add_constraint_without_name +test: merge_arbitrary From ea3093bdb67888ba30ef77dc777f3785edada87d Mon Sep 17 00:00:00 2001 From: aykut-bozkurt <51649454+aykut-bozkurt@users.noreply.github.com> Date: Mon, 20 Mar 2023 12:06:31 +0300 Subject: [PATCH 15/58] Make workerCount configurable for regression tests (#6764) Make worker count flexible in our regression tests instead of hardcoding it to 2 workers. --- src/test/regress/Makefile | 14 ++++++++------ src/test/regress/citus_tests/run_test.py | 19 +++++++++++++++++-- src/test/regress/pg_regress_multi.pl | 4 +++- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index 368f8f8c5..d9700df80 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -117,29 +117,31 @@ check-minimal-mx: all -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS) check-custom-schedule: all - $(pg_regress_multi_check) --load-extension=citus \ + $(pg_regress_multi_check) --load-extension=citus --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-failure-custom-schedule: all - $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ + $(pg_regress_multi_check) --load-extension=citus --mitmproxy --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-isolation-custom-schedule: all $(isolation_test_files) - $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + $(pg_regress_multi_check) --load-extension=citus --isolationtester --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-custom-schedule-vg: all $(pg_regress_multi_check) --load-extension=citus \ - --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ + --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --worker-count=$(WORKERCOUNT) \ + --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-failure-custom-schedule-vg: all $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ - --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ + --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --worker-count=$(WORKERCOUNT) \ + --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-isolation-custom-schedule-vg: all $(isolation_test_files) - $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + 
$(pg_regress_multi_check) --load-extension=citus --isolationtester --worker-count=$(WORKERCOUNT) \ --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index 3daac8b6a..9c901785c 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -73,10 +73,11 @@ if __name__ == "__main__": schedule: Optional[str] direct_extra_tests: list[str] - def __init__(self, schedule, extra_tests=None, repeatable=True): + def __init__(self, schedule, extra_tests=None, repeatable=True, worker_count=2): self.schedule = schedule self.direct_extra_tests = extra_tests or [] self.repeatable = repeatable + self.worker_count = worker_count def extra_tests(self): all_deps = OrderedDict() @@ -180,6 +181,15 @@ if __name__ == "__main__": return "base_schedule" return "minimal_schedule" + # we run the tests with 2 workers by default. + # If we find any dependency which requires more workers, we update the worker count. + def worker_count_for(test_name): + if test_name in deps: + return deps[test_name].worker_count + return 2 + + test_worker_count = max(worker_count_for(test_file_name), 2) + if test_file_name in deps: dependencies = deps[test_file_name] elif schedule_line_is_upgrade_after(test_schedule_line): @@ -204,6 +214,7 @@ if __name__ == "__main__": with open(tmp_schedule_path, "a") as myfile: for dependency in dependencies.extra_tests(): myfile.write(f"test: {dependency}\n") + test_worker_count = max(worker_count_for(dependency), test_worker_count) repetition_cnt = args["repeat"] if repetition_cnt > 1 and not dependencies.repeatable: @@ -224,7 +235,11 @@ if __name__ == "__main__": make_recipe += "-vg" # prepare command to run tests - test_command = f"make -C {regress_dir} {make_recipe} SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" + test_command = ( + f"make -C {regress_dir} {make_recipe} " + f"WORKERCOUNT={test_worker_count} " + f"SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" + ) # run test command n times try: diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index 119e6a758..af594c1d4 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -49,6 +49,7 @@ sub Usage() print " --pg_ctl-timeout Timeout for pg_ctl\n"; print " --connection-timeout Timeout for connecting to worker nodes\n"; print " --mitmproxy Start a mitmproxy for one of the workers\n"; + print " --worker-count Number of workers in Citus cluster (default: 2)\n"; exit 1; } @@ -84,6 +85,7 @@ my $mitmFifoPath = catfile($TMP_CHECKDIR, "mitmproxy.fifo"); my $conninfo = ""; my $publicWorker1Host = "localhost"; my $publicWorker2Host = "localhost"; +my $workerCount = 2; my $serversAreShutdown = "TRUE"; my $usingWindows = 0; @@ -116,6 +118,7 @@ GetOptions( 'conninfo=s' => \$conninfo, 'worker-1-public-hostname=s' => \$publicWorker1Host, 'worker-2-public-hostname=s' => \$publicWorker2Host, + 'worker-count=i' => \$workerCount, 'help' => sub { Usage() }); my $fixopen = "$bindir/postgres.fixopen"; @@ -318,7 +321,6 @@ my $mitmPort = 9060; # Set some default configuration options my $masterPort = 57636; -my $workerCount = 2; my @workerHosts = (); my @workerPorts = (); From aa33988c6edd66ec9ae9e8a0e27ce34599eaf76b Mon Sep 17 00:00:00 2001 From: 
aykut-bozkurt <51649454+aykut-bozkurt@users.noreply.github.com> Date: Tue, 21 Mar 2023 00:58:12 +0300 Subject: [PATCH 16/58] fix pip lock file (#6766) ci/fix_styles.sh were complaining about `black` and `isort` packages are not found even if I `pipenv install --dev` due to broken lock file. I regenerated the lock file and now it works fine. We also wanted to upgrade required python version for the pipfile. --- .circleci/config.yml | 2 +- src/test/regress/Pipfile.lock | 215 ++++++++++++++++++---------------- 2 files changed, 115 insertions(+), 102 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8f2d86f15..d0db414ce 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-v89059f9' + default: '-v4b2ae97' pg13_version: type: string default: '13.10' diff --git a/src/test/regress/Pipfile.lock b/src/test/regress/Pipfile.lock index 8bf8715ea..709254d77 100644 --- a/src/test/regress/Pipfile.lock +++ b/src/test/regress/Pipfile.lock @@ -219,32 +219,32 @@ }, "cryptography": { "hashes": [ - "sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4", - "sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f", - "sha256:4789d1e3e257965e960232345002262ede4d094d1a19f4d3b52e48d4d8f3b885", - "sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502", - "sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41", - "sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965", - "sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e", - "sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc", - "sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad", - "sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505", - "sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388", - "sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6", - "sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2", - "sha256:c5caeb8188c24888c90b5108a441c106f7faa4c4c075a2bcae438c6e8ca73cef", - "sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac", - "sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695", - "sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6", - "sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336", - "sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0", - "sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c", - "sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106", - "sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a", - "sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8" + "sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1", + "sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7", + "sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06", + "sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84", + "sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915", + "sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074", + "sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5", + "sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3", + 
"sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9", + "sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3", + "sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011", + "sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536", + "sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a", + "sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f", + "sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480", + "sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac", + "sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0", + "sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108", + "sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828", + "sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354", + "sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612", + "sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3", + "sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97" ], "index": "pypi", - "version": "==39.0.1" + "version": "==39.0.2" }, "docopt": { "hashes": [ @@ -255,11 +255,11 @@ }, "exceptiongroup": { "hashes": [ - "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e", - "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23" + "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e", + "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785" ], "markers": "python_version < '3.11'", - "version": "==1.1.0" + "version": "==1.1.1" }, "execnet": { "hashes": [ @@ -271,11 +271,11 @@ }, "filelock": { "hashes": [ - "sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de", - "sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d" + "sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce", + "sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182" ], "index": "pypi", - "version": "==3.9.0" + "version": "==3.10.0" }, "flask": { "hashes": [ @@ -420,60 +420,71 @@ }, "msgpack": { "hashes": [ - "sha256:002b5c72b6cd9b4bafd790f364b8480e859b4712e91f43014fe01e4f957b8467", - "sha256:0a68d3ac0104e2d3510de90a1091720157c319ceeb90d74f7b5295a6bee51bae", - "sha256:0df96d6eaf45ceca04b3f3b4b111b86b33785683d682c655063ef8057d61fd92", - "sha256:0dfe3947db5fb9ce52aaea6ca28112a170db9eae75adf9339a1aec434dc954ef", - "sha256:0e3590f9fb9f7fbc36df366267870e77269c03172d086fa76bb4eba8b2b46624", - "sha256:11184bc7e56fd74c00ead4f9cc9a3091d62ecb96e97653add7a879a14b003227", - "sha256:112b0f93202d7c0fef0b7810d465fde23c746a2d482e1e2de2aafd2ce1492c88", - "sha256:1276e8f34e139aeff1c77a3cefb295598b504ac5314d32c8c3d54d24fadb94c9", - "sha256:1576bd97527a93c44fa856770197dec00d223b0b9f36ef03f65bac60197cedf8", - "sha256:1e91d641d2bfe91ba4c52039adc5bccf27c335356055825c7f88742c8bb900dd", - "sha256:26b8feaca40a90cbe031b03d82b2898bf560027160d3eae1423f4a67654ec5d6", - "sha256:2999623886c5c02deefe156e8f869c3b0aaeba14bfc50aa2486a0415178fce55", - "sha256:2a2df1b55a78eb5f5b7d2a4bb221cd8363913830145fad05374a80bf0877cb1e", - "sha256:2bb8cdf50dd623392fa75525cce44a65a12a00c98e1e37bf0fb08ddce2ff60d2", - "sha256:2cc5ca2712ac0003bcb625c96368fd08a0f86bbc1a5578802512d87bc592fe44", - "sha256:35bc0faa494b0f1d851fd29129b2575b2e26d41d177caacd4206d81502d4c6a6", - "sha256:3c11a48cf5e59026ad7cb0dc29e29a01b5a66a3e333dc11c04f7e991fc5510a9", - 
"sha256:449e57cc1ff18d3b444eb554e44613cffcccb32805d16726a5494038c3b93dab", - "sha256:462497af5fd4e0edbb1559c352ad84f6c577ffbbb708566a0abaaa84acd9f3ae", - "sha256:4733359808c56d5d7756628736061c432ded018e7a1dff2d35a02439043321aa", - "sha256:48f5d88c99f64c456413d74a975bd605a9b0526293218a3b77220a2c15458ba9", - "sha256:49565b0e3d7896d9ea71d9095df15b7f75a035c49be733051c34762ca95bbf7e", - "sha256:4ab251d229d10498e9a2f3b1e68ef64cb393394ec477e3370c457f9430ce9250", - "sha256:4d5834a2a48965a349da1c5a79760d94a1a0172fbb5ab6b5b33cbf8447e109ce", - "sha256:4dea20515f660aa6b7e964433b1808d098dcfcabbebeaaad240d11f909298075", - "sha256:545e3cf0cf74f3e48b470f68ed19551ae6f9722814ea969305794645da091236", - "sha256:63e29d6e8c9ca22b21846234913c3466b7e4ee6e422f205a2988083de3b08cae", - "sha256:6916c78f33602ecf0509cc40379271ba0f9ab572b066bd4bdafd7434dee4bc6e", - "sha256:6a4192b1ab40f8dca3f2877b70e63799d95c62c068c84dc028b40a6cb03ccd0f", - "sha256:6c9566f2c39ccced0a38d37c26cc3570983b97833c365a6044edef3574a00c08", - "sha256:76ee788122de3a68a02ed6f3a16bbcd97bc7c2e39bd4d94be2f1821e7c4a64e6", - "sha256:7760f85956c415578c17edb39eed99f9181a48375b0d4a94076d84148cf67b2d", - "sha256:77ccd2af37f3db0ea59fb280fa2165bf1b096510ba9fe0cc2bf8fa92a22fdb43", - "sha256:81fc7ba725464651190b196f3cd848e8553d4d510114a954681fd0b9c479d7e1", - "sha256:85f279d88d8e833ec015650fd15ae5eddce0791e1e8a59165318f371158efec6", - "sha256:9667bdfdf523c40d2511f0e98a6c9d3603be6b371ae9a238b7ef2dc4e7a427b0", - "sha256:a75dfb03f8b06f4ab093dafe3ddcc2d633259e6c3f74bb1b01996f5d8aa5868c", - "sha256:ac5bd7901487c4a1dd51a8c58f2632b15d838d07ceedaa5e4c080f7190925bff", - "sha256:aca0f1644d6b5a73eb3e74d4d64d5d8c6c3d577e753a04c9e9c87d07692c58db", - "sha256:b17be2478b622939e39b816e0aa8242611cc8d3583d1cd8ec31b249f04623243", - "sha256:c1683841cd4fa45ac427c18854c3ec3cd9b681694caf5bff04edb9387602d661", - "sha256:c23080fdeec4716aede32b4e0ef7e213c7b1093eede9ee010949f2a418ced6ba", - "sha256:d5b5b962221fa2c5d3a7f8133f9abffc114fe218eb4365e40f17732ade576c8e", - "sha256:d603de2b8d2ea3f3bcb2efe286849aa7a81531abc52d8454da12f46235092bcb", - "sha256:e83f80a7fec1a62cf4e6c9a660e39c7f878f603737a0cdac8c13131d11d97f52", - "sha256:eb514ad14edf07a1dbe63761fd30f89ae79b42625731e1ccf5e1f1092950eaa6", - "sha256:eba96145051ccec0ec86611fe9cf693ce55f2a3ce89c06ed307de0e085730ec1", - "sha256:ed6f7b854a823ea44cf94919ba3f727e230da29feb4a99711433f25800cf747f", - "sha256:f0029245c51fd9473dc1aede1160b0a29f4a912e6b1dd353fa6d317085b219da", - "sha256:f5d869c18f030202eb412f08b28d2afeea553d6613aee89e200d7aca7ef01f5f", - "sha256:fb62ea4b62bfcb0b380d5680f9a4b3f9a2d166d9394e9bbd9666c0ee09a3645c", - "sha256:fcb8a47f43acc113e24e910399376f7277cf8508b27e5b88499f053de6b115a8" + "sha256:06f5174b5f8ed0ed919da0e62cbd4ffde676a374aba4020034da05fab67b9164", + "sha256:0c05a4a96585525916b109bb85f8cb6511db1c6f5b9d9cbcbc940dc6b4be944b", + "sha256:137850656634abddfb88236008339fdaba3178f4751b28f270d2ebe77a563b6c", + "sha256:17358523b85973e5f242ad74aa4712b7ee560715562554aa2134d96e7aa4cbbf", + "sha256:18334484eafc2b1aa47a6d42427da7fa8f2ab3d60b674120bce7a895a0a85bdd", + "sha256:1835c84d65f46900920b3708f5ba829fb19b1096c1800ad60bae8418652a951d", + "sha256:1967f6129fc50a43bfe0951c35acbb729be89a55d849fab7686004da85103f1c", + "sha256:1ab2f3331cb1b54165976a9d976cb251a83183631c88076613c6c780f0d6e45a", + "sha256:1c0f7c47f0087ffda62961d425e4407961a7ffd2aa004c81b9c07d9269512f6e", + "sha256:20a97bf595a232c3ee6d57ddaadd5453d174a52594bf9c21d10407e2a2d9b3bd", + "sha256:20c784e66b613c7f16f632e7b5e8a1651aa5702463d61394671ba07b2fc9e025", + 
"sha256:266fa4202c0eb94d26822d9bfd7af25d1e2c088927fe8de9033d929dd5ba24c5", + "sha256:28592e20bbb1620848256ebc105fc420436af59515793ed27d5c77a217477705", + "sha256:288e32b47e67f7b171f86b030e527e302c91bd3f40fd9033483f2cacc37f327a", + "sha256:3055b0455e45810820db1f29d900bf39466df96ddca11dfa6d074fa47054376d", + "sha256:332360ff25469c346a1c5e47cbe2a725517919892eda5cfaffe6046656f0b7bb", + "sha256:362d9655cd369b08fda06b6657a303eb7172d5279997abe094512e919cf74b11", + "sha256:366c9a7b9057e1547f4ad51d8facad8b406bab69c7d72c0eb6f529cf76d4b85f", + "sha256:36961b0568c36027c76e2ae3ca1132e35123dcec0706c4b7992683cc26c1320c", + "sha256:379026812e49258016dd84ad79ac8446922234d498058ae1d415f04b522d5b2d", + "sha256:382b2c77589331f2cb80b67cc058c00f225e19827dbc818d700f61513ab47bea", + "sha256:476a8fe8fae289fdf273d6d2a6cb6e35b5a58541693e8f9f019bfe990a51e4ba", + "sha256:48296af57cdb1d885843afd73c4656be5c76c0c6328db3440c9601a98f303d87", + "sha256:4867aa2df9e2a5fa5f76d7d5565d25ec76e84c106b55509e78c1ede0f152659a", + "sha256:4c075728a1095efd0634a7dccb06204919a2f67d1893b6aa8e00497258bf926c", + "sha256:4f837b93669ce4336e24d08286c38761132bc7ab29782727f8557e1eb21b2080", + "sha256:4f8d8b3bf1ff2672567d6b5c725a1b347fe838b912772aa8ae2bf70338d5a198", + "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9", + "sha256:5494ea30d517a3576749cad32fa27f7585c65f5f38309c88c6d137877fa28a5a", + "sha256:55b56a24893105dc52c1253649b60f475f36b3aa0fc66115bffafb624d7cb30b", + "sha256:56a62ec00b636583e5cb6ad313bbed36bb7ead5fa3a3e38938503142c72cba4f", + "sha256:57e1f3528bd95cc44684beda696f74d3aaa8a5e58c816214b9046512240ef437", + "sha256:586d0d636f9a628ddc6a17bfd45aa5b5efaf1606d2b60fa5d87b8986326e933f", + "sha256:5cb47c21a8a65b165ce29f2bec852790cbc04936f502966768e4aae9fa763cb7", + "sha256:6c4c68d87497f66f96d50142a2b73b97972130d93677ce930718f68828b382e2", + "sha256:821c7e677cc6acf0fd3f7ac664c98803827ae6de594a9f99563e48c5a2f27eb0", + "sha256:916723458c25dfb77ff07f4c66aed34e47503b2eb3188b3adbec8d8aa6e00f48", + "sha256:9e6ca5d5699bcd89ae605c150aee83b5321f2115695e741b99618f4856c50898", + "sha256:9f5ae84c5c8a857ec44dc180a8b0cc08238e021f57abdf51a8182e915e6299f0", + "sha256:a2b031c2e9b9af485d5e3c4520f4220d74f4d222a5b8dc8c1a3ab9448ca79c57", + "sha256:a61215eac016f391129a013c9e46f3ab308db5f5ec9f25811e811f96962599a8", + "sha256:a740fa0e4087a734455f0fc3abf5e746004c9da72fbd541e9b113013c8dc3282", + "sha256:a9985b214f33311df47e274eb788a5893a761d025e2b92c723ba4c63936b69b1", + "sha256:ab31e908d8424d55601ad7075e471b7d0140d4d3dd3272daf39c5c19d936bd82", + "sha256:ac9dd47af78cae935901a9a500104e2dea2e253207c924cc95de149606dc43cc", + "sha256:addab7e2e1fcc04bd08e4eb631c2a90960c340e40dfc4a5e24d2ff0d5a3b3edb", + "sha256:b1d46dfe3832660f53b13b925d4e0fa1432b00f5f7210eb3ad3bb9a13c6204a6", + "sha256:b2de4c1c0538dcb7010902a2b97f4e00fc4ddf2c8cda9749af0e594d3b7fa3d7", + "sha256:b5ef2f015b95f912c2fcab19c36814963b5463f1fb9049846994b007962743e9", + "sha256:b72d0698f86e8d9ddf9442bdedec15b71df3598199ba33322d9711a19f08145c", + "sha256:bae7de2026cbfe3782c8b78b0db9cbfc5455e079f1937cb0ab8d133496ac55e1", + "sha256:bf22a83f973b50f9d38e55c6aade04c41ddda19b00c4ebc558930d78eecc64ed", + "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c", + "sha256:c396e2cc213d12ce017b686e0f53497f94f8ba2b24799c25d913d46c08ec422c", + "sha256:cb5aaa8c17760909ec6cb15e744c3ebc2ca8918e727216e79607b7bbce9c8f77", + "sha256:cdc793c50be3f01106245a61b739328f7dccc2c648b501e237f0699fe1395b81", + "sha256:d25dd59bbbbb996eacf7be6b4ad082ed7eacc4e8f3d2df1ba43822da9bfa122a", + 
"sha256:e42b9594cc3bf4d838d67d6ed62b9e59e201862a25e9a157019e171fbe672dd3", + "sha256:e57916ef1bd0fee4f21c4600e9d1da352d8816b52a599c46460e93a6e9f17086", + "sha256:ed40e926fa2f297e8a653c954b732f125ef97bdd4c889f243182299de27e2aa9", + "sha256:ef8108f8dedf204bb7b42994abf93882da1159728a2d4c5e82012edd92c9da9f", + "sha256:f933bbda5a3ee63b8834179096923b094b76f0c7a73c1cfe8f07ad608c58844b", + "sha256:fe5c63197c55bce6385d9aee16c4d0641684628f63ace85f73571e65ad1c1e8d" ], - "version": "==1.0.4" + "version": "==1.0.5" }, "packaging": { "hashes": [ @@ -578,7 +589,7 @@ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", "version": "==2.4.7" }, "pyperclip": { @@ -589,11 +600,11 @@ }, "pytest": { "hashes": [ - "sha256:c7c6ca206e93355074ae32f7403e8ea12163b1163c976fee7d4d84027c162be5", - "sha256:d45e0952f3727241918b8fd0f376f5ff6b301cc0777c6f9a556935c92d8a7d42" + "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e", + "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4" ], "index": "pypi", - "version": "==7.2.1" + "version": "==7.2.2" }, "pytest-asyncio": { "hashes": [ @@ -613,11 +624,11 @@ }, "pytest-xdist": { "hashes": [ - "sha256:336098e3bbd8193276867cc87db8b22903c3927665dff9d1ac8684c02f597b68", - "sha256:fa10f95a2564cd91652f2d132725183c3b590d9fdcdec09d3677386ecf4c1ce9" + "sha256:1849bd98d8b242b948e472db7478e090bf3361912a8fed87992ed94085f54727", + "sha256:37290d161638a20b672401deef1cba812d110ac27e35d213f091d15b8beb40c9" ], "index": "pypi", - "version": "==3.2.0" + "version": "==3.2.1" }, "ruamel.yaml": { "hashes": [ @@ -657,14 +668,16 @@ "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a", "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71", "sha256:d5e51e2901ec2366b79f16c2299a03e74ba4531ddcfacc1416639c557aef0ad8", + "sha256:da538167284de58a52109a9b89b8f6a53ff8437dd6dc26d33b57bf6699153122", "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7", "sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80", "sha256:ecdf1a604009bd35c674b9225a8fa609e0282d9b896c03dd441a91e5f53b534e", "sha256:efa08d63ef03d079dcae1dfe334f6c8847ba8b645d08df286358b1f5293d24ab", "sha256:f01da5790e95815eb5a8a138508c01c758e5f5bc0ce4286c4f7028b8dd7ac3d0", - "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646" + "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646", + "sha256:f6d3d39611ac2e4f62c3128a9eed45f19a6608670c5a2f4f07f24e8de3441d38" ], - "markers": "python_version < '3.10' and platform_python_implementation == 'CPython'", + "markers": "platform_python_implementation == 'CPython' and python_version < '3.10'", "version": "==0.2.7" }, "sortedcontainers": { @@ -842,11 +855,11 @@ }, "flake8-bugbear": { "hashes": [ - "sha256:39259814a83f33c8409417ee12dd4050c9c0bb4c8707c12fc18ae62b2f3ddee1", - "sha256:f136bd0ca2684f101168bba2310dec541e11aa6b252260c17dcf58d18069a740" + "sha256:beb5c7efcd7ccc2039ef66a77bb8db925e7be3531ff1cb4d0b7030d0e2113d72", + "sha256:e3e7f74c8a49ad3794a7183353026dabd68c74030d5f46571f84c1fb0eb79363" ], "index": "pypi", - "version": "==23.2.13" + "version": "==23.3.12" }, "isort": { "hashes": [ @@ -882,19 +895,19 @@ }, "pathspec": { "hashes": [ - 
"sha256:3a66eb970cbac598f9e5ccb5b2cf58930cd8e3ed86d393d541eaf2d8b1705229", - "sha256:64d338d4e0914e91c1792321e6907b5a593f1ab1851de7fc269557a21b30ebbc" + "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687", + "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293" ], "markers": "python_version >= '3.7'", - "version": "==0.11.0" + "version": "==0.11.1" }, "platformdirs": { "hashes": [ - "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9", - "sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567" + "sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa", + "sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8" ], "markers": "python_version >= '3.7'", - "version": "==3.0.0" + "version": "==3.1.1" }, "pycodestyle": { "hashes": [ From aa465b6de1702fce7e2b5a0e577d085e838c2209 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 21 Mar 2023 14:04:07 +0300 Subject: [PATCH 17/58] Decide what to do with router planner error at one place (#6781) --- .../distributed/planner/distributed_planner.c | 36 +++++++------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 17b63ee0a..866f7353a 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -944,18 +944,6 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina distributedPlan = CreateModifyPlan(originalQuery, query, plannerRestrictionContext); } - - /* the functions above always return a plan, possibly with an error */ - Assert(distributedPlan); - - if (distributedPlan->planningError == NULL) - { - return distributedPlan; - } - else - { - RaiseDeferredError(distributedPlan->planningError, DEBUG2); - } } else { @@ -968,18 +956,18 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina distributedPlan = CreateRouterPlan(originalQuery, query, plannerRestrictionContext); - if (distributedPlan->planningError == NULL) - { - return distributedPlan; - } - else - { - /* - * For debugging it's useful to display why query was not - * router plannable. - */ - RaiseDeferredError(distributedPlan->planningError, DEBUG2); - } + } + + /* the functions above always return a plan, possibly with an error */ + Assert(distributedPlan); + + if (distributedPlan->planningError == NULL) + { + return distributedPlan; + } + else + { + RaiseDeferredError(distributedPlan->planningError, DEBUG2); } if (hasUnresolvedParams) From 2713e015d695db8fcca5644abe6ce2e0d0e05d17 Mon Sep 17 00:00:00 2001 From: Ahmet Gedemenli Date: Tue, 21 Mar 2023 16:34:52 +0300 Subject: [PATCH 18/58] Check before logicalrep for rebalancer, error if needed (#6754) DESCRIPTION: Check before logicalrep for rebalancer, error if needed Check if we can use logical replication or not, in case of shard transfer mode = auto, before executing the shard moves. If we can't, error out. Before this PR, we used to error out in the middle of shard moves: ```sql set citus.shard_count = 4; -- just to get the error sooner select citus_remove_node('localhost',9702); create table t1 (a int primary key); select create_distributed_table('t1','a'); create table t2 (a bigint); select create_distributed_table('t2','a'); select citus_add_node('localhost',9702); select rebalance_table_shards(); NOTICE: Moving shard 102008 from localhost:9701 to localhost:9702 ... 
NOTICE: Moving shard 102009 from localhost:9701 to localhost:9702 ... NOTICE: Moving shard 102012 from localhost:9701 to localhost:9702 ... ERROR: cannot use logical replication to transfer shards of the relation t2 since it doesn't have a REPLICA IDENTITY or PRIMARY KEY ``` Now we check and error out in the beginning, without moving the shards. fixes: #6727 --- .../distributed/operations/shard_rebalancer.c | 44 ++++++++++++++++--- .../regress/expected/shard_rebalancer.out | 34 +++++++++++++- src/test/regress/sql/shard_rebalancer.sql | 15 +++++++ 3 files changed, 85 insertions(+), 8 deletions(-) diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index baed8b0d5..c5282202e 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -1818,10 +1818,10 @@ static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) { char transferMode = LookupShardTransferMode(shardReplicationModeOid); - EnsureReferenceTablesExistOnAllNodesExtended(transferMode); if (list_length(options->relationIdList) == 0) { + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); return; } @@ -1836,6 +1836,25 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) List *placementUpdateList = GetRebalanceSteps(options); + if (transferMode == TRANSFER_MODE_AUTOMATIC) + { + /* + * If the shard transfer mode is set to auto, we should check beforehand + * if we are able to use logical replication to transfer shards or not. + * We throw an error if any of the tables do not have a replica identity, which + * is required for logical replication to replicate UPDATE and DELETE commands. + */ + PlacementUpdateEvent *placementUpdate = NULL; + foreach_ptr(placementUpdate, placementUpdateList) + { + Oid relationId = RelationIdForShard(placementUpdate->shardId); + List *colocatedTableList = ColocatedTableList(relationId); + VerifyTablesHaveReplicaIdentity(colocatedTableList); + } + } + + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); + if (list_length(placementUpdateList) == 0) { return; @@ -1916,12 +1935,6 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo EnsureTableOwner(colocatedTableId); } - if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) - { - /* make sure that all tables included in the rebalance have a replica identity*/ - VerifyTablesHaveReplicaIdentity(colocatedTableList); - } - List *placementUpdateList = GetRebalanceSteps(options); if (list_length(placementUpdateList) == 0) @@ -1930,6 +1943,23 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo return 0; } + if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) + { + /* + * If the shard transfer mode is set to auto, we should check beforehand + * if we are able to use logical replication to transfer shards or not. + * We throw an error if any of the tables do not have a replica identity, which + * is required for logical replication to replicate UPDATE and DELETE commands. 
+ */ + PlacementUpdateEvent *placementUpdate = NULL; + foreach_ptr(placementUpdate, placementUpdateList) + { + relationId = RelationIdForShard(placementUpdate->shardId); + List *colocatedTables = ColocatedTableList(relationId); + VerifyTablesHaveReplicaIdentity(colocatedTables); + } + } + DropOrphanedResourcesInSeparateTransaction(); /* find the name of the shard transfer mode to interpolate in the scheduled command */ diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index 2146d67f1..1dea3b442 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -1482,7 +1482,6 @@ SELECT * from master_drain_node('localhost', :worker_2_port); ERROR: cannot use logical replication to transfer shards of the relation colocated_rebalance_test since it doesn't have a REPLICA IDENTITY or PRIMARY KEY DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. -CONTEXT: while executing command on localhost:xxxxx -- Make sure shouldhaveshards is false select shouldhaveshards from pg_dist_node where nodeport = :worker_2_port; shouldhaveshards @@ -2714,6 +2713,39 @@ SELECT sh.logicalrelid, pl.nodeport (5 rows) DROP TABLE single_shard_colocation_1a, single_shard_colocation_1b, single_shard_colocation_1c, single_shard_colocation_2a, single_shard_colocation_2b CASCADE; +-- verify we detect if one of the tables do not have a replica identity or primary key +-- and error out in case of shard transfer mode = auto +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +create table table_with_primary_key (a int primary key); +select create_distributed_table('table_with_primary_key','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +create table table_without_primary_key (a bigint); +select create_distributed_table('table_without_primary_key','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); + ?column? 
+---------------------------------------------------------------------
+ 1
+(1 row)
+
+select rebalance_table_shards();
+ERROR: cannot use logical replication to transfer shards of the relation table_without_primary_key since it doesn't have a REPLICA IDENTITY or PRIMARY KEY
+DROP TABLE table_with_primary_key, table_without_primary_key;
 \c - - - :worker_1_port
 SET citus.enable_ddl_propagation TO OFF;
 REVOKE ALL ON SCHEMA public FROM testrole;
diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql
index dbbc94732..da4259f5b 100644
--- a/src/test/regress/sql/shard_rebalancer.sql
+++ b/src/test/regress/sql/shard_rebalancer.sql
@@ -1497,6 +1497,21 @@ SELECT sh.logicalrelid, pl.nodeport

 DROP TABLE single_shard_colocation_1a, single_shard_colocation_1b, single_shard_colocation_1c, single_shard_colocation_2a, single_shard_colocation_2b CASCADE;

+-- verify we detect if one of the tables do not have a replica identity or primary key
+-- and error out in case of shard transfer mode = auto
+SELECT 1 FROM citus_remove_node('localhost', :worker_2_port);
+
+create table table_with_primary_key (a int primary key);
+select create_distributed_table('table_with_primary_key','a');
+create table table_without_primary_key (a bigint);
+select create_distributed_table('table_without_primary_key','a');
+
+-- add the second node back, then rebalance
+ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16;
+select 1 from citus_add_node('localhost', :worker_2_port);
+select rebalance_table_shards();
+
+DROP TABLE table_with_primary_key, table_without_primary_key;
 \c - - - :worker_1_port
 SET citus.enable_ddl_propagation TO OFF;
 REVOKE ALL ON SCHEMA public FROM testrole;

From 4960ced1759c38955cdc05ab60b505f4b3b0f408 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Wed, 22 Mar 2023 10:49:08 +0300
Subject: [PATCH 19/58] Add an arbitrary config test heavily based on multi_router_planner_fast_path.sql (#6782)

This is useful for testing #6773: since #6773 only adds support for router / fast-path queries, in theory almost all the tests in that file should work for null-shard-key tables too (and they indeed do). I deliberately did not replace multi_router_planner_fast_path.sql with the version added to the arbitrary configs, because we might still want to see when we're able to go through fast-path planning for the usual distributed tables (the ones that have a shard key).
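As a rough sketch of the distinction (illustrative only, not a precise specification of the planner; the queries are adapted from the comments in the new test file):

```sql
-- a single equality filter on the distribution column prunes to one shard
-- and is eligible for fast-path router planning
SELECT * FROM articles_hash WHERE author_id = 10;

-- still router plannable when both values happen to map to the same shard,
-- but the two distribution column filters keep it off the fast path
SELECT * FROM articles_hash WHERE author_id = 7 OR author_id = 8;
```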
--- src/test/regress/create_schedule | 1 + .../expected/arbitrary_configs_router.out | 1561 +++++++++++++++++ .../arbitrary_configs_router_create.out | 121 ++ .../regress/sql/arbitrary_configs_router.sql | 634 +++++++ .../sql/arbitrary_configs_router_create.sql | 118 ++ src/test/regress/sql_schedule | 1 + 6 files changed, 2436 insertions(+) create mode 100644 src/test/regress/expected/arbitrary_configs_router.out create mode 100644 src/test/regress/expected/arbitrary_configs_router_create.out create mode 100644 src/test/regress/sql/arbitrary_configs_router.sql create mode 100644 src/test/regress/sql/arbitrary_configs_router_create.sql diff --git a/src/test/regress/create_schedule b/src/test/regress/create_schedule index db2ae92be..e301678b9 100644 --- a/src/test/regress/create_schedule +++ b/src/test/regress/create_schedule @@ -14,3 +14,4 @@ test: arbitrary_configs_truncate_cascade_create test: arbitrary_configs_truncate_partition_create test: arbitrary_configs_alter_table_add_constraint_without_name_create test: merge_arbitrary_create +test: arbitrary_configs_router_create diff --git a/src/test/regress/expected/arbitrary_configs_router.out b/src/test/regress/expected/arbitrary_configs_router.out new file mode 100644 index 000000000..a42b955cc --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_router.out @@ -0,0 +1,1561 @@ +SET search_path TO arbitrary_configs_router; +SET client_min_messages TO WARNING; +-- test simple select for a single row +SELECT * FROM articles_hash WHERE author_id = 10 AND id = 50; + id | author_id | title | word_count +--------------------------------------------------------------------- + 50 | 10 | anjanette | 19519 +(1 row) + +-- get all titles by a single author +SELECT title FROM articles_hash WHERE author_id = 10; + title +--------------------------------------------------------------------- + aggrandize + absentness + andelee + attemper + anjanette +(5 rows) + +-- try ordering them by word count +SELECT title, word_count FROM articles_hash + WHERE author_id = 10 + ORDER BY word_count DESC NULLS LAST; + title | word_count +--------------------------------------------------------------------- + anjanette | 19519 + aggrandize | 17277 + attemper | 14976 + andelee | 6363 + absentness | 1820 +(5 rows) + +-- look at last two articles by an author +SELECT title, id FROM articles_hash + WHERE author_id = 5 + ORDER BY id + LIMIT 2; + title | id +--------------------------------------------------------------------- + aruru | 5 + adversa | 15 +(2 rows) + +-- find all articles by two authors in same shard +-- but plan is not fast path router plannable due to +-- two distribution columns in the query +SELECT title, author_id FROM articles_hash + WHERE author_id = 7 OR author_id = 8 + ORDER BY author_id ASC, id; + title | author_id +--------------------------------------------------------------------- + aseptic | 7 + auriga | 7 + arsenous | 7 + archduchies | 7 + abeyance | 7 + agatized | 8 + assembly | 8 + aerophyte | 8 + anatine | 8 + alkylic | 8 +(10 rows) + +-- having clause is supported if it goes to a single shard +-- and single dist. 
key on the query +SELECT author_id, sum(word_count) AS corpus_size FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id + HAVING sum(word_count) > 1000 + ORDER BY sum(word_count) DESC; + author_id | corpus_size +--------------------------------------------------------------------- + 1 | 35894 +(1 row) + +-- fast path planner only supports = operator +SELECT * FROM articles_hash WHERE author_id <= 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +SELECT * FROM articles_hash WHERE author_id IN (1, 3) ORDER BY 1,2,3,4; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 43 | 3 | affixal | 12723 +(10 rows) + +-- queries with CTEs cannot go through fast-path planning +WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) +SELECT * FROM first_author; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- two CTE joins also cannot go through fast-path planning +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + id | author_id | id | title +--------------------------------------------------------------------- + 1 | 1 | 1 | arsenous + 11 | 1 | 11 | alamo + 21 | 1 | 21 | arcading + 31 | 1 | 31 | athwartships + 41 | 1 | 41 | aznavour +(5 rows) + +-- this is a different case where each CTE is recursively planned and those go +-- through the fast-path router planner, but the top level join is not +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + id | author_id | id | title +--------------------------------------------------------------------- +(0 rows) + +-- recursive CTEs also cannot go through fast +-- path planning +WITH RECURSIVE hierarchy as ( + SELECT *, 1 AS level + FROM company_employees + WHERE company_id = 1 and manager_id = 0 + UNION + SELECT ce.*, (h.level+1) + FROM hierarchy h JOIN company_employees ce + ON (h.employee_id = ce.manager_id AND + h.company_id = ce.company_id AND + ce.company_id = 1)) +SELECT * FROM hierarchy WHERE LEVEL <= 2; + company_id | employee_id | manager_id | level +--------------------------------------------------------------------- + 1 | 1 | 0 | 1 + 1 | 2 | 1 | 2 + 1 | 3 | 1 | 2 +(3 rows) + +WITH update_article AS ( + UPDATE articles_hash SET word_count = 10 WHERE id = 1 AND word_count = 9 RETURNING * +) +SELECT * FROM update_article; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +WITH delete_article AS ( + DELETE FROM articles_hash WHERE id = 1 AND word_count = 10 RETURNING * +) +SELECT * FROM delete_article; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- grouping sets are 
supported via fast-path +SELECT + id, substring(title, 2, 1) AS subtitle, count(*) + FROM articles_hash + WHERE author_id = 1 + GROUP BY GROUPING SETS ((id),(subtitle)) + ORDER BY id, subtitle; + id | subtitle | count +--------------------------------------------------------------------- + 1 | | 1 + 11 | | 1 + 21 | | 1 + 31 | | 1 + 41 | | 1 + | l | 1 + | r | 2 + | t | 1 + | z | 1 +(9 rows) + +-- queries which involve functions in FROM clause are not supported via fast path planning +SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1; + id | author_id | title | word_count | position +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 | 3 + 11 | 1 | alamo | 1347 | 3 + 21 | 1 | arcading | 5890 | 3 + 31 | 1 | athwartships | 7271 | 3 + 41 | 1 | aznavour | 11814 | 3 +(5 rows) + +-- sublinks are not supported via fast path planning +SELECT * FROM articles_hash +WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2) +ORDER BY articles_hash.id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 2 | 2 | abducing | 13642 + 12 | 2 | archiblast | 18185 + 22 | 2 | antipope | 2728 + 32 | 2 | amazon | 11342 + 42 | 2 | ausable | 15885 +(5 rows) + +-- subqueries are not supported via fast path planning +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; + id | word_count +--------------------------------------------------------------------- + 50 | 19519 + 14 | 19094 + 48 | 18610 + 12 | 18185 + 46 | 17702 +(5 rows) + +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test +WHERE test.id = articles_hash.id and articles_hash.author_id = 1 +ORDER BY articles_hash.id; + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +-- simple lookup query just works +SELECT * + FROM articles_hash + WHERE author_id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- below query hits a single shard but with multiple filters +-- so cannot go via fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 OR author_id = 17; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- rename the output columns +SELECT id as article_id, word_count * id as random_value + FROM articles_hash + WHERE author_id = 1; + article_id | random_value +--------------------------------------------------------------------- + 1 | 9572 + 11 | 14817 + 21 | 123690 + 31 | 225401 + 41 | 484374 +(5 rows) + +-- joins do not go through fast-path planning +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + LIMIT 3; + first_author | second_word_count +--------------------------------------------------------------------- + 10 | 17277 + 10 | 1820 + 10 | 6363 +(3 rows) + +-- 
single shard select with limit goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 3; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 +(3 rows) + +-- single shard select with limit + offset goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 2 + OFFSET 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 +(2 rows) + +-- single shard select with limit + offset + order by goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id desc + LIMIT 2 + OFFSET 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 31 | 1 | athwartships | 7271 + 21 | 1 | arcading | 5890 +(2 rows) + +-- single shard select with group by on non-partition column goes through fast-path planning +SELECT id + FROM articles_hash + WHERE author_id = 1 + GROUP BY id + ORDER BY id; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- single shard select with distinct goes through fast-path planning +SELECT DISTINCT id + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- single shard aggregate goes through fast-path planning +SELECT avg(word_count) + FROM articles_hash + WHERE author_id = 2; + avg +--------------------------------------------------------------------- + 12356.400000000000 +(1 row) + +-- max, min, sum, count goes through fast-path planning +SELECT max(word_count) as max, min(word_count) as min, + sum(word_count) as sum, count(word_count) as cnt + FROM articles_hash + WHERE author_id = 2; + max | min | sum | cnt +--------------------------------------------------------------------- + 18185 | 2728 | 61782 | 5 +(1 row) + +-- queries with aggregates and group by goes through fast-path planning +SELECT max(word_count) + FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id; + max +--------------------------------------------------------------------- + 11814 +(1 row) + +-- set operations are not supported via fast-path planning +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 3 +) AS combination +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 43 | 3 | affixal | 12723 +(10 rows) + +-- function calls in the target list is supported via fast path +SELECT LEFT(title, 1) FROM articles_hash WHERE author_id = 1; + left +--------------------------------------------------------------------- + a + a + a + a + a +(5 rows) + +-- top-level union queries are supported through recursive planning +-- unions in subqueries are not supported via fast-path planning +SELECT * FROM ( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 1)) uu +ORDER BY 1, 2 +LIMIT 5; + id | author_id | 
title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Test various filtering options for router plannable check +-- cannot go through fast-path if there is +-- explicit coercion +SELECT * + FROM articles_hash + WHERE author_id = 1::bigint; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- can go through fast-path if there is +-- implicit coercion +-- This doesn't work see the related issue +-- reported https://github.com/citusdata/citus/issues/2605 +-- SELECT * +-- FROM articles_hash +-- WHERE author_id = 1.0; +SELECT * + FROM articles_hash + WHERE author_id = 68719476736; -- this is bigint + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 and author_id >= 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 or id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- goes through fast-path planning because +-- the dist. key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = 1 or id = 41); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 41 | 1 | aznavour | 11814 +(2 rows) + +-- this time there is an OR clause which prevents +-- router planning at all +SELECT * + FROM articles_hash + WHERE author_id = 1 and id = 1 or id = 41; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 41 | 1 | aznavour | 11814 +(2 rows) + +-- goes through fast-path planning because +-- the dist. 
key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = random()::int * 0); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- not router plannable due to function call on the right side +SELECT * + FROM articles_hash + WHERE author_id = (random()::int * 0 + 1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(-1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE 1 = abs(author_id); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(author_id - 2); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- the function is not on the dist. 
key, so qualify as +-- fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = abs(id - 2)); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 +(1 row) + +-- not router plannable due to is true +SELECT * + FROM articles_hash + WHERE (author_id = 1) is true; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- router plannable, (boolean expression) = true is collapsed to (boolean expression) +SELECT * + FROM articles_hash + WHERE (author_id = 1) = true; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- some more complex quals +SELECT count(*) FROM articles_hash WHERE (author_id = 15) AND (id = 1 OR word_count > 5); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (author_id = 15) OR (id = 1 AND word_count > 5); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) OR (author_id = 1 AND word_count > 5); + count +--------------------------------------------------------------------- + 6 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 OR word_count > 5); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 AND (word_count > 5 OR id = 2)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 OR author_id = 2)); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 AND author_id = 2)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND ((word_count > 5 OR title ilike 'b%' ) AND (author_id = 2 AND word_count > 50))); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- fast-path router plannable, between operator is on another column +SELECT * + FROM articles_hash + WHERE (author_id = 1) and id between 0 and 20; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 +(2 rows) + +-- fast-path router plannable, partition column expression is and'ed to rest +SELECT * + FROM articles_hash + WHERE (author_id = 1) and (id = 1 or id = 31) and title like '%s'; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 31 | 1 | athwartships | 7271 +(2 rows) + +-- fast-path router plannable, order is changed +SELECT * + FROM articles_hash + WHERE (id = 1 or id = 31) and title like '%s' and (author_id = 1); + id | author_id | title | word_count 
+--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 31 | 1 | athwartships | 7271 +(2 rows) + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1) and (word_count < 3000 or word_count > 8000); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 41 | 1 | aznavour | 11814 +(3 rows) + +-- window functions are supported with fast-path router plannable +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5; + prev | title | word_count +--------------------------------------------------------------------- + | afrasia | 864 + afrasia | adversa | 3164 + adversa | antehall | 7707 + antehall | aminate | 9089 + aminate | aruru | 11389 +(5 rows) + +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5 + ORDER BY word_count DESC; + prev | title | word_count +--------------------------------------------------------------------- + aminate | aruru | 11389 + antehall | aminate | 9089 + adversa | antehall | 7707 + afrasia | adversa | 3164 + | afrasia | 864 +(5 rows) + +SELECT id, MIN(id) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + id | min +--------------------------------------------------------------------- + 11 | 11 + 21 | 11 + 31 | 11 + 1 | 1 + 41 | 1 +(5 rows) + +SELECT id, word_count, AVG(word_count) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + id | word_count | avg +--------------------------------------------------------------------- + 11 | 1347 | 1347.0000000000000000 + 21 | 5890 | 3618.5000000000000000 + 31 | 7271 | 4836.0000000000000000 + 1 | 9572 | 6020.0000000000000000 + 41 | 11814 | 7178.8000000000000000 +(5 rows) + +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1; + word_count | rank +--------------------------------------------------------------------- + 1347 | 1 + 5890 | 2 + 7271 | 3 + 9572 | 4 + 11814 | 5 +(5 rows) + +-- some more tests on complex target lists +SELECT DISTINCT ON (author_id, id) author_id, id, + MIN(id) over (order by avg(word_count)) * AVG(id * 5.2 + (1.0/max(word_count))) over (order by max(word_count)) as t1, + count(*) FILTER (WHERE title LIKE 'al%') as cnt_with_filter, + count(*) FILTER (WHERE '0300030' LIKE '%3%') as cnt_with_filter_2, + avg(case when id > 2 then char_length(word_count::text) * (id * strpos(word_count::text, '1')) end) as case_cnt, + COALESCE(strpos(avg(word_count)::text, '1'), 20) + FROM articles_hash as aliased_table + WHERE author_id = 1 + GROUP BY author_id, id + HAVING count(DISTINCT title) > 0 + ORDER BY author_id, id, sum(word_count) - avg(char_length(title)) DESC, COALESCE(array_upper(ARRAY[max(id)],1) * 5,0) DESC; + author_id | id | t1 | cnt_with_filter | cnt_with_filter_2 | case_cnt | coalesce +--------------------------------------------------------------------- + 1 | 1 | 
83.20028854345579490574 | 0 | 1 | | 0 + 1 | 11 | 629.20816629547141796586 | 1 | 1 | 44.0000000000000000 | 1 + 1 | 21 | 915.20501693381380745499 | 0 | 1 | 0.00000000000000000000 | 0 + 1 | 31 | 1201.20384890897723321000 | 0 | 1 | 496.0000000000000000 | 4 + 1 | 41 | 109.200247763831844321405335 | 0 | 1 | 205.0000000000000000 | 1 +(5 rows) + +-- where false queries are router plannable but not fast-path +SELECT * + FROM articles_hash + WHERE false; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and false; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and 1=0; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +SELECT * + FROM articles_hash + WHERE null and author_id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- we cannot qualify dist_key = X operator Y via +-- fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 2 | 2 | abducing | 13642 + 12 | 2 | archiblast | 18185 + 22 | 2 | antipope | 2728 + 32 | 2 | amazon | 11342 + 42 | 2 | ausable | 15885 +(5 rows) + +-- where false with immutable function returning false +-- goes through fast-path +SELECT * + FROM articles_hash a + WHERE a.author_id = 10 and int4eq(1, 2); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- partition_column is null clause does not prune out any shards, +-- all shards remain after shard pruning, not router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id is null; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- partition_column equals to null clause prunes out all shards +-- no shards after shard pruning, router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id = null; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- union/difference /intersection with where false +-- this query was not originally router plannable, addition of 1=0 +-- makes it router plannable but not fast-path +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- same with the above, but with WHERE false +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination WHERE false +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- window functions with where false +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY 
word_count) + FROM articles_hash + WHERE author_id = 1 and 1=0; + word_count | rank +--------------------------------------------------------------------- +(0 rows) + +-- complex query hitting a single shard and a fast-path +SELECT + count(DISTINCT CASE + WHEN + word_count > 100 + THEN + id + ELSE + NULL + END) as c + FROM + articles_hash + WHERE + author_id = 5; + c +--------------------------------------------------------------------- + 5 +(1 row) + +-- queries inside transactions can be fast-path router plannable +BEGIN; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +END; +-- queries inside read-only transactions can be fast-path router plannable +SET TRANSACTION READ ONLY; +WARNING: SET TRANSACTION can only be used in transaction blocks +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +END; +WARNING: there is no transaction in progress +-- cursor queries are fast-path router plannable +BEGIN; +DECLARE test_cursor CURSOR FOR + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +FETCH test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 +(1 row) + +FETCH ALL test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(4 rows) + +FETCH test_cursor; -- fetch one row after the last + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +FETCH BACKWARD test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 41 | 1 | aznavour | 11814 +(1 row) + +END; +-- queries inside copy can be router plannable +COPY ( + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id) TO STDOUT; +1 1 arsenous 9572 +11 1 alamo 1347 +21 1 arcading 5890 +31 1 athwartships 7271 +41 1 aznavour 11814 +-- table creation queries inside can be fast-path router plannable +CREATE TEMP TABLE temp_articles_hash as + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +-- fast-path router plannable queries may include filter for aggregates +SELECT count(*), count(*) FILTER (WHERE id < 3) + FROM articles_hash + WHERE author_id = 1; + count | count +--------------------------------------------------------------------- + 5 | 1 +(1 row) + +-- prepare queries can be router plannable +PREPARE author_1_articles as + SELECT * + FROM articles_hash + WHERE author_id = 1; +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count 
+--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- parametric prepare queries can be router plannable +PREPARE author_articles(int) as + SELECT * + FROM articles_hash + WHERE author_id = $1; +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE 
author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +PREPARE author_articles_update(int) AS + UPDATE articles_hash SET title = 'test' WHERE author_id = $1; +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +-- we don't want too many details. Though we're omitting +-- "DETAIL: distribution column value:", we see it as acceptable +-- since the query results verify the correctness +\set VERBOSITY terse +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + 
id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +\set VERBOSITY default +-- insert .. 
select via coordinator could also +-- use fast-path queries +PREPARE insert_sel(int, int) AS +INSERT INTO articles_hash + SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0; +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +-- one final interesting prepared statement +-- where one of the filters is on the target list +PREPARE fast_path_agg_filter(int, int) AS + SELECT + count(*) FILTER (WHERE word_count=$1) + FROM + articles_hash + WHERE author_id = $2; +EXECUTE fast_path_agg_filter(1,1); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(2,2); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(3,3); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(4,4); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(5,5); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(6,6); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- views internally become subqueries, so not fast-path router query +SELECT * FROM test_view; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- materialized views can be created for fast-path router plannable queries +CREATE MATERIALIZED VIEW mv_articles_hash_empty AS + SELECT * FROM articles_hash WHERE author_id = 1; +SELECT * FROM mv_articles_hash_empty; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +SELECT id + FROM articles_hash + WHERE author_id = 1; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +INSERT INTO articles_hash VALUES (51, 1, 'amateus', 1814), (52, 1, 'second amateus', 2824); +-- verify insert is successful (not router plannable and executable) +SELECT id + FROM articles_hash + WHERE author_id = 1; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 + 51 + 52 +(7 rows) + +SELECT count(*) FROM collections_list WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FROM collections_list_1 WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FROM collections_list_2 WHERE key = 4; + count +--------------------------------------------------------------------- + 0 +(1 row) + +UPDATE collections_list SET value = 15 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FILTER (where value = 15) FROM collections_list_1 WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 
row) + +SELECT count(*) FILTER (where value = 15) FROM collections_list_2 WHERE key = 4; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- test INSERT using values from generate_series() and repeat() functions +INSERT INTO authors_reference (id, name) VALUES (generate_series(1, 10), repeat('Migjeni', 3)); +SELECT * FROM authors_reference ORDER BY 1, 2; + id | name +--------------------------------------------------------------------- + 1 | MigjeniMigjeniMigjeni + 2 | MigjeniMigjeniMigjeni + 3 | MigjeniMigjeniMigjeni + 4 | MigjeniMigjeniMigjeni + 5 | MigjeniMigjeniMigjeni + 6 | MigjeniMigjeniMigjeni + 7 | MigjeniMigjeniMigjeni + 8 | MigjeniMigjeniMigjeni + 9 | MigjeniMigjeniMigjeni + 10 | MigjeniMigjeniMigjeni +(10 rows) + diff --git a/src/test/regress/expected/arbitrary_configs_router_create.out b/src/test/regress/expected/arbitrary_configs_router_create.out new file mode 100644 index 000000000..74dfbf4f3 --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_router_create.out @@ -0,0 +1,121 @@ +CREATE SCHEMA arbitrary_configs_router; +SET search_path TO arbitrary_configs_router; +CREATE TABLE articles_hash ( + id bigint NOT NULL, + author_id bigint NOT NULL, + title varchar(20) NOT NULL, + word_count integer +); +SELECT create_distributed_table('articles_hash', 'author_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE authors_reference (id int, name text); +SELECT create_reference_table('authors_reference'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- create a bunch of test data +INSERT INTO articles_hash VALUES (1, 1, 'arsenous', 9572), (2, 2, 'abducing', 13642),( 3, 3, 'asternal', 10480),( 4, 4, 'altdorfer', 14551),( 5, 5, 'aruru', 11389), + (6, 6, 'atlases', 15459),(7, 7, 'aseptic', 12298),( 8, 8, 'agatized', 16368),(9, 9, 'alligate', 438), + (10, 10, 'aggrandize', 17277),(11, 1, 'alamo', 1347),(12, 2, 'archiblast', 18185), + (13, 3, 'aseyev', 2255),(14, 4, 'andesite', 19094),(15, 5, 'adversa', 3164), + (16, 6, 'allonym', 2),(17, 7, 'auriga', 4073),(18, 8, 'assembly', 911),(19, 9, 'aubergiste', 4981), + (20, 10, 'absentness', 1820),(21, 1, 'arcading', 5890),(22, 2, 'antipope', 2728),(23, 3, 'abhorring', 6799), + (24, 4, 'audacious', 3637),(25, 5, 'antehall', 7707),(26, 6, 'abington', 4545),(27, 7, 'arsenous', 8616), + (28, 8, 'aerophyte', 5454),(29, 9, 'amateur', 9524),(30, 10, 'andelee', 6363),(31, 1, 'athwartships', 7271), + (32, 2, 'amazon', 11342),(33, 3, 'autochrome', 8180),(34, 4, 'amnestied', 12250),(35, 5, 'aminate', 9089), + (36, 6, 'ablation', 13159),(37, 7, 'archduchies', 9997),(38, 8, 'anatine', 14067),(39, 9, 'anchises', 10906), + (40, 10, 'attemper', 14976),(41, 1, 'aznavour', 11814),(42, 2, 'ausable', 15885),(43, 3, 'affixal', 12723), + (44, 4, 'anteport', 16793),(45, 5, 'afrasia', 864),(46, 6, 'atlanta', 17702),(47, 7, 'abeyance', 1772), + (48, 8, 'alkylic', 18610),(49, 9, 'anyone', 2681),(50, 10, 'anjanette', 19519); +CREATE TABLE company_employees (company_id int, employee_id int, manager_id int); +SELECT create_distributed_table('company_employees', 'company_id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO company_employees values(1, 1, 0); +INSERT INTO company_employees values(1, 2, 1); +INSERT INTO company_employees values(1, 3, 1); +INSERT INTO company_employees values(1, 
4, 2); +INSERT INTO company_employees values(1, 5, 4); +INSERT INTO company_employees values(3, 1, 0); +INSERT INTO company_employees values(3, 15, 1); +INSERT INTO company_employees values(3, 3, 1); +-- finally, some tests with partitioned tables +CREATE TABLE collections_list ( + key bigint, + ts timestamptz, + collection_id integer, + value numeric +) PARTITION BY LIST (collection_id ); +CREATE TABLE collections_list_1 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 1 ); +CREATE TABLE collections_list_2 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 2 ); +SELECT create_distributed_table('collections_list', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO collections_list SELECT i % 10, now(), (i % 2) + 1, i*i FROM generate_series(0, 50)i; +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id() RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = 1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id(int) RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = $1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; +-- check that functions returning a setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count() RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = 1; + +END; +$$ LANGUAGE plpgsql; +-- check that functions returning a setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count(int) RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = $1; + +END; +$$ LANGUAGE plpgsql; +-- Suppress the warning that tells that the view won't be distributed +-- because it depends on a local table. +-- +-- This only happens when running PostgresConfig. +SET client_min_messages TO ERROR; +CREATE VIEW test_view AS + SELECT * FROM articles_hash WHERE author_id = 1; diff --git a/src/test/regress/sql/arbitrary_configs_router.sql b/src/test/regress/sql/arbitrary_configs_router.sql new file mode 100644 index 000000000..f59c5fa4a --- /dev/null +++ b/src/test/regress/sql/arbitrary_configs_router.sql @@ -0,0 +1,634 @@ +SET search_path TO arbitrary_configs_router; + +SET client_min_messages TO WARNING; + +-- test simple select for a single row +SELECT * FROM articles_hash WHERE author_id = 10 AND id = 50; + +-- get all titles by a single author +SELECT title FROM articles_hash WHERE author_id = 10; + +-- try ordering them by word count +SELECT title, word_count FROM articles_hash + WHERE author_id = 10 + ORDER BY word_count DESC NULLS LAST; + +-- look at last two articles by an author +SELECT title, id FROM articles_hash + WHERE author_id = 5 + ORDER BY id + LIMIT 2; + +-- find all articles by two authors in same shard +-- but plan is not fast path router plannable due to +-- two distribution columns in the query +SELECT title, author_id FROM articles_hash + WHERE author_id = 7 OR author_id = 8 + ORDER BY author_id ASC, id; + +-- having clause is supported if it goes to a single shard +-- and single dist. 
key on the query +SELECT author_id, sum(word_count) AS corpus_size FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id + HAVING sum(word_count) > 1000 + ORDER BY sum(word_count) DESC; + +-- fast path planner only supports = operator +SELECT * FROM articles_hash WHERE author_id <= 1; +SELECT * FROM articles_hash WHERE author_id IN (1, 3) ORDER BY 1,2,3,4; + +-- queries with CTEs cannot go through fast-path planning +WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) +SELECT * FROM first_author; + +-- two CTE joins also cannot go through fast-path planning +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + +-- this is a different case where each CTE is recursively planned and those go +-- through the fast-path router planner, but the top level join is not +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + +-- recursive CTEs also cannot go through fast +-- path planning +WITH RECURSIVE hierarchy as ( + SELECT *, 1 AS level + FROM company_employees + WHERE company_id = 1 and manager_id = 0 + UNION + SELECT ce.*, (h.level+1) + FROM hierarchy h JOIN company_employees ce + ON (h.employee_id = ce.manager_id AND + h.company_id = ce.company_id AND + ce.company_id = 1)) +SELECT * FROM hierarchy WHERE LEVEL <= 2; + +WITH update_article AS ( + UPDATE articles_hash SET word_count = 10 WHERE id = 1 AND word_count = 9 RETURNING * +) +SELECT * FROM update_article; + +WITH delete_article AS ( + DELETE FROM articles_hash WHERE id = 1 AND word_count = 10 RETURNING * +) +SELECT * FROM delete_article; + +-- grouping sets are supported via fast-path +SELECT + id, substring(title, 2, 1) AS subtitle, count(*) + FROM articles_hash + WHERE author_id = 1 + GROUP BY GROUPING SETS ((id),(subtitle)) + ORDER BY id, subtitle; + +-- queries which involve functions in FROM clause are not supported via fast path planning +SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1; + +-- sublinks are not supported via fast path planning +SELECT * FROM articles_hash +WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2) +ORDER BY articles_hash.id; + +-- subqueries are not supported via fast path planning +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; + +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test +WHERE test.id = articles_hash.id and articles_hash.author_id = 1 +ORDER BY articles_hash.id; + +-- simple lookup query just works +SELECT * + FROM articles_hash + WHERE author_id = 1; + +-- below query hits a single shard but with multiple filters +-- so cannot go via fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 OR author_id = 17; + +-- rename the output columns +SELECT id as article_id, word_count * id as random_value + FROM articles_hash + WHERE author_id = 1; + +-- joins do not go through fast-path planning +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + 
LIMIT 3;
+
+-- single shard select with limit goes through fast-path planning
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1
+	LIMIT 3;
+
+-- single shard select with limit + offset goes through fast-path planning
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1
+	LIMIT 2
+	OFFSET 1;
+
+-- single shard select with limit + offset + order by goes through fast-path planning
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1
+	ORDER BY id desc
+	LIMIT 2
+	OFFSET 1;
+
+-- single shard select with group by on non-partition column goes through fast-path planning
+SELECT id
+	FROM articles_hash
+	WHERE author_id = 1
+	GROUP BY id
+	ORDER BY id;
+
+-- single shard select with distinct goes through fast-path planning
+SELECT DISTINCT id
+	FROM articles_hash
+	WHERE author_id = 1
+	ORDER BY id;
+
+-- single shard aggregate goes through fast-path planning
+SELECT avg(word_count)
+	FROM articles_hash
+	WHERE author_id = 2;
+
+-- max, min, sum, count go through fast-path planning
+SELECT max(word_count) as max, min(word_count) as min,
+	sum(word_count) as sum, count(word_count) as cnt
+	FROM articles_hash
+	WHERE author_id = 2;
+
+
+-- queries with aggregates and group by go through fast-path planning
+SELECT max(word_count)
+	FROM articles_hash
+	WHERE author_id = 1
+	GROUP BY author_id;
+
+
+-- set operations are not supported via fast-path planning
+SELECT * FROM (
+	SELECT * FROM articles_hash WHERE author_id = 1
+	UNION
+	SELECT * FROM articles_hash WHERE author_id = 3
+) AS combination
+ORDER BY id;
+
+-- function calls in the target list are supported via fast path
+SELECT LEFT(title, 1) FROM articles_hash WHERE author_id = 1;
+
+
+-- top-level union queries are supported through recursive planning
+
+-- unions in subqueries are not supported via fast-path planning
+SELECT * FROM (
+	(SELECT * FROM articles_hash WHERE author_id = 1)
+	UNION
+	(SELECT * FROM articles_hash WHERE author_id = 1)) uu
+ORDER BY 1, 2
+LIMIT 5;
+
+
+-- Test various filtering options for router plannable check
+
+-- cannot go through fast-path if there is
+-- explicit coercion
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1::bigint;
+
+-- can go through fast-path if there is
+-- implicit coercion
+-- This doesn't work; see the related issue
+-- reported at https://github.com/citusdata/citus/issues/2605
+-- SELECT *
+--	FROM articles_hash
+--	WHERE author_id = 1.0;
+
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 68719476736; -- this is bigint
+
+-- cannot go through fast-path due to
+-- multiple filters on the dist. key
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 and author_id >= 1;
+
+-- cannot go through fast-path due to
+-- multiple filters on the dist. key
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 or id = 1;
+
+-- goes through fast-path planning because
+-- the dist. key is ANDed with the rest of the
+-- filters
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 and (id = 1 or id = 41);
+
+-- this time there is an OR clause which prevents
+-- router planning at all
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 and id = 1 or id = 41;
+
+-- goes through fast-path planning because
+-- the dist. key is ANDed with the rest of the
+-- filters
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 and (id = random()::int * 0);
+
+-- not router plannable due to function call on the right side
+SELECT *
+	FROM articles_hash
+	WHERE author_id = (random()::int * 0 + 1);
+
+-- Citus does not qualify this as a fast-path because
+-- dist_key = func()
+SELECT *
+	FROM articles_hash
+	WHERE author_id = abs(-1);
+
+-- Citus does not qualify this as a fast-path because
+-- dist_key = func()
+SELECT *
+	FROM articles_hash
+	WHERE 1 = abs(author_id);
+
+-- Citus does not qualify this as a fast-path because
+-- dist_key = func()
+SELECT *
+	FROM articles_hash
+	WHERE author_id = abs(author_id - 2);
+
+-- the function is not on the dist. key, so this qualifies as
+-- fast-path
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 and (id = abs(id - 2));
+
+-- not router plannable due to IS TRUE
+SELECT *
+	FROM articles_hash
+	WHERE (author_id = 1) is true;
+
+-- router plannable, (boolean expression) = true is collapsed to (boolean expression)
+SELECT *
+	FROM articles_hash
+	WHERE (author_id = 1) = true;
+
+-- some more complex quals
+SELECT count(*) FROM articles_hash WHERE (author_id = 15) AND (id = 1 OR word_count > 5);
+SELECT count(*) FROM articles_hash WHERE (author_id = 15) OR (id = 1 AND word_count > 5);
+SELECT count(*) FROM articles_hash WHERE (id = 15) OR (author_id = 1 AND word_count > 5);
+SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 OR word_count > 5);
+SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 AND (word_count > 5 OR id = 2));
+SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 OR author_id = 2));
+SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 AND author_id = 2));
+SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND ((word_count > 5 OR title ilike 'b%' ) AND (author_id = 2 AND word_count > 50)));
+
+-- fast-path router plannable, between operator is on another column
+SELECT *
+	FROM articles_hash
+	WHERE (author_id = 1) and id between 0 and 20;
+
+-- fast-path router plannable, partition column expression is and'ed to rest
+SELECT *
+	FROM articles_hash
+	WHERE (author_id = 1) and (id = 1 or id = 31) and title like '%s';
+
+-- fast-path router plannable, order is changed
+SELECT *
+	FROM articles_hash
+	WHERE (id = 1 or id = 31) and title like '%s' and (author_id = 1);
+
+-- fast-path router plannable
+SELECT *
+	FROM articles_hash
+	WHERE (title like '%s' or title like 'a%') and (author_id = 1);
+
+-- fast-path router plannable
+SELECT *
+	FROM articles_hash
+	WHERE (title like '%s' or title like 'a%') and (author_id = 1) and (word_count < 3000 or word_count > 8000);
+
+-- window functions are supported with fast-path router planning
+SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count
+	FROM articles_hash
+	WHERE author_id = 5;
+
+SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count
+	FROM articles_hash
+	WHERE author_id = 5
+	ORDER BY word_count DESC;
+
+SELECT id, MIN(id) over (order by word_count)
+	FROM articles_hash
+	WHERE author_id = 1;
+
+SELECT id, word_count, AVG(word_count) over (order by word_count)
+	FROM articles_hash
+	WHERE author_id = 1;
+
+SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count)
+	FROM articles_hash
+	WHERE author_id = 1;
+
+-- some more tests on complex target lists
+SELECT DISTINCT ON (author_id, id) author_id, id,
+	MIN(id) over (order by avg(word_count)) * AVG(id * 5.2 + (1.0/max(word_count))) over (order by max(word_count)) as t1,
+	count(*) FILTER (WHERE title LIKE 'al%') as cnt_with_filter,
+	count(*) FILTER (WHERE '0300030' LIKE '%3%') as cnt_with_filter_2,
+	avg(case when id > 2 then char_length(word_count::text) * (id * strpos(word_count::text, '1')) end) as case_cnt,
+	COALESCE(strpos(avg(word_count)::text, '1'), 20)
+	FROM articles_hash as aliased_table
+	WHERE author_id = 1
+	GROUP BY author_id, id
+	HAVING count(DISTINCT title) > 0
+	ORDER BY author_id, id, sum(word_count) - avg(char_length(title)) DESC, COALESCE(array_upper(ARRAY[max(id)],1) * 5,0) DESC;
+
+-- where false queries are router plannable but not fast-path
+SELECT *
+	FROM articles_hash
+	WHERE false;
+
+-- fast-path with false
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 and false;
+
+-- fast-path with false
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 and 1=0;
+
+SELECT *
+	FROM articles_hash
+	WHERE null and author_id = 1;
+
+-- we cannot qualify dist_key = X operator Y via
+-- fast-path planning
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1 + 1;
+
+-- where false with immutable function returning false
+-- goes through fast-path
+SELECT *
+	FROM articles_hash a
+	WHERE a.author_id = 10 and int4eq(1, 2);
+
+-- a partition_column IS NULL clause does not prune out any shards;
+-- all shards remain after shard pruning, not router plannable
+-- not fast-path router either
+SELECT *
+	FROM articles_hash a
+	WHERE a.author_id is null;
+
+-- a partition_column = null clause prunes out all shards;
+-- no shards after shard pruning, router plannable
+-- not fast-path router either
+SELECT *
+	FROM articles_hash a
+	WHERE a.author_id = null;
+
+-- union/difference/intersection with where false
+-- this query was not originally router plannable, addition of 1=0
+-- makes it router plannable but not fast-path
+SELECT * FROM (
+	SELECT * FROM articles_hash WHERE author_id = 1
+	UNION
+	SELECT * FROM articles_hash WHERE author_id = 2 and 1=0
+) AS combination
+ORDER BY id;
+
+-- same as the above, but with WHERE false
+SELECT * FROM (
+	SELECT * FROM articles_hash WHERE author_id = 1
+	UNION
+	SELECT * FROM articles_hash WHERE author_id = 2 and 1=0
+) AS combination WHERE false
+ORDER BY id;
+
+-- window functions with where false
+SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count)
+	FROM articles_hash
+	WHERE author_id = 1 and 1=0;
+
+-- complex query hitting a single shard and a fast-path
+SELECT
+	count(DISTINCT CASE
+			WHEN
+				word_count > 100
+			THEN
+				id
+			ELSE
+				NULL
+			END) as c
+	FROM
+		articles_hash
+	WHERE
+		author_id = 5;
+-- queries inside transactions can be fast-path router plannable
+BEGIN;
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1
+	ORDER BY id;
+END;
+
+-- queries inside read-only transactions can be fast-path router plannable
+BEGIN;
+SET TRANSACTION READ ONLY;
+SELECT *
+	FROM articles_hash
+	WHERE author_id = 1
+	ORDER BY id;
+END;
+
+-- cursor queries are fast-path router plannable
+BEGIN;
+DECLARE test_cursor CURSOR FOR
+	SELECT *
+		FROM articles_hash
+		WHERE author_id = 1
+		ORDER BY id;
+FETCH test_cursor;
+FETCH ALL test_cursor;
+FETCH test_cursor; -- fetch one row after the last
+FETCH BACKWARD test_cursor;
+END;
+
+-- queries inside copy can be router plannable
+COPY (
+	SELECT *
+	FROM articles_hash
+	WHERE author_id = 1
+	ORDER BY id) TO STDOUT;
+
+-- table creation queries can be fast-path router plannable
+CREATE TEMP TABLE temp_articles_hash as
+	SELECT *
+		FROM articles_hash
+		WHERE author_id = 1
+		ORDER BY id;
+
+-- fast-path router plannable queries may include a FILTER clause on aggregates
+SELECT count(*), count(*) FILTER (WHERE id < 3)
+	FROM articles_hash
+	WHERE author_id = 1;
+
+-- prepare queries can be router plannable
+PREPARE author_1_articles as
+	SELECT *
+	FROM articles_hash
+	WHERE author_id = 1;
+
+EXECUTE author_1_articles;
+EXECUTE author_1_articles;
+EXECUTE author_1_articles;
+EXECUTE author_1_articles;
+EXECUTE author_1_articles;
+EXECUTE author_1_articles;
+
+-- parametric prepare queries can be router plannable
+PREPARE author_articles(int) as
+	SELECT *
+	FROM articles_hash
+	WHERE author_id = $1;
+
+EXECUTE author_articles(1);
+EXECUTE author_articles(1);
+EXECUTE author_articles(1);
+EXECUTE author_articles(1);
+EXECUTE author_articles(1);
+EXECUTE author_articles(1);
+
+EXECUTE author_articles(NULL);
+EXECUTE author_articles(NULL);
+EXECUTE author_articles(NULL);
+EXECUTE author_articles(NULL);
+EXECUTE author_articles(NULL);
+EXECUTE author_articles(NULL);
+EXECUTE author_articles(NULL);
+
+PREPARE author_articles_update(int) AS
+	UPDATE articles_hash SET title = 'test' WHERE author_id = $1;
+
+EXECUTE author_articles_update(NULL);
+EXECUTE author_articles_update(NULL);
+EXECUTE author_articles_update(NULL);
+EXECUTE author_articles_update(NULL);
+EXECUTE author_articles_update(NULL);
+EXECUTE author_articles_update(NULL);
+EXECUTE author_articles_update(NULL);
+
+-- We don't want too many details; though we're omitting
+-- "DETAIL: distribution column value:", we find it acceptable
+-- since the query results verify the correctness
+\set VERBOSITY terse
+
+SELECT author_articles_max_id();
+SELECT author_articles_max_id();
+SELECT author_articles_max_id();
+SELECT author_articles_max_id();
+SELECT author_articles_max_id();
+SELECT author_articles_max_id();
+
+SELECT author_articles_max_id(1);
+SELECT author_articles_max_id(1);
+SELECT author_articles_max_id(1);
+SELECT author_articles_max_id(1);
+SELECT author_articles_max_id(1);
+SELECT author_articles_max_id(1);
+
+SELECT * FROM author_articles_id_word_count();
+SELECT * FROM author_articles_id_word_count();
+SELECT * FROM author_articles_id_word_count();
+SELECT * FROM author_articles_id_word_count();
+SELECT * FROM author_articles_id_word_count();
+SELECT * FROM author_articles_id_word_count();
+
+SELECT * FROM author_articles_id_word_count(1);
+SELECT * FROM author_articles_id_word_count(1);
+SELECT * FROM author_articles_id_word_count(1);
+SELECT * FROM author_articles_id_word_count(1);
+SELECT * FROM author_articles_id_word_count(1);
+SELECT * FROM author_articles_id_word_count(1);
+
+\set VERBOSITY default
+
+-- insert .. select via coordinator could also
+-- use fast-path queries
+PREPARE insert_sel(int, int) AS
+INSERT INTO articles_hash
+	SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0;
+
+EXECUTE insert_sel(1,1);
+EXECUTE insert_sel(1,1);
+EXECUTE insert_sel(1,1);
+EXECUTE insert_sel(1,1);
+EXECUTE insert_sel(1,1);
+EXECUTE insert_sel(1,1);
+
+-- one final interesting prepared statement
+-- where one of the filters is on the target list
+PREPARE fast_path_agg_filter(int, int) AS
+	SELECT
+		count(*) FILTER (WHERE word_count=$1)
+	FROM
+		articles_hash
+	WHERE author_id = $2;
+
+EXECUTE fast_path_agg_filter(1,1);
+EXECUTE fast_path_agg_filter(2,2);
+EXECUTE fast_path_agg_filter(3,3);
+EXECUTE fast_path_agg_filter(4,4);
+EXECUTE fast_path_agg_filter(5,5);
+EXECUTE fast_path_agg_filter(6,6);
+
+-- views internally become subqueries, so not fast-path router query
+SELECT * FROM test_view;
+
+-- materialized views can be created for fast-path router plannable queries
+CREATE MATERIALIZED VIEW mv_articles_hash_empty AS
+	SELECT * FROM articles_hash WHERE author_id = 1;
+SELECT * FROM mv_articles_hash_empty;
+
+
+SELECT id
+	FROM articles_hash
+	WHERE author_id = 1;
+
+INSERT INTO articles_hash VALUES (51, 1, 'amateus', 1814), (52, 1, 'second amateus', 2824);
+
+-- verify insert is successful (not router plannable and executable)
+SELECT id
+	FROM articles_hash
+	WHERE author_id = 1;
+
+SELECT count(*) FROM collections_list WHERE key = 4;
+SELECT count(*) FROM collections_list_1 WHERE key = 4;
+SELECT count(*) FROM collections_list_2 WHERE key = 4;
+UPDATE collections_list SET value = 15 WHERE key = 4;
+SELECT count(*) FILTER (where value = 15) FROM collections_list WHERE key = 4;
+SELECT count(*) FILTER (where value = 15) FROM collections_list_1 WHERE key = 4;
+SELECT count(*) FILTER (where value = 15) FROM collections_list_2 WHERE key = 4;
+
+-- test INSERT using values from generate_series() and repeat() functions
+INSERT INTO authors_reference (id, name) VALUES (generate_series(1, 10), repeat('Migjeni', 3));
+SELECT * FROM authors_reference ORDER BY 1, 2;
diff --git a/src/test/regress/sql/arbitrary_configs_router_create.sql b/src/test/regress/sql/arbitrary_configs_router_create.sql
new file mode 100644
index 000000000..956100c7e
--- /dev/null
+++ b/src/test/regress/sql/arbitrary_configs_router_create.sql
@@ -0,0 +1,118 @@
+CREATE SCHEMA arbitrary_configs_router;
+SET search_path TO arbitrary_configs_router;
+
+CREATE TABLE articles_hash (
+	id bigint NOT NULL,
+	author_id bigint NOT NULL,
+	title varchar(20) NOT NULL,
+	word_count integer
+);
+
+SELECT create_distributed_table('articles_hash', 'author_id');
+
+CREATE TABLE authors_reference (id int, name text);
+SELECT create_reference_table('authors_reference');
+
+-- create a bunch of test data
+INSERT INTO articles_hash VALUES (1, 1, 'arsenous', 9572), (2, 2, 'abducing', 13642),( 3, 3, 'asternal', 10480),( 4, 4, 'altdorfer', 14551),( 5, 5, 'aruru', 11389),
+	(6, 6, 'atlases', 15459),(7, 7, 'aseptic', 12298),( 8, 8, 'agatized', 16368),(9, 9, 'alligate', 438),
+	(10, 10, 'aggrandize', 17277),(11, 1, 'alamo', 1347),(12, 2, 'archiblast', 18185),
+	(13, 3, 'aseyev', 2255),(14, 4, 'andesite', 19094),(15, 5, 'adversa', 3164),
+	(16, 6, 'allonym', 2),(17, 7, 'auriga', 4073),(18, 8, 'assembly', 911),(19, 9, 'aubergiste', 4981),
+	(20, 10, 'absentness', 1820),(21, 1, 'arcading', 5890),(22, 2, 'antipope', 2728),(23, 3, 'abhorring', 6799),
+	(24, 4, 'audacious', 3637),(25, 5, 'antehall', 7707),(26, 6, 'abington', 4545),(27, 7, 'arsenous', 8616),
+	(28, 8, 'aerophyte', 5454),(29, 9, 'amateur', 9524),(30, 10, 'andelee', 6363),(31, 1, 'athwartships', 7271),
+	(32, 2, 'amazon', 11342),(33, 3, 'autochrome', 8180),(34, 4, 'amnestied', 12250),(35, 5, 'aminate', 9089),
+	(36, 6, 'ablation', 13159),(37, 7, 'archduchies', 9997),(38, 8, 'anatine', 14067),(39, 9, 'anchises', 10906),
+	(40, 10, 'attemper', 14976),(41, 1, 'aznavour', 11814),(42, 2, 'ausable', 15885),(43, 3, 'affixal', 12723),
+	(44, 4, 'anteport', 16793),(45, 5, 'afrasia', 864),(46, 6, 'atlanta', 17702),(47, 7, 'abeyance', 1772),
+	(48, 8, 'alkylic', 18610),(49, 9, 'anyone', 2681),(50, 10, 'anjanette', 19519);
+
+CREATE TABLE company_employees (company_id int, employee_id int, manager_id int);
+
+SELECT create_distributed_table('company_employees', 'company_id', 'hash');
+
+INSERT INTO company_employees values(1, 1, 0);
+INSERT INTO company_employees values(1, 2, 1);
+INSERT INTO company_employees values(1, 3, 1);
+INSERT INTO company_employees values(1, 4, 2);
+INSERT INTO company_employees values(1, 5, 4);
+
+INSERT INTO company_employees values(3, 1, 0);
+INSERT INTO company_employees values(3, 15, 1);
+INSERT INTO company_employees values(3, 3, 1);
+
+-- finally, some tests with partitioned tables
+CREATE TABLE collections_list (
+	key bigint,
+	ts timestamptz,
+	collection_id integer,
+	value numeric
+) PARTITION BY LIST (collection_id );
+
+CREATE TABLE collections_list_1
+	PARTITION OF collections_list (key, ts, collection_id, value)
+	FOR VALUES IN ( 1 );
+
+CREATE TABLE collections_list_2
+	PARTITION OF collections_list (key, ts, collection_id, value)
+	FOR VALUES IN ( 2 );
+
+SELECT create_distributed_table('collections_list', 'key');
+INSERT INTO collections_list SELECT i % 10, now(), (i % 2) + 1, i*i FROM generate_series(0, 50)i;
+
+-- queries inside plpgsql functions could be router plannable
+CREATE OR REPLACE FUNCTION author_articles_max_id() RETURNS int AS $$
+DECLARE
+  max_id integer;
+BEGIN
+	SELECT MAX(id) FROM articles_hash ah
+		WHERE author_id = 1
+		into max_id;
+	return max_id;
+END;
+$$ LANGUAGE plpgsql;
+
+-- queries inside plpgsql functions could be router plannable
+CREATE OR REPLACE FUNCTION author_articles_max_id(int) RETURNS int AS $$
+DECLARE
+  max_id integer;
+BEGIN
+	SELECT MAX(id) FROM articles_hash ah
+		WHERE author_id = $1
+		into max_id;
+	return max_id;
+END;
+$$ LANGUAGE plpgsql;
+
+-- check that functions returning setof queries are router plannable
+CREATE OR REPLACE FUNCTION author_articles_id_word_count() RETURNS TABLE(id bigint, word_count int) AS $$
+DECLARE
+BEGIN
+	RETURN QUERY
+		SELECT ah.id, ah.word_count
+		FROM articles_hash ah
+		WHERE author_id = 1;
+
+END;
+$$ LANGUAGE plpgsql;
+
+-- check that functions returning setof queries are router plannable
+CREATE OR REPLACE FUNCTION author_articles_id_word_count(int) RETURNS TABLE(id bigint, word_count int) AS $$
+DECLARE
+BEGIN
+	RETURN QUERY
+		SELECT ah.id, ah.word_count
+		FROM articles_hash ah
+		WHERE author_id = $1;
+
+END;
+$$ LANGUAGE plpgsql;
+
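As an aside, a quick way to check which of the statements in these router tests actually take the fast path is to surface the planner's debug messages. The sketch below is illustrative only and not part of the patch; it assumes a session against a Citus coordinator, and the exact DEBUG wording can vary between Citus versions (citus.enable_fast_path_router_planner is the GUC the router planner tests elsewhere in this series toggle):

```
SET client_min_messages TO DEBUG2;

-- A plain equality filter on the distribution column: expected to be
-- handled by the fast-path router planner.
SELECT count(*) FROM articles_hash WHERE author_id = 1;

-- Disabling the fast path forces the regular router planner, which is
-- handy for comparing the two code paths on the same query.
SET citus.enable_fast_path_router_planner TO false;
SELECT count(*) FROM articles_hash WHERE author_id = 1;

RESET citus.enable_fast_path_router_planner;
RESET client_min_messages;
```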
+-- Suppress the warning that says the view won't be distributed
+-- because it depends on a local table.
+--
+-- This only happens when running PostgresConfig.
+SET client_min_messages TO ERROR;
+CREATE VIEW test_view AS
+	SELECT * FROM articles_hash WHERE author_id = 1;
diff --git a/src/test/regress/sql_schedule b/src/test/regress/sql_schedule
index 272a84eff..9538f1482 100644
--- a/src/test/regress/sql_schedule
+++ b/src/test/regress/sql_schedule
@@ -15,3 +15,4 @@ test: arbitrary_configs_truncate_cascade
 test: arbitrary_configs_truncate_partition
 test: arbitrary_configs_alter_table_add_constraint_without_name
 test: merge_arbitrary
+test: arbitrary_configs_router

From e1f1d63050899e189faac395440faa187a8ca4e0 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Wed, 22 Mar 2023 15:15:23 +0300
Subject: [PATCH 20/58] Rename AllRelations.. functions to
 AllDistributedRelations.. (#6789)

Because these functions are only interested in distributed tables.

Moreover, this replaces the HasDistributionKey() check with
IsCitusTableType(DISTRIBUTED_TABLE), which makes no behavioral
difference on main and reads slightly more intuitively. Plus, this
would also allow safely using this function in
https://github.com/citusdata/citus/pull/6773.

---
 .../distributed/planner/merge_planner.c       |  2 +-
 .../relation_restriction_equivalence.c        | 41 +++++++++++--------
 .../relation_restriction_equivalence.h        |  2 +-
 3 files changed, 27 insertions(+), 18 deletions(-)
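To make the renamed checks concrete: only distributed tables now participate in the colocation comparison, so reference tables and plain Postgres tables in the same relation list are simply skipped. The SQL below is an illustrative sketch, not part of the patch; the table names are invented:

```
CREATE TABLE dist_a (key int, val text);
CREATE TABLE dist_b (key int, val text);
CREATE TABLE ref_c (key int, val text);

SELECT create_distributed_table('dist_a', 'key');
-- Same colocation group as dist_a, so the colocation check passes.
SELECT create_distributed_table('dist_b', 'key', colocate_with => 'dist_a');
-- A reference table: AllDistributedRelationsInListColocated skips it,
-- so it cannot make this relation list count as "not colocated".
SELECT create_reference_table('ref_c');

SELECT count(*)
FROM dist_a
JOIN dist_b USING (key)
JOIN ref_c USING (key);
```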
diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c
index 03fd9e00d..4839d5725 100644
--- a/src/backend/distributed/planner/merge_planner.c
+++ b/src/backend/distributed/planner/merge_planner.c
@@ -229,7 +229,7 @@ ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList,
 	}
 
 	/* All distributed tables must be colocated */
-	if (!AllRelationsInRTEListColocated(distTablesList))
+	if (!AllDistributedRelationsInRTEListColocated(distTablesList))
 	{
 		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
 							 "For MERGE command, all the distributed tables "
diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c
index 5c91ee79c..3fa3068dc 100644
--- a/src/backend/distributed/planner/relation_restriction_equivalence.c
+++ b/src/backend/distributed/planner/relation_restriction_equivalence.c
@@ -151,9 +151,10 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass
 													  secondClass);
 static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex,
 											   Index *partitionKeyIndex);
-static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext *
-													  restrictionContext);
-static bool AllRelationsInListColocated(List *relationList);
+static bool AllDistributedRelationsInRestrictionContextColocated(
+	RelationRestrictionContext *
+	restrictionContext);
+static bool AllDistributedRelationsInListColocated(List *relationList);
 static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node);
 static JoinRestrictionContext * FilterJoinRestrictionContext(
 	JoinRestrictionContext *joinRestrictionContext, Relids
@@ -384,7 +385,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery,
 		return false;
 	}
 
-	if (!AllRelationsInRestrictionContextColocated(restrictionContext))
+	if (!AllDistributedRelationsInRestrictionContextColocated(restrictionContext))
 	{
 		/* distribution columns are equal, but tables are not co-located */
 		return false;
@@ -1920,11 +1921,12 @@ FindQueryContainingRTEIdentityInternal(Node *node,
 
 
 /*
- * AllRelationsInRestrictionContextColocated determines whether all of the relations in the
- * given relation restrictions list are co-located.
+ * AllDistributedRelationsInRestrictionContextColocated determines whether all of the
+ * distributed relations in the given relation restrictions list are co-located.
  */
 static bool
-AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext)
+AllDistributedRelationsInRestrictionContextColocated(
+	RelationRestrictionContext *restrictionContext)
 {
 	RelationRestriction *relationRestriction = NULL;
 	List *relationIdList = NIL;
@@ -1935,16 +1937,16 @@ AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictio
 		relationIdList = lappend_oid(relationIdList, relationRestriction->relationId);
 	}
 
-	return AllRelationsInListColocated(relationIdList);
+	return AllDistributedRelationsInListColocated(relationIdList);
 }
 
 
 /*
- * AllRelationsInRTEListColocated determines whether all of the relations in the
- * given RangeTableEntry list are co-located.
+ * AllDistributedRelationsInRTEListColocated determines whether all of the
+ * distributed relations in the given RangeTableEntry list are co-located.
  */
 bool
-AllRelationsInRTEListColocated(List *rangeTableEntryList)
+AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList)
 {
 	RangeTblEntry *rangeTableEntry = NULL;
 	List *relationIdList = NIL;
@@ -1954,24 +1956,31 @@ AllRelationsInRTEListColocated(List *rangeTableEntryList)
 		relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid);
 	}
 
-	return AllRelationsInListColocated(relationIdList);
+	return AllDistributedRelationsInListColocated(relationIdList);
 }
 
 
 /*
- * AllRelationsInListColocated determines whether all of the relations in the
- * given list are co-located.
+ * AllDistributedRelationsInListColocated determines whether all of the
+ * distributed relations in the given list are co-located.
  */
 static bool
-AllRelationsInListColocated(List *relationList)
+AllDistributedRelationsInListColocated(List *relationList)
 {
 	int initialColocationId = INVALID_COLOCATION_ID;
 	Oid relationId = InvalidOid;
 
 	foreach_oid(relationId, relationList)
 	{
-		if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
+		if (!IsCitusTable(relationId))
 		{
+			/* not interested in Postgres tables */
+			continue;
+		}
+
+		if (!IsCitusTableType(relationId, DISTRIBUTED_TABLE))
+		{
+			/* not interested in non-distributed tables */
 			continue;
 		}
 
diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h
index e0e716c7e..07b6348d9 100644
--- a/src/include/distributed/relation_restriction_equivalence.h
+++ b/src/include/distributed/relation_restriction_equivalence.h
@@ -54,5 +54,5 @@ extern RelationRestrictionContext * FilterRelationRestrictionContext(
 	RelationRestrictionContext *relationRestrictionContext,
 	Relids queryRteIdentities);
 
-extern bool AllRelationsInRTEListColocated(List *rangeTableEntryList);
+extern bool AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList);
 #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */

From da7db53c87af143ec6e4240261928f57d549d85f Mon Sep 17 00:00:00 2001
From: Teja Mupparti
Date: Tue, 21 Mar 2023 14:45:03 -0700
Subject: [PATCH 21/58] Refactor some of the planning code to accommodate a new
 planning path for MERGE SQL

---
 .../distributed/planner/distributed_planner.c | 155 +++++++++++++-----
 .../distributed/planner/merge_planner.c       |  17 ++
 src/include/distributed/merge_planner.h       |   5 +
 3 files changed, 135 insertions(+), 42 deletions(-)
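For orientation, the new MERGE_QUERY path introduced below targets statements of the following shape. The sketch is illustrative rather than taken from the patch; per the colocation checks in the previous commit, the distributed tables involved are assumed to be colocated on the join column:

```
-- target_events and source_events are assumed to be distributed on
-- customer_id and colocated with each other.
MERGE INTO target_events t
USING source_events s ON (t.customer_id = s.customer_id)
WHEN MATCHED THEN
	UPDATE SET event_count = t.event_count + s.event_count
WHEN NOT MATCHED THEN
	INSERT (customer_id, event_count) VALUES (s.customer_id, s.event_count);
```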
diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c
index 866f7353a..eb9e21786 100644
--- a/src/backend/distributed/planner/distributed_planner.c
+++ b/src/backend/distributed/planner/distributed_planner.c
@@ -34,6 +34,7 @@
 #include "distributed/intermediate_results.h"
 #include "distributed/listutils.h"
 #include "distributed/coordinator_protocol.h"
+#include "distributed/merge_planner.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/multi_executor.h"
 #include "distributed/distributed_planner.h"
@@ -68,6 +69,17 @@
 #include "utils/syscache.h"
 
 
+/* RouterPlanType is used to determine the router plan to invoke */
+typedef enum RouterPlanType
+{
+	INSERT_SELECT_INTO_CITUS_TABLE,
+	INSERT_SELECT_INTO_LOCAL_TABLE,
+	DML_QUERY,
+	SELECT_QUERY,
+	MERGE_QUERY,
+	REPLAN_WITH_BOUND_PARAMETERS
+} RouterPlanType;
+
 static List *plannerRestrictionContextList = NIL;
 int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */
 static uint64 NextPlanId = 1;
@@ -129,6 +141,9 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext
 static RTEListProperties * GetRTEListProperties(List *rangeTableList);
 static List * TranslatedVars(PlannerInfo *root, int relationIndex);
 static void WarnIfListHasForeignDistributedTable(List *rangeTableList);
+static RouterPlanType GetRouterPlanType(Query *query,
+										Query *originalQuery,
+										bool hasUnresolvedParams);
 
 
 /* Distributed planner hook */
@@ -881,6 +896,51 @@ TryCreateDistributedPlannedStmt(PlannedStmt *localPlan,
 }
 
 
+/*
+ * GetRouterPlanType checks the parse tree to return the appropriate plan type.
+ */
+static RouterPlanType
+GetRouterPlanType(Query *query, Query *originalQuery, bool hasUnresolvedParams)
+{
+	if (!IsModifyCommand(originalQuery))
+	{
+		return SELECT_QUERY;
+	}
+
+	Oid targetRelationId = ModifyQueryResultRelationId(query);
+
+	EnsureModificationsCanRunOnRelation(targetRelationId);
+	EnsurePartitionTableNotReplicated(targetRelationId);
+
+	/* Check the type of modification being done */
+
+	if (InsertSelectIntoCitusTable(originalQuery))
+	{
+		if (hasUnresolvedParams)
+		{
+			return REPLAN_WITH_BOUND_PARAMETERS;
+		}
+		return INSERT_SELECT_INTO_CITUS_TABLE;
+	}
+	else if (InsertSelectIntoLocalTable(originalQuery))
+	{
+		if (hasUnresolvedParams)
+		{
+			return REPLAN_WITH_BOUND_PARAMETERS;
+		}
+		return INSERT_SELECT_INTO_LOCAL_TABLE;
+	}
+	else if (IsMergeQuery(originalQuery))
+	{
+		return MERGE_QUERY;
+	}
+	else
+	{
+		return DML_QUERY;
+	}
+}
+
+
 /*
  * CreateDistributedPlan generates a distributed plan for a query.
  * It goes through 3 steps:
@@ -898,64 +958,71 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
 	DistributedPlan *distributedPlan = NULL;
 	bool hasCtes = originalQuery->cteList != NIL;
 
-	if (IsModifyCommand(originalQuery))
+	/* Step 1: Try router planner */
+
+	RouterPlanType routerPlan = GetRouterPlanType(query, originalQuery,
+												  hasUnresolvedParams);
+
+	switch (routerPlan)
 	{
-		Oid targetRelationId = ModifyQueryResultRelationId(query);
-
-		EnsureModificationsCanRunOnRelation(targetRelationId);
-
-		EnsurePartitionTableNotReplicated(targetRelationId);
-
-		if (InsertSelectIntoCitusTable(originalQuery))
+		case INSERT_SELECT_INTO_CITUS_TABLE:
 		{
-			if (hasUnresolvedParams)
-			{
-				/*
-				 * Unresolved parameters can cause performance regressions in
-				 * INSERT...SELECT when the partition column is a parameter
-				 * because we don't perform any additional pruning in the executor.
-				 */
-				return NULL;
-			}
-
 			distributedPlan =
-				CreateInsertSelectPlan(planId, originalQuery, plannerRestrictionContext,
+				CreateInsertSelectPlan(planId,
+									   originalQuery,
+									   plannerRestrictionContext,
 									   boundParams);
+			break;
 		}
-		else if (InsertSelectIntoLocalTable(originalQuery))
+
+		case INSERT_SELECT_INTO_LOCAL_TABLE:
 		{
-			if (hasUnresolvedParams)
-			{
-				/*
-				 * Unresolved parameters can cause performance regressions in
-				 * INSERT...SELECT when the partition column is a parameter
-				 * because we don't perform any additional pruning in the executor.
-				 */
-				return NULL;
-			}
 			distributedPlan =
-				CreateInsertSelectIntoLocalTablePlan(planId, originalQuery, boundParams,
+				CreateInsertSelectIntoLocalTablePlan(planId,
													 originalQuery,
													 boundParams,
 													 hasUnresolvedParams,
 													 plannerRestrictionContext);
+			break;
 		}
-		else
+
+		case DML_QUERY:
 		{
 			/* modifications are always routed through the same planner/executor */
 			distributedPlan =
 				CreateModifyPlan(originalQuery, query, plannerRestrictionContext);
+			break;
 		}
-	}
-	else
-	{
-		/*
-		 * For select queries we, if router executor is enabled, first try to
-		 * plan the query as a router query. If not supported, otherwise try
-		 * the full blown plan/optimize/physical planning process needed to
-		 * produce distributed query plans.
-		 */
-		distributedPlan = CreateRouterPlan(originalQuery, query,
-										   plannerRestrictionContext);
+
+		case MERGE_QUERY:
+		{
+			distributedPlan =
+				CreateMergePlan(originalQuery, query, plannerRestrictionContext);
+			break;
+		}
+
+		case REPLAN_WITH_BOUND_PARAMETERS:
+		{
+			/*
+			 * Unresolved parameters can cause performance regressions in
+			 * INSERT...SELECT when the partition column is a parameter
+			 * because we don't perform any additional pruning in the executor.
+			 */
+			return NULL;
+		}
+
+		case SELECT_QUERY:
+		{
+			/*
+			 * For SELECT queries, if the router executor is enabled, we first
+			 * try to plan the query as a router query. If that is not
+			 * supported, we fall back to the full-blown
+			 * plan/optimize/physical planning process needed to produce
+			 * distributed query plans.
+			 */
+			distributedPlan =
+				CreateRouterPlan(originalQuery, query, plannerRestrictionContext);
+			break;
+		}
 	}
 
 	/* the functions above always return a plan, possibly with an error */
@@ -996,6 +1063,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
 													boundParams);
 		Assert(originalQuery != NULL);
 
+		/* Step 2: Generate subplans for CTEs and complex subqueries */
+
 		/*
 		 * Plan subqueries and CTEs that cannot be pushed down by recursively
 		 * calling the planner and return the resulting plans to subPlanList.
@@ -1096,6 +1165,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
 	query->cteList = NIL;
 	Assert(originalQuery->cteList == NIL);
 
+	/* Step 3: Try Logical planner */
+
 	MultiTreeRoot *logicalPlan = MultiLogicalPlanCreate(originalQuery, query,
 														plannerRestrictionContext);
 	MultiLogicalPlanOptimize(logicalPlan);
diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c
index 4839d5725..46a2484bd 100644
--- a/src/backend/distributed/planner/merge_planner.c
+++ b/src/backend/distributed/planner/merge_planner.c
@@ -54,6 +54,23 @@ static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid
 #endif
 
 
+/*
+ * CreateMergePlan attempts to create a plan for the given MERGE SQL
+ * statement. If planning fails, ->planningError is set to a description
+ * of the failure.
+ */
+DistributedPlan *
+CreateMergePlan(Query *originalQuery, Query *query,
+				PlannerRestrictionContext *plannerRestrictionContext)
+{
+	/*
+	 * For now, this is a placeholder until we isolate the merge
+	 * planning into its own code-path.
+	 */
+	return CreateModifyPlan(originalQuery, query, plannerRestrictionContext);
+}
+
+
 /*
  * MergeQuerySupported does check for a MERGE command in the query, if it finds
  * one, it will verify the below criteria
diff --git a/src/include/distributed/merge_planner.h b/src/include/distributed/merge_planner.h
index 243be14d0..158f26861 100644
--- a/src/include/distributed/merge_planner.h
+++ b/src/include/distributed/merge_planner.h
@@ -17,10 +17,15 @@
 #include "nodes/parsenodes.h"
 #include "distributed/distributed_planner.h"
 #include "distributed/errormessage.h"
+#include "distributed/multi_physical_planner.h"
 
 extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte);
 extern DeferredErrorMessage * MergeQuerySupported(Query *originalQuery,
 												  bool multiShardQuery,
 												  PlannerRestrictionContext *
 												  plannerRestrictionContext);
+extern DistributedPlan * CreateMergePlan(Query *originalQuery, Query *query,
+										 PlannerRestrictionContext *
+										 plannerRestrictionContext);
+
 #endif /* MERGE_PLANNER_H */

From 372a93b529bd4d99266ed5946697e6ffe0f738f4 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Mon, 27 Mar 2023 12:19:06 +0300
Subject: [PATCH 22/58] Make 8 more tests runnable multiple times via
 run_test.py (#6791)

Soon I will be doing some changes related to #692 in router planner and
those changes require updating ~5/6 tests related to router planning.
And to make those test files runnable by run_test.py multiple times, we
need to make some other tests (ones they run in parallel with, or
depend heavily on) ready for run_test.py too.
--- src/test/regress/citus_tests/run_test.py | 4 + .../regress/expected/multi_data_types.out | 8 ++ .../expected/multi_modifying_xacts.out | 71 ++++++++++++----- .../regress/expected/multi_mx_copy_data.out | 5 ++ .../expected/multi_mx_modifying_xacts.out | 3 + .../expected/multi_mx_router_planner.out | 4 + .../regress/expected/multi_router_planner.out | 76 +++++++++---------- .../regress/expected/multi_simple_queries.out | 19 +++-- src/test/regress/expected/multi_upsert.out | 4 + src/test/regress/sql/multi_data_types.sql | 9 +++ .../regress/sql/multi_modifying_xacts.sql | 45 ++++++++++- src/test/regress/sql/multi_mx_copy_data.sql | 7 ++ .../regress/sql/multi_mx_modifying_xacts.sql | 4 + .../regress/sql/multi_mx_router_planner.sql | 5 ++ src/test/regress/sql/multi_router_planner.sql | 32 +++----- src/test/regress/sql/multi_simple_queries.sql | 16 ++-- src/test/regress/sql/multi_upsert.sql | 5 ++ 17 files changed, 218 insertions(+), 99 deletions(-) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index 9c901785c..5964267ec 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -109,6 +109,10 @@ if __name__ == "__main__": "multi_mx_function_table_reference", ], ), + "multi_mx_modifying_xacts": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_router_planner": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_copy_data": TestDeps(None, ["multi_mx_create_table"]), + "multi_simple_queries": TestDeps("base_schedule"), } if not (test_file_name or test_file_path): diff --git a/src/test/regress/expected/multi_data_types.out b/src/test/regress/expected/multi_data_types.out index 4bc7da5c7..a88f9e1de 100644 --- a/src/test/regress/expected/multi_data_types.out +++ b/src/test/regress/expected/multi_data_types.out @@ -3,6 +3,14 @@ -- create, distribute, INSERT, SELECT and UPDATE -- =================================================================== SET citus.next_shard_id TO 530000; +-- Given that other test files depend on the existence of types created in this file, +-- we cannot drop them at the end. Instead, we drop them at the beginning of the test +-- to make this file runnable multiple times via run_test.py. +BEGIN; + SET LOCAL client_min_messages TO WARNING; + DROP TYPE IF EXISTS test_composite_type, other_composite_type, bug_status CASCADE; + DROP OPERATOR FAMILY IF EXISTS cats_op_fam USING hash; +COMMIT; -- create a custom type... 
CREATE TYPE test_composite_type AS ( i integer, diff --git a/src/test/regress/expected/multi_modifying_xacts.out b/src/test/regress/expected/multi_modifying_xacts.out index 607c327ff..0294e1060 100644 --- a/src/test/regress/expected/multi_modifying_xacts.out +++ b/src/test/regress/expected/multi_modifying_xacts.out @@ -1,5 +1,7 @@ SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; +CREATE SCHEMA multi_modifying_xacts; +SET search_path TO multi_modifying_xacts; -- =================================================================== -- test end-to-end modification functionality -- =================================================================== @@ -190,7 +192,7 @@ ALTER TABLE labs ADD COLUMN motto text; INSERT INTO labs VALUES (6, 'Bell Labs'); ABORT; -- but the DDL should correctly roll back -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.labs'::regclass; +SELECT "Column", "Type", "Modifiers" FROM public.table_desc WHERE relid='multi_modifying_xacts.labs'::regclass; Column | Type | Modifiers --------------------------------------------------------------------- id | bigint | not null @@ -339,7 +341,7 @@ CREATE FUNCTION reject_large_id() RETURNS trigger AS $rli$ END; $rli$ LANGUAGE plpgsql; -- register after insert trigger -SELECT * FROM run_command_on_placements('researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_large_id()') +SELECT * FROM run_command_on_placements('multi_modifying_xacts.researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE multi_modifying_xacts.reject_large_id()') ORDER BY nodeport, shardid; nodename | nodeport | shardid | success | result --------------------------------------------------------------------- @@ -498,6 +500,7 @@ AND s.logicalrelid = 'objects'::regclass; -- create trigger on one worker to reject certain values \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -514,6 +517,7 @@ AFTER INSERT ON objects_1200003 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- test partial failure; worker_1 succeeds, 2 fails -- in this case, we expect the transaction to abort \set VERBOSITY terse @@ -551,6 +555,7 @@ DELETE FROM objects; -- there cannot be errors on different shards at different times -- because the first failure will fail the whole transaction \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -567,6 +572,7 @@ AFTER INSERT ON labs_1200002 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; BEGIN; INSERT INTO objects VALUES (1, 'apple'); INSERT INTO objects VALUES (2, 'BAD'); @@ -602,12 +608,14 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if the failures happen at COMMIT time? 
\c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; CREATE CONSTRAINT TRIGGER reject_bad AFTER INSERT ON objects_1200003 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- should be the same story as before, just at COMMIT time -- as we use 2PC, the transaction is rollbacked BEGIN; @@ -644,12 +652,14 @@ WHERE sp.shardid = s.shardid AND s.logicalrelid = 'objects'::regclass; -- what if all nodes have failures at COMMIT time? \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON labs_1200002; CREATE CONSTRAINT TRIGGER reject_bad AFTER INSERT ON labs_1200002 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- reduce the log level for differences between PG14 and PG15 -- in PGconn->errorMessage -- relevant PG commit b15f254466aefbabcbed001929f6e09db59fd158 @@ -688,8 +698,10 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if one shard (objects) succeeds but another (labs) completely fails? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; \c - - - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200004; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -833,6 +845,7 @@ SELECT * FROM reference_modifying_xacts; -- lets fail on of the workers at before the commit time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_reference() RETURNS trigger AS $rb$ BEGIN @@ -849,6 +862,7 @@ AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -860,12 +874,14 @@ ERROR: illegal value COMMIT; -- lets fail one of the workers at COMMIT time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -890,8 +906,10 @@ ORDER BY s.logicalrelid, sp.shardstate; -- for the time-being drop the constraint \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- now create a hash distributed table and run tests -- including both the reference table and the hash -- distributed table @@ -923,6 +941,7 @@ INSERT INTO hash_modifying_xacts VALUES (2, 2); ABORT; -- lets fail one of the workers before COMMIT time for the hash table \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_hash() RETURNS trigger AS $rb$ BEGIN @@ -939,6 +958,7 @@ AFTER INSERT ON hash_modifying_xacts_1200007 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE 
PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail BEGIN; @@ -955,6 +975,7 @@ SELECT * FROM reference_modifying_xacts WHERE key = 55; -- now lets fail on of the workers for the hash distributed table table -- when there is a reference table involved \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_hash ON hash_modifying_xacts_1200007; -- the trigger is on execution time CREATE CONSTRAINT TRIGGER reject_bad_hash @@ -962,6 +983,7 @@ AFTER INSERT ON hash_modifying_xacts_1200007 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail BEGIN; @@ -994,11 +1016,13 @@ ORDER BY s.logicalrelid, sp.shardstate; -- and ensure that hash distributed table's -- change is rollbacked as well \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse BEGIN; -- to expand participant to include all worker nodes @@ -1127,8 +1151,10 @@ SELECT count(*) FROM pg_dist_transaction; -- in which we'll make the remote host unavailable -- first create the new user on all nodes CREATE USER test_user; +GRANT ALL ON SCHEMA multi_modifying_xacts TO test_user; -- now connect back to the master with the new user \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200015; CREATE TABLE reference_failure_test (key int, value int); SELECT create_reference_table('reference_failure_test'); @@ -1148,21 +1174,24 @@ SELECT create_distributed_table('numbers_hash_failure_test', 'key'); -- ensure that the shard is created for this user \c - test_user - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.override_table_visibility TO false; \dt reference_failure_test_1200015 - List of relations - Schema | Name | Type | Owner + List of relations + Schema | Name | Type | Owner --------------------------------------------------------------------- - public | reference_failure_test_1200015 | table | test_user + multi_modifying_xacts | reference_failure_test_1200015 | table | test_user (1 row) -- now connect with the default user, -- and rename the existing user \c - :default_user - :worker_1_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; NOTICE: not propagating ALTER ROLE ... RENAME TO commands to worker nodes -- connect back to master and query the reference table \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- should fail since the worker doesn't have test_user anymore INSERT INTO reference_failure_test VALUES (1, '1'); ERROR: connection to the remote node localhost:xxxxx failed with the following error: FATAL: role "test_user" does not exist @@ -1277,14 +1306,17 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin -- break the other node as well \c - :default_user - :worker_2_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; NOTICE: not propagating ALTER ROLE ... 
RENAME TO commands to worker nodes \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- fails on all shard placements INSERT INTO numbers_hash_failure_test VALUES (2,2); ERROR: connection to the remote node localhost:xxxxx failed with the following error: FATAL: role "test_user" does not exist -- connect back to the master with the proper user to continue the tests \c - :default_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200020; SET citus.next_placement_id TO 1200033; -- unbreak both nodes by renaming the user back to the original name @@ -1297,6 +1329,7 @@ SELECT * FROM run_command_on_workers('ALTER USER test_user_new RENAME TO test_us DROP TABLE reference_modifying_xacts, hash_modifying_xacts, hash_modifying_xacts_second, reference_failure_test, numbers_hash_failure_test; +REVOKE ALL ON SCHEMA multi_modifying_xacts FROM test_user; DROP USER test_user; -- set up foreign keys to test transactions with co-located and reference tables BEGIN; @@ -1322,7 +1355,9 @@ SELECT create_reference_table('itemgroups'); (1 row) +SET client_min_messages TO WARNING; DROP TABLE IF EXISTS users ; +RESET client_min_messages; CREATE TABLE users ( id int PRIMARY KEY, name text, @@ -1354,18 +1389,18 @@ JOIN USING (shardid) ORDER BY id; - id | shard_name | nodename | nodeport + id | shard_name | nodename | nodeport --------------------------------------------------------------------- - 1 | users_1200022 | localhost | 57637 - 2 | users_1200025 | localhost | 57638 - 3 | users_1200023 | localhost | 57638 - 4 | users_1200023 | localhost | 57638 - 5 | users_1200022 | localhost | 57637 - 6 | users_1200024 | localhost | 57637 - 7 | users_1200023 | localhost | 57638 - 8 | users_1200022 | localhost | 57637 - 9 | users_1200025 | localhost | 57638 - 10 | users_1200022 | localhost | 57637 + 1 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 2 | multi_modifying_xacts.users_1200025 | localhost | 57638 + 3 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 4 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 5 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 6 | multi_modifying_xacts.users_1200024 | localhost | 57637 + 7 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 8 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 9 | multi_modifying_xacts.users_1200025 | localhost | 57638 + 10 | multi_modifying_xacts.users_1200022 | localhost | 57637 (10 rows) END; @@ -1546,5 +1581,5 @@ SELECT name FROM labs WHERE id = 1001; (1 row) RESET citus.function_opens_transaction_block; -DROP FUNCTION insert_abort(); -DROP TABLE items, users, itemgroups, usergroups, researchers, labs; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_modifying_xacts CASCADE; diff --git a/src/test/regress/expected/multi_mx_copy_data.out b/src/test/regress/expected/multi_mx_copy_data.out index c1d3d7180..0db64c16e 100644 --- a/src/test/regress/expected/multi_mx_copy_data.out +++ b/src/test/regress/expected/multi_mx_copy_data.out @@ -1,6 +1,10 @@ -- -- MULTI_MX_COPY_DATA -- +-- We truncate them to make this test runnable multiple times. +-- Note that we cannot do that at the end of the test because +-- we need to keep the data for the other tests. 
+TRUNCATE lineitem_mx, orders_mx; \set nation_data_file :abs_srcdir '/data/nation.data' \set client_side_copy_command '\\copy nation_hash FROM ' :'nation_data_file' ' with delimiter '''|''';' :client_side_copy_command @@ -161,3 +165,4 @@ SET search_path TO public; :client_side_copy_command \set client_side_copy_command '\\copy supplier_mx FROM ' :'supplier_data_file' ' with delimiter '''|''';' :client_side_copy_command +DROP TABLE citus_mx_test_schema.nation_hash_replicated; diff --git a/src/test/regress/expected/multi_mx_modifying_xacts.out b/src/test/regress/expected/multi_mx_modifying_xacts.out index e486b8b1b..dfbdc7603 100644 --- a/src/test/regress/expected/multi_mx_modifying_xacts.out +++ b/src/test/regress/expected/multi_mx_modifying_xacts.out @@ -406,3 +406,6 @@ SELECT * FROM labs_mx WHERE id = 8; --------------------------------------------------------------------- (0 rows) +TRUNCATE objects_mx, labs_mx, researchers_mx; +DROP TRIGGER reject_bad_mx ON labs_mx_1220102; +DROP FUNCTION reject_bad_mx; diff --git a/src/test/regress/expected/multi_mx_router_planner.out b/src/test/regress/expected/multi_mx_router_planner.out index d006b4bb8..bf007be9d 100644 --- a/src/test/regress/expected/multi_mx_router_planner.out +++ b/src/test/regress/expected/multi_mx_router_planner.out @@ -1460,3 +1460,7 @@ DEBUG: query has a single distribution column value: 1 51 (6 rows) +SET client_min_messages to WARNING; +TRUNCATE articles_hash_mx, company_employees_mx, articles_single_shard_hash_mx; +DROP MATERIALIZED VIEW mv_articles_hash_mx_error; +DROP TABLE authors_hash_mx; diff --git a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index 56ff44b3b..e0e5bc541 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ -6,6 +6,8 @@ SET citus.next_shard_id TO 840000; -- router planner, so we're disabling it in this file. We've bunch of -- other tests that triggers fast-path-router planner SET citus.enable_fast_path_router_planner TO false; +CREATE SCHEMA multi_router_planner; +SET search_path TO multi_router_planner; CREATE TABLE articles_hash ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -290,10 +292,10 @@ WITH first_author AS MATERIALIZED ( UPDATE articles_hash SET title = first_author.name FROM first_author WHERE articles_hash.author_id = 2 AND articles_hash.id = first_author.id; DEBUG: Router planner doesn't support SELECT FOR UPDATE in common table expressions involving reference tables. 
-DEBUG: generating subplan XXX_1 for CTE first_author: SELECT articles_hash.id, auref.name FROM public.articles_hash, public.authors_reference auref WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (auref.id OPERATOR(pg_catalog.=) articles_hash.author_id)) FOR UPDATE OF articles_hash FOR UPDATE OF auref +DEBUG: generating subplan XXX_1 for CTE first_author: SELECT articles_hash.id, auref.name FROM multi_router_planner.articles_hash, multi_router_planner.authors_reference auref WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (auref.id OPERATOR(pg_catalog.=) articles_hash.author_id)) FOR UPDATE OF articles_hash FOR UPDATE OF auref DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 -DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE public.articles_hash SET title = first_author.name FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name character varying(20))) first_author WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (articles_hash.id OPERATOR(pg_catalog.=) first_author.id)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE multi_router_planner.articles_hash SET title = first_author.name FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name character varying(20))) first_author WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (articles_hash.id OPERATOR(pg_catalog.=) first_author.id)) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 WITH first_author AS MATERIALIZED ( @@ -356,10 +358,10 @@ WITH id_author AS MATERIALIZED ( SELECT id, author_id FROM articles_hash WHERE a id_title AS MATERIALIZED (SELECT id, title from articles_hash WHERE author_id = 2) SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; DEBUG: cannot run command which targets multiple shards -DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM public.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) +DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM multi_router_planner.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 -DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM public.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) +DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM multi_router_planner.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id_author.id, id_author.author_id, id_title.id, id_title.title FROM (SELECT intermediate_result.id, intermediate_result.author_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint)) id_author, (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title character varying(20))) id_title WHERE (id_author.id OPERATOR(pg_catalog.=) id_title.id) @@ -456,7 +458,7 @@ WITH new_article AS MATERIALIZED( ) SELECT * FROM new_article; DEBUG: only SELECT, UPDATE, or DELETE common 
table expressions may be router planned -DEBUG: generating subplan XXX_1 for CTE new_article: INSERT INTO public.articles_hash (id, author_id, title, word_count) VALUES (1, 1, 'arsenous'::character varying, 9) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE new_article: INSERT INTO multi_router_planner.articles_hash (id, author_id, title, word_count) VALUES (1, 1, 'arsenous'::character varying, 9) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) new_article @@ -471,7 +473,7 @@ WITH update_article AS MATERIALIZED( ) SELECT * FROM update_article; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.articles_hash SET word_count = 10 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 9)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.articles_hash SET word_count = 10 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 9)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) update_article DEBUG: Creating router plan @@ -485,7 +487,7 @@ WITH update_article AS MATERIALIZED ( ) SELECT coalesce(1,random()); DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.articles_hash SET word_count = 11 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.articles_hash SET word_count = 11 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT COALESCE((1)::double precision, random()) AS "coalesce" DEBUG: Creating router plan @@ -510,7 +512,7 @@ WITH update_article AS MATERIALIZED ( ) SELECT coalesce(1,random()); DEBUG: cannot router plan modification of a non-distributed table -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.authors_reference SET name = ''::character varying WHERE (id OPERATOR(pg_catalog.=) 0) RETURNING name, id +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.authors_reference SET name = ''::character varying WHERE (id OPERATOR(pg_catalog.=) 0) RETURNING name, id DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT COALESCE((1)::double precision, random()) AS "coalesce" DEBUG: Creating 
router plan @@ -524,7 +526,7 @@ WITH delete_article AS MATERIALIZED ( ) SELECT * FROM delete_article; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE delete_article: DELETE FROM public.articles_hash WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE delete_article: DELETE FROM multi_router_planner.articles_hash WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) delete_article DEBUG: Creating router plan @@ -653,8 +655,8 @@ FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE tes ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM public.articles_hash -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id OPERATOR(pg_catalog.=) articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 +DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM multi_router_planner.articles_hash +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM multi_router_planner.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id OPERATOR(pg_catalog.=) articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: push down of limit count: 5 id | word_count @@ -672,8 +674,8 @@ WHERE test.id = articles_hash.id and articles_hash.author_id = 1 ORDER BY articles_hash.id; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM public.articles_hash -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id OPERATOR(pg_catalog.=) articles_hash.id) AND (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) ORDER BY articles_hash.id +DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM multi_router_planner.articles_hash +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, 
test.word_count FROM multi_router_planner.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id OPERATOR(pg_catalog.=) articles_hash.id) AND (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) ORDER BY articles_hash.id DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 id | word_count @@ -788,9 +790,9 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id = 2 and a.author_id = b.author_id LIMIT 3; DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM public.articles_single_shard_hash +DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM multi_router_planner.articles_single_shard_hash DEBUG: Creating router plan -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM multi_router_planner.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 first_author | second_word_count @@ -1575,10 +1577,10 @@ SELECT 1 FROM authors_reference r JOIN ( ) num_db ON (r.id = num_db.datid) LIMIT 1; DEBUG: found no worker with all shard placements DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM public.number1() s(datid) +DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM multi_router_planner.number1() s(datid) DEBUG: Creating router plan DEBUG: generating subplan XXX_2 for subquery SELECT s.datid FROM ((SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) s LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) 
num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 DEBUG: Creating router plan ?column? --------------------------------------------------------------------- @@ -1590,10 +1592,10 @@ SELECT s.datid FROM number1() s LEFT JOIN pg_database d ON s.datid = d.oid; SELECT 1 FROM authors_reference r JOIN num_db ON (r.id = num_db.datid) LIMIT 1; DEBUG: found no worker with all shard placements DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM public.number1() s(datid) +DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM multi_router_planner.number1() s(datid) DEBUG: Creating router plan DEBUG: generating subplan XXX_2 for subquery SELECT s.datid FROM ((SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) s LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 DEBUG: Creating router plan ?column? --------------------------------------------------------------------- @@ -1603,8 +1605,8 @@ DEBUG: Creating router plan WITH cte AS MATERIALIZED (SELECT * FROM num_db) SELECT 1 FROM authors_reference r JOIN cte ON (r.id = cte.datid) LIMIT 1; DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE cte: SELECT datid FROM (SELECT s.datid FROM (public.number1() s(datid) LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid)))) num_db -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) cte ON ((r.id OPERATOR(pg_catalog.=) cte.datid))) LIMIT 1 +DEBUG: generating subplan XXX_1 for CTE cte: SELECT datid FROM (SELECT s.datid FROM (multi_router_planner.number1() s(datid) LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid)))) num_db +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) cte ON ((r.id OPERATOR(pg_catalog.=) cte.datid))) LIMIT 1 DEBUG: Creating router plan ?column? 
--------------------------------------------------------------------- @@ -1769,7 +1771,7 @@ SET citus.log_remote_commands TO on; -- single shard select queries are router plannable SELECT * FROM articles_range where author_id = 1; DEBUG: Creating router plan -NOTICE: issuing SELECT id, author_id, title, word_count FROM public.articles_range_840012 articles_range WHERE (author_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT id, author_id, title, word_count FROM multi_router_planner.articles_range_840012 articles_range WHERE (author_id OPERATOR(pg_catalog.=) 1) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count --------------------------------------------------------------------- @@ -1777,7 +1779,7 @@ DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx SELECT * FROM articles_range where author_id = 1 or author_id = 5; DEBUG: Creating router plan -NOTICE: issuing SELECT id, author_id, title, word_count FROM public.articles_range_840012 articles_range WHERE ((author_id OPERATOR(pg_catalog.=) 1) OR (author_id OPERATOR(pg_catalog.=) 5)) +NOTICE: issuing SELECT id, author_id, title, word_count FROM multi_router_planner.articles_range_840012 articles_range WHERE ((author_id OPERATOR(pg_catalog.=) 1) OR (author_id OPERATOR(pg_catalog.=) 5)) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count --------------------------------------------------------------------- @@ -1795,7 +1797,7 @@ NOTICE: executing the command locally: SELECT id, author_id, title, word_count SELECT * FROM articles_range ar join authors_range au on (ar.author_id = au.id) WHERE ar.author_id = 1; DEBUG: Creating router plan -NOTICE: issuing SELECT ar.id, ar.author_id, ar.title, ar.word_count, au.name, au.id FROM (public.articles_range_840012 ar JOIN public.authors_range_840008 au ON ((ar.author_id OPERATOR(pg_catalog.=) au.id))) WHERE (ar.author_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT ar.id, ar.author_id, ar.title, ar.word_count, au.name, au.id FROM (multi_router_planner.articles_range_840012 ar JOIN multi_router_planner.authors_range_840008 au ON ((ar.author_id OPERATOR(pg_catalog.=) au.id))) WHERE (ar.author_id OPERATOR(pg_catalog.=) 1) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count | name | id --------------------------------------------------------------------- @@ -2433,12 +2435,15 @@ SELECT create_distributed_table('failure_test', 'a', 'hash'); SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - router_user - :master_port +SET search_path TO multi_router_planner; -- we will fail to connect to worker 2, since the user does not exist -- still, we never mark placements inactive. 
Instead, fail the transaction BEGIN; @@ -2452,7 +2457,7 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'failure_test'::regclass ) - ORDER BY placementid; + ORDER BY shardid, nodeport; shardid | shardstate | nodename | nodeport --------------------------------------------------------------------- 840017 | 1 | localhost | 57637 @@ -2471,18 +2476,5 @@ DROP USER router_user; \c - - - :master_port DROP OWNED BY router_user; DROP USER router_user; -DROP TABLE failure_test; -DROP FUNCTION author_articles_max_id(); -DROP FUNCTION author_articles_id_word_count(); -DROP MATERIALIZED VIEW mv_articles_hash_empty; -DROP MATERIALIZED VIEW mv_articles_hash_data; -DROP VIEW num_db; -DROP FUNCTION number1(); -DROP TABLE articles_hash; -DROP TABLE articles_single_shard_hash; -DROP TABLE authors_hash; -DROP TABLE authors_range; -DROP TABLE authors_reference; -DROP TABLE company_employees; -DROP TABLE articles_range; -DROP TABLE articles_append; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_router_planner CASCADE; diff --git a/src/test/regress/expected/multi_simple_queries.out b/src/test/regress/expected/multi_simple_queries.out index 6bd8bad4a..4578d69a8 100644 --- a/src/test/regress/expected/multi_simple_queries.out +++ b/src/test/regress/expected/multi_simple_queries.out @@ -7,6 +7,8 @@ SET citus.coordinator_aggregation_strategy TO 'disabled'; -- =================================================================== -- test end-to-end query functionality -- =================================================================== +CREATE SCHEMA simple_queries_test; +SET search_path TO simple_queries_test; CREATE TABLE articles ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -382,7 +384,7 @@ SELECT author_id FROM articles 8 (3 rows) -SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders +SELECT o_orderstatus, count(*), avg(o_totalprice) FROM public.orders GROUP BY o_orderstatus HAVING count(*) > 1450 OR avg(o_totalprice) > 150000 ORDER BY o_orderstatus; @@ -392,7 +394,7 @@ SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders P | 75 | 164847.914533333333 (2 rows) -SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders +SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM public.lineitem, public.orders WHERE l_orderkey = o_orderkey AND l_orderkey > 9030 GROUP BY o_orderstatus HAVING sum(l_linenumber) > 1000 @@ -541,7 +543,7 @@ DEBUG: query has a single distribution column value: 2 -- error out on unsupported aggregate SET client_min_messages to 'NOTICE'; -CREATE AGGREGATE public.invalid(int) ( +CREATE AGGREGATE invalid(int) ( sfunc = int4pl, stype = int ); @@ -812,10 +814,11 @@ SELECT * FROM (SELECT nextval('query_seq') FROM articles LIMIT 3) vals; (3 rows) -- but not elsewhere -SELECT sum(nextval('query_seq')) FROM articles; -ERROR: relation "public.query_seq" does not exist +SELECT sum(nextval('simple_queries_test.query_seq')) FROM articles; +ERROR: relation "simple_queries_test.query_seq" does not exist CONTEXT: while executing command on localhost:xxxxx -SELECT n FROM (SELECT nextval('query_seq') n, random() FROM articles) vals; -ERROR: relation "public.query_seq" does not exist +SELECT n FROM (SELECT nextval('simple_queries_test.query_seq') n, random() FROM articles) vals; +ERROR: relation "simple_queries_test.query_seq" does not exist CONTEXT: while executing command on localhost:xxxxx -DROP SEQUENCE query_seq; +SET client_min_messages TO WARNING; +DROP SCHEMA 
simple_queries_test CASCADE; diff --git a/src/test/regress/expected/multi_upsert.out b/src/test/regress/expected/multi_upsert.out index 08308aba0..e41b2a3d5 100644 --- a/src/test/regress/expected/multi_upsert.out +++ b/src/test/regress/expected/multi_upsert.out @@ -1,5 +1,7 @@ -- this test file aims to test UPSERT feature on Citus SET citus.next_shard_id TO 980000; +CREATE SCHEMA upsert_test; +SET search_path TO upsert_test; CREATE TABLE upsert_test ( part_key int UNIQUE, @@ -244,3 +246,5 @@ ERROR: functions used in the WHERE clause of the ON CONFLICT clause of INSERTs INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_key) DO UPDATE SET part_key = 15; ERROR: modifying the partition value of rows is not allowed +SET client_min_messages TO WARNING; +DROP SCHEMA upsert_test CASCADE; diff --git a/src/test/regress/sql/multi_data_types.sql b/src/test/regress/sql/multi_data_types.sql index 7601bb319..d307c4c6f 100644 --- a/src/test/regress/sql/multi_data_types.sql +++ b/src/test/regress/sql/multi_data_types.sql @@ -6,6 +6,15 @@ SET citus.next_shard_id TO 530000; +-- Given that other test files depend on the existence of types created in this file, +-- we cannot drop them at the end. Instead, we drop them at the beginning of the test +-- to make this file runnable multiple times via run_test.py. +BEGIN; + SET LOCAL client_min_messages TO WARNING; + DROP TYPE IF EXISTS test_composite_type, other_composite_type, bug_status CASCADE; + DROP OPERATOR FAMILY IF EXISTS cats_op_fam USING hash; +COMMIT; + -- create a custom type... CREATE TYPE test_composite_type AS ( i integer, diff --git a/src/test/regress/sql/multi_modifying_xacts.sql b/src/test/regress/sql/multi_modifying_xacts.sql index 2be3a0911..506480093 100644 --- a/src/test/regress/sql/multi_modifying_xacts.sql +++ b/src/test/regress/sql/multi_modifying_xacts.sql @@ -1,6 +1,9 @@ SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; +CREATE SCHEMA multi_modifying_xacts; +SET search_path TO multi_modifying_xacts; + -- =================================================================== -- test end-to-end modification functionality -- =================================================================== @@ -169,7 +172,7 @@ INSERT INTO labs VALUES (6, 'Bell Labs'); ABORT; -- but the DDL should correctly roll back -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.labs'::regclass; +SELECT "Column", "Type", "Modifiers" FROM public.table_desc WHERE relid='multi_modifying_xacts.labs'::regclass; SELECT * FROM labs WHERE id = 6; -- COPY can happen after single row INSERT @@ -294,7 +297,7 @@ CREATE FUNCTION reject_large_id() RETURNS trigger AS $rli$ $rli$ LANGUAGE plpgsql; -- register after insert trigger -SELECT * FROM run_command_on_placements('researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_large_id()') +SELECT * FROM run_command_on_placements('multi_modifying_xacts.researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE multi_modifying_xacts.reject_large_id()') ORDER BY nodeport, shardid; -- hide postgresql version dependend messages for next test only @@ -418,6 +421,7 @@ AND s.logicalrelid = 'objects'::regclass; -- create trigger on one worker to reject certain values \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION 
reject_bad() RETURNS trigger AS $rb$ @@ -437,6 +441,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- test partial failure; worker_1 succeeds, 2 fails -- in this case, we expect the transaction to abort @@ -465,6 +470,7 @@ DELETE FROM objects; -- there cannot be errors on different shards at different times -- because the first failure will fail the whole transaction \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -483,6 +489,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -506,6 +513,7 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if the failures happen at COMMIT time? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; @@ -515,6 +523,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- should be the same story as before, just at COMMIT time -- as we use 2PC, the transaction is rollbacked @@ -547,6 +556,7 @@ AND s.logicalrelid = 'objects'::regclass; -- what if all nodes have failures at COMMIT time? \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON labs_1200002; @@ -556,6 +566,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- reduce the log level for differences between PG14 and PG15 -- in PGconn->errorMessage @@ -586,10 +597,12 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if one shard (objects) succeeds but another (labs) completely fails? 
\c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; \c - - - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200004; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -682,6 +695,7 @@ SELECT * FROM reference_modifying_xacts; -- lets fail on of the workers at before the commit time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_reference() RETURNS trigger AS $rb$ BEGIN @@ -700,6 +714,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -711,6 +726,7 @@ COMMIT; -- lets fail one of the workers at COMMIT time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; CREATE CONSTRAINT TRIGGER reject_bad_reference @@ -719,6 +735,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction @@ -740,10 +757,12 @@ ORDER BY s.logicalrelid, sp.shardstate; -- for the time-being drop the constraint \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- now create a hash distributed table and run tests -- including both the reference table and the hash @@ -777,6 +796,7 @@ ABORT; -- lets fail one of the workers before COMMIT time for the hash table \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_hash() RETURNS trigger AS $rb$ BEGIN @@ -795,6 +815,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail @@ -809,6 +830,7 @@ SELECT * FROM reference_modifying_xacts WHERE key = 55; -- now lets fail on of the workers for the hash distributed table table -- when there is a reference table involved \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_hash ON hash_modifying_xacts_1200007; -- the trigger is on execution time @@ -818,6 +840,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail @@ -844,6 +867,7 @@ ORDER BY s.logicalrelid, sp.shardstate; -- change is rollbacked as well \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 @@ -851,6 +875,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse BEGIN; @@ -920,9 +945,11 @@ SELECT count(*) FROM pg_dist_transaction; -- first create the new user on all nodes CREATE USER test_user; +GRANT ALL ON SCHEMA multi_modifying_xacts TO test_user; -- now connect back to the master with the new user \c - test_user - 
:master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200015; CREATE TABLE reference_failure_test (key int, value int); SELECT create_reference_table('reference_failure_test'); @@ -934,16 +961,19 @@ SELECT create_distributed_table('numbers_hash_failure_test', 'key'); -- ensure that the shard is created for this user \c - test_user - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.override_table_visibility TO false; \dt reference_failure_test_1200015 -- now connect with the default user, -- and rename the existing user \c - :default_user - :worker_1_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; -- connect back to master and query the reference table \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- should fail since the worker doesn't have test_user anymore INSERT INTO reference_failure_test VALUES (1, '1'); @@ -1007,15 +1037,18 @@ SELECT count(*) FROM numbers_hash_failure_test; -- break the other node as well \c - :default_user - :worker_2_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- fails on all shard placements INSERT INTO numbers_hash_failure_test VALUES (2,2); -- connect back to the master with the proper user to continue the tests \c - :default_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200020; SET citus.next_placement_id TO 1200033; -- unbreak both nodes by renaming the user back to the original name @@ -1024,6 +1057,7 @@ SELECT * FROM run_command_on_workers('ALTER USER test_user_new RENAME TO test_us DROP TABLE reference_modifying_xacts, hash_modifying_xacts, hash_modifying_xacts_second, reference_failure_test, numbers_hash_failure_test; +REVOKE ALL ON SCHEMA multi_modifying_xacts FROM test_user; DROP USER test_user; -- set up foreign keys to test transactions with co-located and reference tables @@ -1043,7 +1077,10 @@ CREATE TABLE itemgroups ( ); SELECT create_reference_table('itemgroups'); +SET client_min_messages TO WARNING; DROP TABLE IF EXISTS users ; +RESET client_min_messages; + CREATE TABLE users ( id int PRIMARY KEY, name text, @@ -1199,5 +1236,5 @@ SELECT insert_abort(); SELECT name FROM labs WHERE id = 1001; RESET citus.function_opens_transaction_block; -DROP FUNCTION insert_abort(); -DROP TABLE items, users, itemgroups, usergroups, researchers, labs; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_modifying_xacts CASCADE; diff --git a/src/test/regress/sql/multi_mx_copy_data.sql b/src/test/regress/sql/multi_mx_copy_data.sql index 26d4d3c42..b4598ae61 100644 --- a/src/test/regress/sql/multi_mx_copy_data.sql +++ b/src/test/regress/sql/multi_mx_copy_data.sql @@ -2,6 +2,11 @@ -- MULTI_MX_COPY_DATA -- +-- We truncate them to make this test runnable multiple times. +-- Note that we cannot do that at the end of the test because +-- we need to keep the data for the other tests. 
+TRUNCATE lineitem_mx, orders_mx; + \set nation_data_file :abs_srcdir '/data/nation.data' \set client_side_copy_command '\\copy nation_hash FROM ' :'nation_data_file' ' with delimiter '''|''';' :client_side_copy_command @@ -96,3 +101,5 @@ SET search_path TO public; :client_side_copy_command \set client_side_copy_command '\\copy supplier_mx FROM ' :'supplier_data_file' ' with delimiter '''|''';' :client_side_copy_command + +DROP TABLE citus_mx_test_schema.nation_hash_replicated; diff --git a/src/test/regress/sql/multi_mx_modifying_xacts.sql b/src/test/regress/sql/multi_mx_modifying_xacts.sql index cf60f023d..15335f579 100644 --- a/src/test/regress/sql/multi_mx_modifying_xacts.sql +++ b/src/test/regress/sql/multi_mx_modifying_xacts.sql @@ -331,3 +331,7 @@ COMMIT; -- no data should persists SELECT * FROM objects_mx WHERE id = 1; SELECT * FROM labs_mx WHERE id = 8; + +TRUNCATE objects_mx, labs_mx, researchers_mx; +DROP TRIGGER reject_bad_mx ON labs_mx_1220102; +DROP FUNCTION reject_bad_mx; diff --git a/src/test/regress/sql/multi_mx_router_planner.sql b/src/test/regress/sql/multi_mx_router_planner.sql index fdfd81b07..6a1271720 100644 --- a/src/test/regress/sql/multi_mx_router_planner.sql +++ b/src/test/regress/sql/multi_mx_router_planner.sql @@ -657,3 +657,8 @@ INSERT INTO articles_hash_mx VALUES (51, 1, 'amateus', 1814); SELECT id FROM articles_hash_mx WHERE author_id = 1; + +SET client_min_messages to WARNING; +TRUNCATE articles_hash_mx, company_employees_mx, articles_single_shard_hash_mx; +DROP MATERIALIZED VIEW mv_articles_hash_mx_error; +DROP TABLE authors_hash_mx; diff --git a/src/test/regress/sql/multi_router_planner.sql b/src/test/regress/sql/multi_router_planner.sql index 87104599c..142568d5d 100644 --- a/src/test/regress/sql/multi_router_planner.sql +++ b/src/test/regress/sql/multi_router_planner.sql @@ -10,6 +10,9 @@ SET citus.next_shard_id TO 840000; -- other tests that triggers fast-path-router planner SET citus.enable_fast_path_router_planner TO false; +CREATE SCHEMA multi_router_planner; +SET search_path TO multi_router_planner; + CREATE TABLE articles_hash ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -1182,12 +1185,15 @@ SELECT create_distributed_table('failure_test', 'a', 'hash'); SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - router_user - :master_port +SET search_path TO multi_router_planner; -- we will fail to connect to worker 2, since the user does not exist -- still, we never mark placements inactive. 
Instead, fail the transaction BEGIN; @@ -1199,29 +1205,13 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'failure_test'::regclass ) - ORDER BY placementid; + ORDER BY shardid, nodeport; \c - postgres - :worker_1_port DROP OWNED BY router_user; DROP USER router_user; \c - - - :master_port DROP OWNED BY router_user; DROP USER router_user; -DROP TABLE failure_test; -DROP FUNCTION author_articles_max_id(); -DROP FUNCTION author_articles_id_word_count(); - -DROP MATERIALIZED VIEW mv_articles_hash_empty; -DROP MATERIALIZED VIEW mv_articles_hash_data; - -DROP VIEW num_db; -DROP FUNCTION number1(); - -DROP TABLE articles_hash; -DROP TABLE articles_single_shard_hash; -DROP TABLE authors_hash; -DROP TABLE authors_range; -DROP TABLE authors_reference; -DROP TABLE company_employees; -DROP TABLE articles_range; -DROP TABLE articles_append; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_router_planner CASCADE; diff --git a/src/test/regress/sql/multi_simple_queries.sql b/src/test/regress/sql/multi_simple_queries.sql index 8d7e45255..7fcf45b1c 100644 --- a/src/test/regress/sql/multi_simple_queries.sql +++ b/src/test/regress/sql/multi_simple_queries.sql @@ -11,6 +11,9 @@ SET citus.coordinator_aggregation_strategy TO 'disabled'; -- test end-to-end query functionality -- =================================================================== +CREATE SCHEMA simple_queries_test; +SET search_path TO simple_queries_test; + CREATE TABLE articles ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -203,12 +206,12 @@ SELECT author_id FROM articles HAVING author_id <= 2 OR author_id = 8 ORDER BY author_id; -SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders +SELECT o_orderstatus, count(*), avg(o_totalprice) FROM public.orders GROUP BY o_orderstatus HAVING count(*) > 1450 OR avg(o_totalprice) > 150000 ORDER BY o_orderstatus; -SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders +SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM public.lineitem, public.orders WHERE l_orderkey = o_orderkey AND l_orderkey > 9030 GROUP BY o_orderstatus HAVING sum(l_linenumber) > 1000 @@ -277,7 +280,7 @@ SELECT avg(word_count) -- error out on unsupported aggregate SET client_min_messages to 'NOTICE'; -CREATE AGGREGATE public.invalid(int) ( +CREATE AGGREGATE invalid(int) ( sfunc = int4pl, stype = int ); @@ -355,7 +358,8 @@ SELECT nextval('query_seq')*2 FROM articles LIMIT 3; SELECT * FROM (SELECT nextval('query_seq') FROM articles LIMIT 3) vals; -- but not elsewhere -SELECT sum(nextval('query_seq')) FROM articles; -SELECT n FROM (SELECT nextval('query_seq') n, random() FROM articles) vals; +SELECT sum(nextval('simple_queries_test.query_seq')) FROM articles; +SELECT n FROM (SELECT nextval('simple_queries_test.query_seq') n, random() FROM articles) vals; -DROP SEQUENCE query_seq; +SET client_min_messages TO WARNING; +DROP SCHEMA simple_queries_test CASCADE; diff --git a/src/test/regress/sql/multi_upsert.sql b/src/test/regress/sql/multi_upsert.sql index 24503b7a4..6ef72d576 100644 --- a/src/test/regress/sql/multi_upsert.sql +++ b/src/test/regress/sql/multi_upsert.sql @@ -3,6 +3,8 @@ SET citus.next_shard_id TO 980000; +CREATE SCHEMA upsert_test; +SET search_path TO upsert_test; CREATE TABLE upsert_test ( @@ -207,3 +209,6 @@ INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_ke -- error out on attempt to update the partition key INSERT INTO upsert_test (part_key, other_col) 
VALUES (1, 1) ON CONFLICT (part_key) DO UPDATE SET part_key = 15; + +SET client_min_messages TO WARNING; +DROP SCHEMA upsert_test CASCADE; From d6603390ab1fc26688a1834c93cf680938381ea1 Mon Sep 17 00:00:00 2001 From: Halil Ozan Akgul Date: Mon, 27 Mar 2023 12:14:49 +0300 Subject: [PATCH 23/58] Add multi tenant statistics monitoring --- .../distributed/executor/citus_custom_scan.c | 3 +- .../distributed/planner/deparse_shard_query.c | 19 +- .../distributed/planner/distributed_planner.c | 3 + .../planner/multi_router_planner.c | 40 +- src/backend/distributed/shared_library_init.c | 35 + .../distributed/sql/citus--11.2-1--11.3-1.sql | 1 + .../sql/downgrades/citus--11.3-1--11.2-1.sql | 4 +- .../sql/udfs/citus_stats_tenants/11.3-1.sql | 27 + .../sql/udfs/citus_stats_tenants/latest.sql | 27 + src/backend/distributed/utils/attribute.c | 688 ++++++++++++++++++ src/include/distributed/citus_custom_scan.h | 2 + .../distributed/multi_physical_planner.h | 3 + src/include/distributed/utils/attribute.h | 102 +++ src/test/regress/bin/normalize.sed | 2 + .../regress/expected/citus_stats_tenants.out | 290 ++++++++ .../regress/expected/failure_multi_dml.out | 12 +- .../expected/failure_multi_row_insert.out | 2 +- .../regress/expected/failure_ref_tables.out | 6 +- .../failure_replicated_partitions.out | 2 +- .../regress/expected/failure_single_mod.out | 6 +- .../expected/failure_single_select.out | 18 +- src/test/regress/expected/multi_extension.out | 6 +- .../expected/upgrade_list_citus_objects.out | 4 +- src/test/regress/multi_1_schedule | 5 + src/test/regress/pg_regress_multi.pl | 1 + src/test/regress/sql/citus_stats_tenants.sql | 118 +++ src/test/regress/sql/failure_multi_dml.sql | 12 +- .../regress/sql/failure_multi_row_insert.sql | 2 +- src/test/regress/sql/failure_ref_tables.sql | 6 +- .../sql/failure_replicated_partitions.sql | 2 +- src/test/regress/sql/failure_single_mod.sql | 6 +- .../regress/sql/failure_single_select.sql | 18 +- 32 files changed, 1415 insertions(+), 57 deletions(-) create mode 100644 src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql create mode 100644 src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql create mode 100644 src/backend/distributed/utils/attribute.c create mode 100644 src/include/distributed/utils/attribute.h create mode 100644 src/test/regress/expected/citus_stats_tenants.out create mode 100644 src/test/regress/sql/citus_stats_tenants.sql diff --git a/src/backend/distributed/executor/citus_custom_scan.c b/src/backend/distributed/executor/citus_custom_scan.c index 5e4afd1a7..28486f23d 100644 --- a/src/backend/distributed/executor/citus_custom_scan.c +++ b/src/backend/distributed/executor/citus_custom_scan.c @@ -66,7 +66,6 @@ static DistributedPlan * CopyDistributedPlanWithoutCache( DistributedPlan *originalDistributedPlan); static void CitusEndScan(CustomScanState *node); static void CitusReScan(CustomScanState *node); -static void SetJobColocationId(Job *job); static void EnsureForceDelegationDistributionKey(Job *job); static void EnsureAnchorShardsInJobExist(Job *job); static bool AnchorShardsInTaskListExist(List *taskList); @@ -892,7 +891,7 @@ IsCitusCustomScan(Plan *plan) * colocation group, the Job's colocation ID is set to the group ID, else, * it will be set to INVALID_COLOCATION_ID. 
*/ -static void +void SetJobColocationId(Job *job) { uint32 jobColocationId = INVALID_COLOCATION_ID; diff --git a/src/backend/distributed/planner/deparse_shard_query.c b/src/backend/distributed/planner/deparse_shard_query.c index e62821ad0..136268ba0 100644 --- a/src/backend/distributed/planner/deparse_shard_query.c +++ b/src/backend/distributed/planner/deparse_shard_query.c @@ -26,6 +26,7 @@ #include "distributed/multi_physical_planner.h" #include "distributed/multi_router_planner.h" #include "distributed/shard_utils.h" +#include "distributed/utils/attribute.h" #include "distributed/version_compat.h" #include "lib/stringinfo.h" #include "nodes/makefuncs.h" @@ -141,6 +142,21 @@ RebuildQueryStrings(Job *workerJob) ? "(null)" : TaskQueryString(task)))); + Datum partitionColumnValue; + Oid partitionColumnType = 0; + char *partitionColumnString = NULL; + if (workerJob->partitionKeyValue != NULL) + { + partitionColumnValue = workerJob->partitionKeyValue->constvalue; + partitionColumnType = workerJob->partitionKeyValue->consttype; + partitionColumnString = DatumToString(partitionColumnValue, + partitionColumnType); + } + + task->partitionColumn = partitionColumnString; + SetJobColocationId(workerJob); + task->colocationId = workerJob->colocationId; + UpdateTaskQueryString(query, task); /* @@ -387,7 +403,8 @@ SetTaskQueryIfShouldLazyDeparse(Task *task, Query *query) return; } - SetTaskQueryString(task, DeparseTaskQuery(task, query)); + SetTaskQueryString(task, AnnotateQuery(DeparseTaskQuery(task, query), + task->partitionColumn, task->colocationId)); } diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index eb9e21786..b395e5b24 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -49,6 +49,7 @@ #include "distributed/recursive_planning.h" #include "distributed/shardinterval_utils.h" #include "distributed/shard_utils.h" +#include "distributed/utils/attribute.h" #include "distributed/version_compat.h" #include "distributed/worker_shard_visibility.h" #include "executor/executor.h" @@ -157,6 +158,8 @@ distributed_planner(Query *parse, bool fastPathRouterQuery = false; Node *distributionKeyValue = NULL; + AttributeQueryIfAnnotated(query_string, parse->commandType); + List *rangeTableList = ExtractRangeTableEntryList(parse); if (cursorOptions & CURSOR_OPT_FORCE_DISTRIBUTED) diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 407aeaf65..9603ccc2d 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -165,7 +165,8 @@ static int CompareInsertValuesByShardId(const void *leftElement, static List * SingleShardTaskList(Query *query, uint64 jobId, List *relationShardList, List *placementList, uint64 shardId, bool parametersInQueryResolved, - bool isLocalTableModification); + bool isLocalTableModification, char *partitionColumn, + int colocationId); static bool RowLocksOnRelations(Node *node, List **rtiLockList); static void ReorderTaskPlacementsByTaskAssignmentPolicy(Job *job, TaskAssignmentPolicyType @@ -1951,11 +1952,25 @@ GenerateSingleShardRouterTaskList(Job *job, List *relationShardList, if (originalQuery->commandType == CMD_SELECT) { + Datum partitionColumnValue; + Oid partitionColumnType = 0; + char *partitionColumnString = NULL; + if (job->partitionKeyValue != NULL) + { + partitionColumnValue = 
job->partitionKeyValue->constvalue; + partitionColumnType = job->partitionKeyValue->consttype; + partitionColumnString = DatumToString(partitionColumnValue, + partitionColumnType); + } + + SetJobColocationId(job); + job->taskList = SingleShardTaskList(originalQuery, job->jobId, relationShardList, placementList, shardId, job->parametersInJobQueryResolved, - isLocalTableModification); + isLocalTableModification, + partitionColumnString, job->colocationId); /* * Queries to reference tables, or distributed tables with multiple replica's have @@ -1979,11 +1994,25 @@ GenerateSingleShardRouterTaskList(Job *job, List *relationShardList, } else { + Datum partitionColumnValue; + Oid partitionColumnType = 0; + char *partitionColumnString = NULL; + if (job->partitionKeyValue != NULL) + { + partitionColumnValue = job->partitionKeyValue->constvalue; + partitionColumnType = job->partitionKeyValue->consttype; + partitionColumnString = DatumToString(partitionColumnValue, + partitionColumnType); + } + + SetJobColocationId(job); + job->taskList = SingleShardTaskList(originalQuery, job->jobId, relationShardList, placementList, shardId, job->parametersInJobQueryResolved, - isLocalTableModification); + isLocalTableModification, + partitionColumnString, job->colocationId); } } @@ -2077,7 +2106,8 @@ static List * SingleShardTaskList(Query *query, uint64 jobId, List *relationShardList, List *placementList, uint64 shardId, bool parametersInQueryResolved, - bool isLocalTableModification) + bool isLocalTableModification, char *partitionColumn, + int colocationId) { TaskType taskType = READ_TASK; char replicationModel = 0; @@ -2147,6 +2177,8 @@ SingleShardTaskList(Query *query, uint64 jobId, List *relationShardList, * that the query cannot be executed locally. */ task->taskPlacementList = placementList; + task->partitionColumn = partitionColumn; + task->colocationId = colocationId; SetTaskQueryIfShouldLazyDeparse(task, query); task->anchorShardId = shardId; task->jobId = jobId; diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 3c67d9b78..c390ee1c0 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -90,6 +90,7 @@ #include "distributed/resource_lock.h" #include "distributed/transaction_management.h" #include "distributed/transaction_recovery.h" +#include "distributed/utils/attribute.h" #include "distributed/utils/directory.h" #include "distributed/worker_log_messages.h" #include "distributed/worker_manager.h" @@ -439,6 +440,8 @@ _PG_init(void) ExecutorStart_hook = CitusExecutorStart; ExecutorRun_hook = CitusExecutorRun; ExplainOneQuery_hook = CitusExplainOneQuery; + prev_ExecutorEnd = ExecutorEnd_hook; + ExecutorEnd_hook = CitusAttributeToEnd; /* register hook for error messages */ emit_log_hook = multi_log_hook; @@ -472,6 +475,8 @@ _PG_init(void) /* initialize shard split shared memory handle management */ InitializeShardSplitSMHandleManagement(); + InitializeMultiTenantMonitorSMHandleManagement(); + /* enable modification of pg_catalog tables during pg_upgrade */ if (IsBinaryUpgrade) { @@ -1899,6 +1904,16 @@ RegisterCitusConfigVariables(void) GUC_STANDARD, NULL, NULL, NULL); + DefineCustomEnumVariable( + "citus.multi_tenant_monitoring_log_level", + gettext_noop("Sets the level of multi tenant monitoring log messages"), + NULL, + &MultiTenantMonitoringLogLevel, + CITUS_LOG_LEVEL_OFF, log_level_options, + PGC_USERSET, + GUC_STANDARD, + NULL, NULL, NULL); + DefineCustomIntVariable( 
"citus.next_cleanup_record_id", gettext_noop("Set the next cleanup record ID to use in operation creation."), @@ -2283,6 +2298,26 @@ RegisterCitusConfigVariables(void) GUC_STANDARD, NULL, NULL, NULL); + DefineCustomIntVariable( + "citus.stats_tenants_limit", + gettext_noop("monitor limit"), + NULL, + &CitusStatsTenantsLimit, + 10, 1, 100, + PGC_POSTMASTER, + GUC_STANDARD, + NULL, NULL, NULL); + + DefineCustomIntVariable( + "citus.stats_tenants_period", + gettext_noop("monitor period"), + NULL, + &CitusStatsTenantsPeriod, + 60, 1, 1000000000, + PGC_USERSET, + GUC_STANDARD, + NULL, NULL, NULL); + DefineCustomBoolVariable( "citus.subquery_pushdown", gettext_noop("Usage of this GUC is highly discouraged, please read the long " diff --git a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql index 981c5f375..73a2bf8a9 100644 --- a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql +++ b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql @@ -2,3 +2,4 @@ -- bump version to 11.3-1 +#include "udfs/citus_stats_tenants/11.3-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql index 7d71235d7..47d2701ac 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql @@ -1,2 +1,4 @@ -- citus--11.3-1--11.2-1 --- this is an empty downgrade path since citus--11.2-1--11.3-1.sql is empty for now + +DROP VIEW pg_catalog.citus_stats_tenants; +DROP FUNCTION pg_catalog.citus_stats_tenants(boolean); diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql new file mode 100644 index 000000000..f476a9c28 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql @@ -0,0 +1,27 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants( + return_all_tenants BOOLEAN DEFAULT FALSE, + OUT colocation_id INT, + OUT tenant_attribute TEXT, + OUT read_count_in_this_period INT, + OUT read_count_in_last_period INT, + OUT query_count_in_this_period INT, + OUT query_count_in_last_period INT, + OUT score BIGINT) +RETURNS SETOF RECORD +LANGUAGE C +AS 'citus', $$citus_stats_tenants$$; + + +CREATE OR REPLACE VIEW citus.citus_stats_tenants AS +SELECT + colocation_id, + tenant_attribute, + read_count_in_this_period, + read_count_in_last_period, + query_count_in_this_period, + query_count_in_last_period +FROM pg_catalog.citus_stats_tenants() +ORDER BY score DESC; + +ALTER VIEW citus.citus_stats_tenants SET SCHEMA pg_catalog; +GRANT SELECT ON pg_catalog.citus_stats_tenants TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql new file mode 100644 index 000000000..f476a9c28 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql @@ -0,0 +1,27 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants( + return_all_tenants BOOLEAN DEFAULT FALSE, + OUT colocation_id INT, + OUT tenant_attribute TEXT, + OUT read_count_in_this_period INT, + OUT read_count_in_last_period INT, + OUT query_count_in_this_period INT, + OUT query_count_in_last_period INT, + OUT score BIGINT) +RETURNS SETOF RECORD +LANGUAGE C +AS 'citus', $$citus_stats_tenants$$; + + +CREATE OR REPLACE VIEW citus.citus_stats_tenants AS +SELECT + colocation_id, + tenant_attribute, + 
read_count_in_this_period, + read_count_in_last_period, + query_count_in_this_period, + query_count_in_last_period +FROM pg_catalog.citus_stats_tenants() +ORDER BY score DESC; + +ALTER VIEW citus.citus_stats_tenants SET SCHEMA pg_catalog; +GRANT SELECT ON pg_catalog.citus_stats_tenants TO PUBLIC; diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c new file mode 100644 index 000000000..9b2d65e95 --- /dev/null +++ b/src/backend/distributed/utils/attribute.c @@ -0,0 +1,688 @@ +/*------------------------------------------------------------------------- + * + * attribute.c + * Routines related to the multi tenant monitor. + * + * Copyright (c) Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "unistd.h" + +#include "distributed/citus_safe_lib.h" +#include "distributed/log_utils.h" +#include "distributed/listutils.h" +#include "distributed/metadata_cache.h" +#include "distributed/tuplestore.h" +#include "executor/execdesc.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include <sys/time.h> +#include "utils/builtins.h" + +#include "distributed/utils/attribute.h" + +#include <time.h> + +static void AttributeMetricsIfApplicable(void); + +ExecutorEnd_hook_type prev_ExecutorEnd = NULL; + +#define ATTRIBUTE_PREFIX "/* attributeTo: " +#define ATTRIBUTE_STRING_FORMAT "/* attributeTo: %s,%d */" +#define CITUS_STATS_TENANTS_COLUMNS 7 +#define ONE_QUERY_SCORE 1000000000 + +/* TODO maybe needs to be a stack */ +char attributeToTenant[MAX_TENANT_ATTRIBUTE_LENGTH] = ""; +CmdType attributeCommandType = CMD_UNKNOWN; +int colocationGroupId = -1; +clock_t attributeToTenantStart = { 0 }; + +const char *SharedMemoryNameForMultiTenantMonitor = + "Shared memory for multi tenant monitor"; + +char *tenantTrancheName = "Tenant Tranche"; +char *monitorTrancheName = "Multi Tenant Monitor Tranche"; + +static shmem_startup_hook_type prev_shmem_startup_hook = NULL; + +static int CompareTenantScore(const void *leftElement, const void *rightElement); +static void UpdatePeriodsIfNecessary(TenantStats *tenantStats, time_t queryTime); +static void ReduceScoreIfNecessary(TenantStats *tenantStats, time_t queryTime); +static void EvictTenantsIfNecessary(time_t queryTime); +static void RecordTenantStats(TenantStats *tenantStats); +static void CreateMultiTenantMonitor(void); +static MultiTenantMonitor * CreateSharedMemoryForMultiTenantMonitor(void); +static MultiTenantMonitor * GetMultiTenantMonitor(void); +static void MultiTenantMonitorSMInit(void); +static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); +static int FindTenantStats(MultiTenantMonitor *monitor); +static size_t MultiTenantMonitorshmemSize(void); + +int MultiTenantMonitoringLogLevel = CITUS_LOG_LEVEL_OFF; +int CitusStatsTenantsPeriod = (time_t) 60; +int CitusStatsTenantsLimit = 10; + + +PG_FUNCTION_INFO_V1(citus_stats_tenants); +PG_FUNCTION_INFO_V1(clean_citus_stats_tenants); +PG_FUNCTION_INFO_V1(sleep_until_next_period); + + +/* + * citus_stats_tenants finds, updates and returns the statistics for tenants. + */ +Datum +citus_stats_tenants(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + + /* + * We keep more than CitusStatsTenantsLimit tenants in our monitor. + * We do this so that we do not lose data if a tenant falls out of the top CitusStatsTenantsLimit but returns soon. + * Normally we return CitusStatsTenantsLimit tenants but if returnAllTenants is true we return all of them.
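As a stand-alone illustration of the ordering and limiting described above (struct layout and values here are illustrative, not the patch's):

```
#include <stdio.h>
#include <stdlib.h>

/* illustrative stand-in for TenantStats; only the score matters here */
typedef struct DemoTenant
{
	long long score;
} DemoTenant;

/* descending-score comparator, same shape as CompareTenantScore below */
static int
CompareScoreDesc(const void *left, const void *right)
{
	long long l = ((const DemoTenant *) left)->score;
	long long r = ((const DemoTenant *) right)->score;

	if (l > r)
	{
		return -1;
	}
	else if (l < r)
	{
		return 1;
	}
	return 0;
}

int
main(void)
{
	DemoTenant tenants[] = { { 5 }, { 42 }, { 17 }, { 99 } };
	int tenantCount = 4;
	int limit = 2; /* stands in for CitusStatsTenantsLimit */
	int rowsToReturn = tenantCount < limit ? tenantCount : limit;

	qsort(tenants, tenantCount, sizeof(DemoTenant), CompareScoreDesc);

	for (int i = 0; i < rowsToReturn; i++)
	{
		printf("%lld\n", tenants[i].score); /* prints 99, then 42 */
	}
	return 0;
}
```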
+ */ + bool returnAllTenants = PG_GETARG_BOOL(0); + + TupleDesc tupleDescriptor = NULL; + Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); + time_t monitoringTime = time(0); + + Datum values[CITUS_STATS_TENANTS_COLUMNS]; + bool isNulls[CITUS_STATS_TENANTS_COLUMNS]; + + MultiTenantMonitor *monitor = GetMultiTenantMonitor(); + + if (monitor == NULL) + { + PG_RETURN_VOID(); + } + + LWLockAcquire(&monitor->lock, LW_EXCLUSIVE); + + int numberOfRowsToReturn = 0; + if (returnAllTenants) + { + numberOfRowsToReturn = monitor->tenantCount; + } + else + { + numberOfRowsToReturn = Min(monitor->tenantCount, CitusStatsTenantsLimit); + } + + for (int tenantIndex = 0; tenantIndex < monitor->tenantCount; tenantIndex++) + { + UpdatePeriodsIfNecessary(&monitor->tenants[tenantIndex], monitoringTime); + ReduceScoreIfNecessary(&monitor->tenants[tenantIndex], monitoringTime); + } + SafeQsort(monitor->tenants, monitor->tenantCount, sizeof(TenantStats), + CompareTenantScore); + + for (int i = 0; i < numberOfRowsToReturn; i++) + { + memset(values, 0, sizeof(values)); + memset(isNulls, false, sizeof(isNulls)); + + TenantStats *tenantStats = &monitor->tenants[i]; + + values[0] = Int32GetDatum(tenantStats->colocationGroupId); + values[1] = PointerGetDatum(cstring_to_text(tenantStats->tenantAttribute)); + values[2] = Int32GetDatum(tenantStats->readsInThisPeriod); + values[3] = Int32GetDatum(tenantStats->readsInLastPeriod); + values[4] = Int32GetDatum(tenantStats->readsInThisPeriod + + tenantStats->writesInThisPeriod); + values[5] = Int32GetDatum(tenantStats->readsInLastPeriod + + tenantStats->writesInLastPeriod); + values[6] = Int64GetDatum(tenantStats->score); + + tuplestore_putvalues(tupleStore, tupleDescriptor, values, isNulls); + } + + LWLockRelease(&monitor->lock); + + PG_RETURN_VOID(); +} + + +/* + * clean_citus_stats_tenants cleans the citus_stats_tenants monitor. + */ +Datum +clean_citus_stats_tenants(PG_FUNCTION_ARGS) +{ + MultiTenantMonitor *monitor = GetMultiTenantMonitor(); + monitor->tenantCount = 0; + + PG_RETURN_VOID(); +} + + +/* + * sleep_until_next_period sleeps until the next monitoring period starts. + */ +Datum +sleep_until_next_period(PG_FUNCTION_ARGS) +{ + struct timeval currentTime; + gettimeofday(&currentTime, NULL); + + long int nextPeriodStart = currentTime.tv_sec - + (currentTime.tv_sec % CitusStatsTenantsPeriod) + + CitusStatsTenantsPeriod; + + long int sleepTime = (nextPeriodStart - currentTime.tv_sec) * 1000000 - + currentTime.tv_usec + 100000; + pg_usleep(sleepTime); + + PG_RETURN_VOID(); +} + + +/* + * AttributeQueryIfAnnotated checks the query annotation and, if the query is annotated + * for tenant statistics monitoring, records the tenant attributes.
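The function body below scans the annotation comment by hand (note the in-code TODO asking for a safer parser). A minimal stand-alone sketch of the same round trip; ParseAttribution is a hypothetical helper, not part of the patch:

```
#include <stdio.h>
#include <string.h>

/* parse "tenant,colocationId" back out of an annotated query string */
static int
ParseAttribution(const char *query, char *tenant, int *colocationId)
{
	const char *prefix = "/* attributeTo: ";

	if (strncmp(query, prefix, strlen(prefix)) != 0)
	{
		return 0; /* not an annotated query */
	}

	/* the tenant attribute runs up to the comma, the colocation id follows */
	return sscanf(query + strlen(prefix), "%99[^,],%d", tenant, colocationId) == 2;
}

int
main(void)
{
	char annotated[256];
	char tenant[100];
	int colocationId = 0;

	/* mirrors AnnotateQuery(queryString, "tenant1", 5) below */
	snprintf(annotated, sizeof(annotated), "/* attributeTo: %s,%d */%s",
			 "tenant1", 5, "SELECT count(*) FROM articles_hash_840000;");

	if (ParseAttribution(annotated, tenant, &colocationId))
	{
		printf("tenant=%s colocation=%d\n", tenant, colocationId); /* tenant1, 5 */
	}
	return 0;
}
```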
+ */ +void +AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) +{ + strcpy_s(attributeToTenant, sizeof(attributeToTenant), ""); + + attributeCommandType = commandType; + + if (query_string == NULL) + { + return; + } + + if (strncmp(ATTRIBUTE_PREFIX, query_string, strlen(ATTRIBUTE_PREFIX)) == 0) + { + /* TODO create a function to safely parse the tenant identifier from the query comment */ + /* query is attributed to a tenant */ + char *tenantId = (char *) query_string + strlen(ATTRIBUTE_PREFIX); + char *tenantEnd = tenantId; + while (true && tenantEnd[0] != '\0') + { + if (tenantEnd[0] == ' ' && tenantEnd[1] == '*' && tenantEnd[2] == '/') + { + break; + } + + tenantEnd++; + } + tenantEnd--; + + colocationGroupId = 0; + while (*tenantEnd != ',') + { + colocationGroupId *= 10; + colocationGroupId += *tenantEnd - '0'; + tenantEnd--; + } + + int t = colocationGroupId; + colocationGroupId = 0; + while (t) + { + colocationGroupId *= 10; + colocationGroupId += t % 10; + t /= 10; + } + + /* hack to get a clean copy of the tenant id string */ + char tenantEndTmp = *tenantEnd; + *tenantEnd = '\0'; + tenantId = pstrdup(tenantId); + *tenantEnd = tenantEndTmp; + + if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF) + { + ereport(NOTICE, (errmsg("attributing query to tenant: %s", + quote_literal_cstr(tenantId)))); + } + + strcpy_s(attributeToTenant, sizeof(attributeToTenant), tenantId); + attributeToTenantStart = clock(); + } + else + { + strcpy_s(attributeToTenant, sizeof(attributeToTenant), ""); + } + + /*DetachSegment(); */ +} + + +/* + * AnnotateQuery annotates the query with tenant attributes. + */ +char * +AnnotateQuery(char *queryString, char *partitionColumn, int colocationId) +{ + if (partitionColumn == NULL) + { + return queryString; + } + StringInfo newQuery = makeStringInfo(); + appendStringInfo(newQuery, ATTRIBUTE_STRING_FORMAT, partitionColumn, colocationId); + + appendStringInfoString(newQuery, queryString); + + return newQuery->data; +} + + +/* + * CitusAttributeToEnd keeps the statistics for the tenant and calls the previously installed end hook + * or the standard executor end function. + */ +void +CitusAttributeToEnd(QueryDesc *queryDesc) +{ + /* + * At the end of the Executor is the last moment we have to attribute the previous + * attribution to a tenant, if applicable + */ + AttributeMetricsIfApplicable(); + + /* now call in to the previously installed hook, or the standard implementation */ + if (prev_ExecutorEnd) + { + prev_ExecutorEnd(queryDesc); + } + else + { + standard_ExecutorEnd(queryDesc); + } +} + + +/* + * CompareTenantScore is used to sort the tenant statistics by score + * in descending order. 
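CitusAttributeToEnd above follows the standard PostgreSQL hook-chaining convention, wired up via prev_ExecutorEnd in _PG_init as shown earlier in shared_library_init.c. A minimal sketch of that convention; my_ExecutorEnd is an illustrative name:

```
#include "postgres.h"
#include "executor/executor.h"

static ExecutorEnd_hook_type prev_hook = NULL;

static void
my_ExecutorEnd(QueryDesc *queryDesc)
{
	/* extension-specific bookkeeping would run here */

	/* then delegate to the previous hook, or the standard implementation */
	if (prev_hook != NULL)
	{
		prev_hook(queryDesc);
	}
	else
	{
		standard_ExecutorEnd(queryDesc);
	}
}

void
_PG_init(void)
{
	prev_hook = ExecutorEnd_hook;
	ExecutorEnd_hook = my_ExecutorEnd;
}
```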
+ */ +static int +CompareTenantScore(const void *leftElement, const void *rightElement) +{ + const TenantStats *leftTenant = (const TenantStats *) leftElement; + const TenantStats *rightTenant = (const TenantStats *) rightElement; + + if (leftTenant->score > rightTenant->score) + { + return -1; + } + else if (leftTenant->score < rightTenant->score) + { + return 1; + } + return 0; +} + + +/* + * AttributeMetricsIfApplicable updates the metrics for the current tenant's statistics + */ +static void +AttributeMetricsIfApplicable() +{ + if (strcmp(attributeToTenant, "") != 0) + { + clock_t end = { 0 }; + + end = clock(); + time_t queryTime = time(0); + double cpu_time_used = ((double) (end - attributeToTenantStart)) / CLOCKS_PER_SEC; + + if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF) + { + ereport(NOTICE, (errmsg("attribute cpu counter (%f) to tenant: %s", + cpu_time_used, attributeToTenant))); + } + + MultiTenantMonitor *monitor = GetMultiTenantMonitor(); + + LWLockAcquire(&monitor->lock, LW_SHARED); + + int currentTenantIndex = FindTenantStats(monitor); + + if (currentTenantIndex != -1) + { + TenantStats *tenantStats = &monitor->tenants[currentTenantIndex]; + LWLockAcquire(&tenantStats->lock, LW_EXCLUSIVE); + + UpdatePeriodsIfNecessary(tenantStats, queryTime); + ReduceScoreIfNecessary(tenantStats, queryTime); + RecordTenantStats(tenantStats); + + LWLockRelease(&tenantStats->lock); + } + else + { + LWLockRelease(&monitor->lock); + + LWLockAcquire(&monitor->lock, LW_EXCLUSIVE); + currentTenantIndex = FindTenantStats(monitor); + + if (currentTenantIndex == -1) + { + currentTenantIndex = CreateTenantStats(monitor, queryTime); + } + + LWLockRelease(&monitor->lock); + + LWLockAcquire(&monitor->lock, LW_SHARED); + currentTenantIndex = FindTenantStats(monitor); + if (currentTenantIndex != -1) + { + TenantStats *tenantStats = &monitor->tenants[currentTenantIndex]; + LWLockAcquire(&tenantStats->lock, LW_EXCLUSIVE); + + UpdatePeriodsIfNecessary(tenantStats, queryTime); + ReduceScoreIfNecessary(tenantStats, queryTime); + RecordTenantStats(tenantStats); + + LWLockRelease(&tenantStats->lock); + } + } + LWLockRelease(&monitor->lock); + } + + strcpy_s(attributeToTenant, sizeof(attributeToTenant), ""); +} + + +/* + * UpdatePeriodsIfNecessary moves the query counts to previous periods if enough time has passed. + * + * If 1 period has passed after the latest query, this function moves this period's counts to the last period + * and cleans this period's statistics. + * + * If 2 or more periods have passed after the last query, this function cleans both this period's and the last period's + * statistics. + */ +static void +UpdatePeriodsIfNecessary(TenantStats *tenantStats, time_t queryTime) +{ + time_t periodStart = queryTime - (queryTime % CitusStatsTenantsPeriod); + + /* + * If the last query in this tenant was before the start of current period + * but there are query counts for this period, we move them to the last period. + */ + if (tenantStats->lastQueryTime < periodStart && + (tenantStats->writesInThisPeriod || tenantStats->readsInThisPeriod)) + { + tenantStats->writesInLastPeriod = tenantStats->writesInThisPeriod; + tenantStats->writesInThisPeriod = 0; + + tenantStats->readsInLastPeriod = tenantStats->readsInThisPeriod; + tenantStats->readsInThisPeriod = 0; + } + + /* + * If the last query is more than two periods ago, we clean the last period counts too.
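A worked illustration of the window arithmetic above, assuming a 60 second period and an illustrative subset of the TenantStats fields:

```
#include <stdio.h>
#include <time.h>

typedef struct DemoStats
{
	time_t lastQueryTime;
	int readsInThisPeriod;
	int readsInLastPeriod;
} DemoStats;

int
main(void)
{
	int period = 60; /* stands in for CitusStatsTenantsPeriod */

	/* the last query fell in the immediately preceding window [1020, 1080) */
	DemoStats stats = { .lastQueryTime = 1030, .readsInThisPeriod = 7 };
	time_t queryTime = 1090;
	time_t periodStart = queryTime - (queryTime % period); /* 1080 */

	if (stats.lastQueryTime < periodStart && stats.readsInThisPeriod)
	{
		/* one period passed: this period's counts become last period's */
		stats.readsInLastPeriod = stats.readsInThisPeriod;
		stats.readsInThisPeriod = 0;
	}
	if (stats.lastQueryTime < periodStart - period)
	{
		/* two or more periods passed: the last period's counts are stale too */
		stats.readsInLastPeriod = 0;
	}
	stats.lastQueryTime = queryTime;

	printf("this=%d last=%d\n", stats.readsInThisPeriod, stats.readsInLastPeriod);
	/* prints this=0 last=7 */
	return 0;
}
```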
+ */
+	if (tenantStats->lastQueryTime < periodStart - CitusStatsTenantsPeriod)
+	{
+		tenantStats->writesInLastPeriod = 0;
+
+		tenantStats->readsInLastPeriod = 0;
+	}
+
+	tenantStats->lastQueryTime = queryTime;
+}
+
+
+/*
+ * ReduceScoreIfNecessary reduces the tenant score only if it is necessary.
+ *
+ * We halve the tenants' scores after each period. This function checks the number of
+ * periods that passed after the last score reduction and reduces the score accordingly.
+ */
+static void
+ReduceScoreIfNecessary(TenantStats *tenantStats, time_t queryTime)
+{
+	time_t periodStart = queryTime - (queryTime % CitusStatsTenantsPeriod);
+
+	/*
+	 * With each query we increase the score of the tenant by ONE_QUERY_SCORE.
+	 * After one period we halve the scores.
+	 *
+	 * Here we calculate how many periods passed after the last time we did score reduction.
+	 * If the latest score reduction was in this period this number should be 0,
+	 * if it was in the last period this number should be 1, and so on.
+	 */
+	int periodCountAfterLastScoreReduction = (periodStart -
+											  tenantStats->lastScoreReduction +
+											  CitusStatsTenantsPeriod - 1) /
+											 CitusStatsTenantsPeriod;
+
+	/*
+	 * This should not happen, but let's make sure.
+	 */
+	if (periodCountAfterLastScoreReduction < 0)
+	{
+		periodCountAfterLastScoreReduction = 0;
+	}
+
+	/*
+	 * If the last score reduction was not in this period, we do the score reduction now.
+	 * Right-shifting by the period count halves the score once per passed period.
+	 */
+	if (periodCountAfterLastScoreReduction > 0)
+	{
+		tenantStats->score >>= periodCountAfterLastScoreReduction;
+		tenantStats->lastScoreReduction = queryTime;
+	}
+}
+
+
+/*
+ * EvictTenantsIfNecessary sorts and evicts the tenants if the tenant count is more than or
+ * equal to 3 * CitusStatsTenantsLimit.
+ */
+static void
+EvictTenantsIfNecessary(time_t queryTime)
+{
+	MultiTenantMonitor *monitor = GetMultiTenantMonitor();
+
+	/*
+	 * We keep up to CitusStatsTenantsLimit * 3 tenants instead of CitusStatsTenantsLimit,
+	 * so we don't lose data immediately after a tenant falls out of the top CitusStatsTenantsLimit.
+	 *
+	 * Every time the tenant count hits CitusStatsTenantsLimit * 3, we reduce it back to CitusStatsTenantsLimit * 2.
+	 */
+	if (monitor->tenantCount >= CitusStatsTenantsLimit * 3)
+	{
+		for (int tenantIndex = 0; tenantIndex < monitor->tenantCount; tenantIndex++)
+		{
+			ReduceScoreIfNecessary(&monitor->tenants[tenantIndex], queryTime);
+		}
+		SafeQsort(monitor->tenants, monitor->tenantCount, sizeof(TenantStats),
+				  CompareTenantScore);
+		monitor->tenantCount = CitusStatsTenantsLimit * 2;
+	}
+}
+
+
+/*
+ * RecordTenantStats records the query statistics for the tenant.
+ */
+static void
+RecordTenantStats(TenantStats *tenantStats)
+{
+	/* saturate instead of overflowing the score */
+	if (tenantStats->score < LLONG_MAX - ONE_QUERY_SCORE)
+	{
+		tenantStats->score += ONE_QUERY_SCORE;
+	}
+	else
+	{
+		tenantStats->score = LLONG_MAX;
+	}
+
+	if (attributeCommandType == CMD_SELECT)
+	{
+		tenantStats->readsInThisPeriod++;
+	}
+	else if (attributeCommandType == CMD_UPDATE ||
+			 attributeCommandType == CMD_INSERT ||
+			 attributeCommandType == CMD_DELETE)
+	{
+		tenantStats->writesInThisPeriod++;
+	}
+}
+
+
+/*
+ * CreateMultiTenantMonitor creates the data structure for the multi tenant monitor.
+ */
+static void
+CreateMultiTenantMonitor()
+{
+	MultiTenantMonitor *monitor = CreateSharedMemoryForMultiTenantMonitor();
+	monitor->tenantCount = 0;
+}
+
+
+/*
+ * CreateSharedMemoryForMultiTenantMonitor creates a shared memory segment for the multi tenant monitor.
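+ *
+ * The segment is sized by MultiTenantMonitorshmemSize(). If another backend
+ * already initialized the struct, ShmemInitStruct reports it as found and the
+ * existing monitor is returned as is; otherwise the lock tranche is set up here.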
+ */ +static MultiTenantMonitor * +CreateSharedMemoryForMultiTenantMonitor() +{ + bool found = false; + MultiTenantMonitor *monitor = ShmemInitStruct(SharedMemoryNameForMultiTenantMonitor, + MultiTenantMonitorshmemSize(), + &found); + if (found) + { + return monitor; + } + + monitor->namedLockTranche.trancheId = LWLockNewTrancheId(); + monitor->namedLockTranche.trancheName = monitorTrancheName; + + LWLockRegisterTranche(monitor->namedLockTranche.trancheId, + monitor->namedLockTranche.trancheName); + LWLockInitialize(&monitor->lock, monitor->namedLockTranche.trancheId); + + return monitor; +} + + +/* + * GetMultiTenantMonitor returns the data structure for multi tenant monitor. + */ +static MultiTenantMonitor * +GetMultiTenantMonitor() +{ + bool found = false; + MultiTenantMonitor *monitor = ShmemInitStruct(SharedMemoryNameForMultiTenantMonitor, + MultiTenantMonitorshmemSize(), + &found); + + if (!found) + { + elog(WARNING, "monitor not found"); + return NULL; + } + + return monitor; +} + + +/* + * InitializeMultiTenantMonitorSMHandleManagement sets up the shared memory startup hook + * so that the multi tenant monitor can be initialized and stored in shared memory. + */ +void +InitializeMultiTenantMonitorSMHandleManagement() +{ + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = MultiTenantMonitorSMInit; +} + + +/* + * MultiTenantMonitorSMInit initializes the shared memory for MultiTenantMonitorSMData. + */ +static void +MultiTenantMonitorSMInit() +{ + CreateMultiTenantMonitor(); + + if (prev_shmem_startup_hook != NULL) + { + prev_shmem_startup_hook(); + } +} + + +/* + * CreateTenantStats creates the data structure for a tenant's statistics. + */ +static int +CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime) +{ + /* + * If the tenant count reached 3 * CitusStatsTenantsLimit, we evict the tenants + * with the lowest score. + */ + EvictTenantsIfNecessary(queryTime); + + int tenantIndex = monitor->tenantCount; + + memset(&monitor->tenants[tenantIndex], 0, sizeof(monitor->tenants[tenantIndex])); + + strcpy_s(monitor->tenants[tenantIndex].tenantAttribute, + sizeof(monitor->tenants[tenantIndex].tenantAttribute), attributeToTenant); + monitor->tenants[tenantIndex].colocationGroupId = colocationGroupId; + + monitor->tenants[tenantIndex].namedLockTranche.trancheId = LWLockNewTrancheId(); + monitor->tenants[tenantIndex].namedLockTranche.trancheName = tenantTrancheName; + + LWLockRegisterTranche(monitor->tenants[tenantIndex].namedLockTranche.trancheId, + monitor->tenants[tenantIndex].namedLockTranche.trancheName); + LWLockInitialize(&monitor->tenants[tenantIndex].lock, + monitor->tenants[tenantIndex].namedLockTranche.trancheId); + + monitor->tenantCount++; + + return tenantIndex; +} + + +/* + * FindTenantStats finds the index for the current tenant's statistics. + */ +static int +FindTenantStats(MultiTenantMonitor *monitor) +{ + for (int i = 0; i < monitor->tenantCount; i++) + { + TenantStats *tenantStats = &monitor->tenants[i]; + if (strcmp(tenantStats->tenantAttribute, attributeToTenant) == 0 && + tenantStats->colocationGroupId == colocationGroupId) + { + return i; + } + } + + return -1; +} + + +/* + * MultiTenantMonitorshmemSize calculates the size of the multi tenant monitor using + * CitusStatsTenantsLimit parameter. 
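+ *
+ * For example, with the default citus.stats_tenants_limit of 10, this
+ * reserves sizeof(MultiTenantMonitor) plus room for 30 TenantStats entries,
+ * matching the eviction scheme that lets the tenant count grow to three
+ * times the limit before shrinking it.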
+ */ +static size_t +MultiTenantMonitorshmemSize(void) +{ + Size size = sizeof(MultiTenantMonitor); + size = add_size(size, mul_size(sizeof(TenantStats), CitusStatsTenantsLimit * 3)); + + return size; +} diff --git a/src/include/distributed/citus_custom_scan.h b/src/include/distributed/citus_custom_scan.h index 92301fceb..f31138ac2 100644 --- a/src/include/distributed/citus_custom_scan.h +++ b/src/include/distributed/citus_custom_scan.h @@ -46,4 +46,6 @@ extern CustomScan * FetchCitusCustomScanIfExists(Plan *plan); extern bool IsCitusPlan(Plan *plan); extern bool IsCitusCustomScan(Plan *plan); +extern void SetJobColocationId(Job *job); + #endif /* CITUS_CUSTOM_SCAN_H */ diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index d6ad4c248..49fe28f1d 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -330,6 +330,9 @@ typedef struct Task * Vacuum, create/drop/reindex concurrently cannot be executed in a transaction. */ bool cannotBeExecutedInTransction; + + char *partitionColumn; + int colocationId; } Task; diff --git a/src/include/distributed/utils/attribute.h b/src/include/distributed/utils/attribute.h new file mode 100644 index 000000000..b4d8bb607 --- /dev/null +++ b/src/include/distributed/utils/attribute.h @@ -0,0 +1,102 @@ +/*------------------------------------------------------------------------- + * + * attribute.h + * Routines related to the multi tenant monitor. + * + * Copyright (c) Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#ifndef CITUS_ATTRIBUTE_H +#define CITUS_ATTRIBUTE_H + +#include "executor/execdesc.h" +#include "executor/executor.h" +#include "storage/lwlock.h" + +#define MAX_TENANT_ATTRIBUTE_LENGTH 100 + +/* + * TenantStats is the struct that keeps statistics about one tenant. + */ +typedef struct TenantStats +{ + /* + * The attribute value, e.g distribution column, and colocation group id + * of the tenant. + */ + char tenantAttribute[MAX_TENANT_ATTRIBUTE_LENGTH]; + int colocationGroupId; + + /* + * Number of SELECT queries this tenant ran in this and last periods. + */ + int readsInLastPeriod; + int readsInThisPeriod; + + /* + * Number of INSERT, UPDATE, and DELETE queries this tenant ran in this and last periods. + */ + int writesInLastPeriod; + int writesInThisPeriod; + + /* + * The latest time this tenant ran a query. This value is used to update the score later. + */ + time_t lastQueryTime; + + /* + * The tenant monitoring score of this tenant. This value is increased by ONE_QUERY_SCORE at every query + * and halved after every period. + */ + long long score; + + /* + * The latest time the score of this tenant is halved. This value is used to correctly calculate the reduction later. + */ + time_t lastScoreReduction; + + /* + * Locks needed to update this tenant's statistics. + */ + NamedLWLockTranche namedLockTranche; + LWLock lock; +} TenantStats; + +/* + * MultiTenantMonitor is the struct for keeping the statistics + * of the tenants + */ +typedef struct MultiTenantMonitor +{ + /* + * Lock mechanism for the monitor. + * Each tenant update acquires the lock in shared mode and + * the tenant number reduction and monitor view acquires in exclusive mode. + */ + NamedLWLockTranche namedLockTranche; + LWLock lock; + + /* + * tenantCount is the number of items in the tenants array. 
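+	 * Only the first tenantCount entries of the array are meaningful.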
+ * The total length of tenants array is set up at CreateSharedMemoryForMultiTenantMonitor + * and is 3 * citus.stats_tenants_limit + */ + int tenantCount; + TenantStats tenants[FLEXIBLE_ARRAY_MEMBER]; +} MultiTenantMonitor; + + +extern void CitusAttributeToEnd(QueryDesc *queryDesc); +extern void AttributeQueryIfAnnotated(const char *queryString, CmdType commandType); +extern char * AnnotateQuery(char *queryString, char *partitionColumn, int colocationId); +extern void InitializeMultiTenantMonitorSMHandleManagement(void); + +extern ExecutorEnd_hook_type prev_ExecutorEnd; + +extern int MultiTenantMonitoringLogLevel; +extern int CitusStatsTenantsPeriod; +extern int CitusStatsTenantsLimit; + +#endif /*CITUS_ATTRIBUTE_H */ diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 2ebb31f47..33a35f286 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -307,3 +307,5 @@ s/(NOTICE: issuing SET LOCAL application_name TO 'citus_rebalancer gpid=)[0-9]+ # shard_rebalancer output, flaky improvement number s/improvement of 0.1[0-9]* is lower/improvement of 0.1xxxxx is lower/g + +s/\/\* attributeTo.*\*\///g diff --git a/src/test/regress/expected/citus_stats_tenants.out b/src/test/regress/expected/citus_stats_tenants.out new file mode 100644 index 000000000..dbd525aab --- /dev/null +++ b/src/test/regress/expected/citus_stats_tenants.out @@ -0,0 +1,290 @@ +CREATE SCHEMA citus_stats_tenants; +SET search_path TO citus_stats_tenants; +SET citus.next_shard_id TO 5797500; +SET citus.shard_replication_factor TO 1; +CREATE OR REPLACE FUNCTION pg_catalog.clean_citus_stats_tenants() +RETURNS VOID +LANGUAGE C +AS 'citus', $$clean_citus_stats_tenants$$; +CREATE OR REPLACE FUNCTION pg_catalog.sleep_until_next_period() +RETURNS VOID +LANGUAGE C +AS 'citus', $$sleep_until_next_period$$; +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); + result +--------------------------------------------------------------------- + + + +(3 rows) + +-- set period to a high number to prevent stats from being reset +SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM SET citus.stats_tenants_period TO 1000000000'); + result +--------------------------------------------------------------------- + ALTER SYSTEM + ALTER SYSTEM + ALTER SYSTEM +(3 rows) + +SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); + result +--------------------------------------------------------------------- + t + t + t +(3 rows) + +CREATE TABLE dist_tbl (a INT, b TEXT); +SELECT create_distributed_table('dist_tbl', 'a', shard_count:=4, colocate_with:='none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_tbl_2 (a INT, b INT); +SELECT create_distributed_table('dist_tbl_2', 'a', colocate_with:='dist_tbl'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_tbl_text (a TEXT, b INT); +SELECT create_distributed_table('dist_tbl_text', 'a', shard_count:=4, colocate_with:='none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE ref_tbl (a INT, b INT); +SELECT create_reference_table('ref_tbl'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist_tbl VALUES (1, 'abcd'); +INSERT INTO dist_tbl VALUES (2, 'abcd'); +UPDATE dist_tbl SET b = a 
+ 1 WHERE a = 3; +UPDATE dist_tbl SET b = a + 1 WHERE a = 4; +DELETE FROM dist_tbl WHERE a = 5; +\c - - - :worker_1_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; + tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period +--------------------------------------------------------------------- + 1 | 0 | 0 | 1 | 0 + 5 | 0 | 0 | 1 | 0 +(2 rows) + +\c - - - :worker_2_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; + tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period +--------------------------------------------------------------------- + 2 | 0 | 0 | 1 | 0 + 3 | 0 | 0 | 1 | 0 +(2 rows) + +\c - - - :master_port +SET search_path TO citus_stats_tenants; +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); + result +--------------------------------------------------------------------- + + + +(3 rows) + +-- queries with multiple tenants should not be counted +SELECT count(*)>=0 FROM dist_tbl WHERE a IN (1, 5); + ?column? +--------------------------------------------------------------------- + t +(1 row) + +-- queries with reference tables should not be counted +SELECT count(*)>=0 FROM ref_tbl WHERE a = 1; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +\c - - - :worker_1_port +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants ORDER BY tenant_attribute; + tenant_attribute | query_count_in_this_period +--------------------------------------------------------------------- +(0 rows) + +\c - - - :master_port +SET search_path TO citus_stats_tenants; +-- queries with multiple tables but one tenant should be counted +SELECT count(*)>=0 FROM dist_tbl, dist_tbl_2 WHERE dist_tbl.a = 1 AND dist_tbl_2.a = 1; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl JOIN dist_tbl_2 ON dist_tbl.a = dist_tbl_2.a WHERE dist_tbl.a = 1; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +\c - - - :worker_1_port +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants WHERE tenant_attribute = '1'; + tenant_attribute | query_count_in_this_period +--------------------------------------------------------------------- + 1 | 2 +(1 row) + +\c - - - :master_port +SET search_path TO citus_stats_tenants; +-- test scoring +-- all of these distribution column values are from second worker +SELECT count(*)>=0 FROM dist_tbl WHERE a = 2; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl WHERE a = 3; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl WHERE a = 4; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +\c - - - :worker_2_port +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; + tenant_attribute | query_count_in_this_period | score +--------------------------------------------------------------------- + 2 | 1 | 1000000000 + 3 | 1 | 1000000000 + 4 | 1 | 1000000000 + abcd | 1 | 1000000000 +(4 rows) + +\c - - - :master_port +SET search_path TO citus_stats_tenants; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'cdef'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +\c - - - :worker_2_port +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; + tenant_attribute | query_count_in_this_period | score +--------------------------------------------------------------------- + abcd | 3 | 3000000000 + 2 | 1 | 1000000000 + 3 | 1 | 1000000000 + 4 | 1 | 1000000000 + bcde | 1 | 1000000000 + cdef | 1 | 1000000000 +(6 rows) + +\c - - - :master_port +SET search_path TO citus_stats_tenants; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'defg'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +\c - - - :worker_2_port +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; + tenant_attribute | query_count_in_this_period | score +--------------------------------------------------------------------- + abcd | 3 | 3000000000 + bcde | 3 | 3000000000 + 2 | 1 | 1000000000 + 3 | 1 | 1000000000 + defg | 1 | 1000000000 +(5 rows) + +\c - - - :master_port +SET search_path TO citus_stats_tenants; +-- test period passing +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); + result +--------------------------------------------------------------------- + + + +(3 rows) + +SELECT count(*)>=0 FROM dist_tbl WHERE a = 1; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +INSERT INTO dist_tbl VALUES (5, 'abcd'); +\c - - - :worker_1_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; + tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period +--------------------------------------------------------------------- + 1 | 1 | 0 | 1 | 0 + 5 | 0 | 0 | 1 | 0 +(2 rows) + +-- simulate passing the period +SET citus.stats_tenants_period TO 2; +SELECT sleep_until_next_period(); + sleep_until_next_period +--------------------------------------------------------------------- + +(1 row) + +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; + tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period +--------------------------------------------------------------------- + 1 | 0 | 1 | 0 | 1 + 5 | 0 | 0 | 0 | 1 +(2 rows) + +\c - - - :master_port +SET search_path TO citus_stats_tenants; +SET client_min_messages TO ERROR; +DROP SCHEMA citus_stats_tenants CASCADE; diff --git a/src/test/regress/expected/failure_multi_dml.out b/src/test/regress/expected/failure_multi_dml.out index 7ca8a8f91..bbea2c999 100644 --- a/src/test/regress/expected/failure_multi_dml.out +++ b/src/test/regress/expected/failure_multi_dml.out @@ -25,7 +25,7 @@ SELECT citus.clear_network_traffic(); ---- test multiple statements spanning multiple shards, ---- at each significant point. 
These transactions are 2pc -- fail at DELETE -SELECT citus.mitmproxy('conn.onQuery(query="^DELETE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE").kill()'); mitmproxy --------------------------------------------------------------------- @@ -54,7 +54,7 @@ SELECT * FROM dml_test ORDER BY id ASC; (4 rows) -- cancel at DELETE -SELECT citus.mitmproxy('conn.onQuery(query="^DELETE").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- @@ -83,7 +83,7 @@ SELECT * FROM dml_test ORDER BY id ASC; (4 rows) -- fail at INSERT -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); mitmproxy --------------------------------------------------------------------- @@ -110,7 +110,7 @@ SELECT * FROM dml_test ORDER BY id ASC; (4 rows) -- cancel at INSERT -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- @@ -137,7 +137,7 @@ SELECT * FROM dml_test ORDER BY id ASC; (4 rows) -- fail at UPDATE -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); mitmproxy --------------------------------------------------------------------- @@ -163,7 +163,7 @@ SELECT * FROM dml_test ORDER BY id ASC; (4 rows) -- cancel at UPDATE -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- diff --git a/src/test/regress/expected/failure_multi_row_insert.out b/src/test/regress/expected/failure_multi_row_insert.out index 8948be94e..f3cd4919a 100644 --- a/src/test/regress/expected/failure_multi_row_insert.out +++ b/src/test/regress/expected/failure_multi_row_insert.out @@ -36,7 +36,7 @@ SELECT create_reference_table('reference_table'); -- (d) multi-row INSERT that hits multiple shards in multiple workers -- (e) multi-row INSERT to a reference table -- Failure and cancellation on multi-row INSERT that hits the same shard with the same value -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); mitmproxy --------------------------------------------------------------------- diff --git a/src/test/regress/expected/failure_ref_tables.out b/src/test/regress/expected/failure_ref_tables.out index 6485691af..4984cc1bf 100644 --- a/src/test/regress/expected/failure_ref_tables.out +++ b/src/test/regress/expected/failure_ref_tables.out @@ -26,7 +26,7 @@ SELECT COUNT(*) FROM ref_table; (1 row) -- verify behavior of single INSERT; should fail to execute -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); mitmproxy --------------------------------------------------------------------- @@ -41,7 +41,7 @@ SELECT COUNT(*) FROM ref_table WHERE key=5; (1 row) -- verify behavior of UPDATE ... 
RETURNING; should not execute -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); mitmproxy --------------------------------------------------------------------- @@ -56,7 +56,7 @@ SELECT COUNT(*) FROM ref_table WHERE key=7; (1 row) -- verify fix to #2214; should raise error and fail to execute -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); mitmproxy --------------------------------------------------------------------- diff --git a/src/test/regress/expected/failure_replicated_partitions.out b/src/test/regress/expected/failure_replicated_partitions.out index 4ae2d604c..7294df98b 100644 --- a/src/test/regress/expected/failure_replicated_partitions.out +++ b/src/test/regress/expected/failure_replicated_partitions.out @@ -21,7 +21,7 @@ CREATE TABLE partitioned_table_0 PARTITION OF partitioned_table (dist_key, partition_id) FOR VALUES IN ( 0 ); INSERT INTO partitioned_table VALUES (0, 0); -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); mitmproxy --------------------------------------------------------------------- diff --git a/src/test/regress/expected/failure_single_mod.out b/src/test/regress/expected/failure_single_mod.out index 54db33ff6..2a6ed2d77 100644 --- a/src/test/regress/expected/failure_single_mod.out +++ b/src/test/regress/expected/failure_single_mod.out @@ -20,7 +20,7 @@ SELECT create_distributed_table('mod_test', 'key'); (1 row) -- verify behavior of single INSERT; should mark shard as failed -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); mitmproxy --------------------------------------------------------------------- @@ -52,7 +52,7 @@ SELECT citus.mitmproxy('conn.allow()'); (1 row) INSERT INTO mod_test VALUES (2, 6); -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); mitmproxy --------------------------------------------------------------------- @@ -78,7 +78,7 @@ WHERE shardid IN ( TRUNCATE mod_test; -- verify behavior of multi-statement modifications to a single shard -- should fail the transaction and never mark placements inactive -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); mitmproxy --------------------------------------------------------------------- diff --git a/src/test/regress/expected/failure_single_select.out b/src/test/regress/expected/failure_single_select.out index 5d17cc4ad..1b60f3125 100644 --- a/src/test/regress/expected/failure_single_select.out +++ b/src/test/regress/expected/failure_single_select.out @@ -23,7 +23,7 @@ SELECT create_distributed_table('select_test', 'key'); -- put data in shard for which mitm node is first placement INSERT INTO select_test VALUES (3, 'test data'); -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").kill()'); mitmproxy --------------------------------------------------------------------- @@ -45,7 +45,7 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin -- kill after first SELECT; txn should fail as INSERT triggers -- 2PC (and placementis not marked bad) -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").kill()'); 
+SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").kill()'); mitmproxy --------------------------------------------------------------------- @@ -66,7 +66,7 @@ TRUNCATE select_test; -- now the same tests with query cancellation -- put data in shard for which mitm node is first placement INSERT INTO select_test VALUES (3, 'test data'); -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- @@ -77,7 +77,7 @@ ERROR: canceling statement due to user request SELECT * FROM select_test WHERE key = 3; ERROR: canceling statement due to user request -- cancel after first SELECT; txn should fail and nothing should be marked as invalid -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- @@ -107,7 +107,7 @@ SELECT citus.mitmproxy('conn.allow()'); TRUNCATE select_test; -- cancel the second query -- error after second SELECT; txn should fail -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- @@ -126,7 +126,7 @@ SELECT * FROM select_test WHERE key = 3; ERROR: canceling statement due to user request COMMIT; -- error after second SELECT; txn should fails the transaction -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).reset()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").after(1).reset()'); mitmproxy --------------------------------------------------------------------- @@ -144,7 +144,7 @@ INSERT INTO select_test VALUES (3, 'even more data'); SELECT * FROM select_test WHERE key = 3; ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open COMMIT; -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*pg_prepared_xacts").after(2).kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*pg_prepared_xacts").after(2).kill()'); mitmproxy --------------------------------------------------------------------- @@ -173,7 +173,7 @@ SELECT create_distributed_table('select_test', 'key'); SET citus.max_cached_conns_per_worker TO 1; -- allow connection to be cached INSERT INTO select_test VALUES (1, 'test data'); -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").after(1).kill()'); mitmproxy --------------------------------------------------------------------- @@ -188,7 +188,7 @@ SELECT * FROM select_test WHERE key = 1; SELECT * FROM select_test WHERE key = 1; ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open -- now the same test with query cancellation -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')'); mitmproxy 
--------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index ead2a5b85..2bb0c29e7 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1360,9 +1360,11 @@ SELECT * FROM multi_extension.print_extension_changes(); -- Snapshot of state at 11.3-1 ALTER EXTENSION citus UPDATE TO '11.3-1'; SELECT * FROM multi_extension.print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- -(0 rows) + | function citus_stats_tenants(boolean) SETOF record + | view citus_stats_tenants +(2 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 7cd2f63c8..8005c0c42 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -121,6 +121,7 @@ ORDER BY 1; function citus_stat_activity() function citus_stat_statements() function citus_stat_statements_reset() + function citus_stats_tenants(boolean) function citus_table_is_visible(oid) function citus_table_size(regclass) function citus_task_wait(bigint,citus_task_status) @@ -316,7 +317,8 @@ ORDER BY 1; view citus_shards_on_worker view citus_stat_activity view citus_stat_statements + view citus_stats_tenants view pg_dist_shard_placement view time_partitions -(310 rows) +(312 rows) diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index ee81bde38..4091b7a63 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -102,6 +102,11 @@ test: pg13_propagate_statistics # ---------- test: citus_update_table_statistics +# ---------- +# Test for tenant statistics +# ---------- +test: citus_stats_tenants + # ---------- # Parallel TPC-H tests to check our distributed execution behavior # ---------- diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index af594c1d4..f4e85ab61 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -487,6 +487,7 @@ push(@pgOptions, "citus.explain_analyze_sort_method='taskId'"); push(@pgOptions, "citus.enable_manual_changes_to_shards=on"); push(@pgOptions, "citus.allow_unsafe_locks_from_workers=on"); push(@pgOptions, "citus.stat_statements_track = 'all'"); +push(@pgOptions, "citus.stats_tenants_limit = 2"); # Some tests look at shards in pg_class, make sure we can usually see them: push(@pgOptions, "citus.show_shards_for_app_name_prefixes='pg_regress'"); diff --git a/src/test/regress/sql/citus_stats_tenants.sql b/src/test/regress/sql/citus_stats_tenants.sql new file mode 100644 index 000000000..056f1902b --- /dev/null +++ b/src/test/regress/sql/citus_stats_tenants.sql @@ -0,0 +1,118 @@ +CREATE SCHEMA citus_stats_tenants; +SET search_path TO citus_stats_tenants; +SET citus.next_shard_id TO 5797500; +SET citus.shard_replication_factor TO 1; + +CREATE OR REPLACE FUNCTION pg_catalog.clean_citus_stats_tenants() +RETURNS VOID +LANGUAGE C +AS 'citus', $$clean_citus_stats_tenants$$; + +CREATE OR REPLACE FUNCTION pg_catalog.sleep_until_next_period() +RETURNS VOID +LANGUAGE C +AS 'citus', $$sleep_until_next_period$$; + +SELECT result FROM run_command_on_all_nodes('SELECT 
clean_citus_stats_tenants()'); + +-- set period to a high number to prevent stats from being reset +SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM SET citus.stats_tenants_period TO 1000000000'); +SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); + +CREATE TABLE dist_tbl (a INT, b TEXT); +SELECT create_distributed_table('dist_tbl', 'a', shard_count:=4, colocate_with:='none'); + +CREATE TABLE dist_tbl_2 (a INT, b INT); +SELECT create_distributed_table('dist_tbl_2', 'a', colocate_with:='dist_tbl'); + +CREATE TABLE dist_tbl_text (a TEXT, b INT); +SELECT create_distributed_table('dist_tbl_text', 'a', shard_count:=4, colocate_with:='none'); + +CREATE TABLE ref_tbl (a INT, b INT); +SELECT create_reference_table('ref_tbl'); + +INSERT INTO dist_tbl VALUES (1, 'abcd'); +INSERT INTO dist_tbl VALUES (2, 'abcd'); +UPDATE dist_tbl SET b = a + 1 WHERE a = 3; +UPDATE dist_tbl SET b = a + 1 WHERE a = 4; +DELETE FROM dist_tbl WHERE a = 5; + +\c - - - :worker_1_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +\c - - - :worker_2_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +\c - - - :master_port +SET search_path TO citus_stats_tenants; + +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); + +-- queries with multiple tenants should not be counted +SELECT count(*)>=0 FROM dist_tbl WHERE a IN (1, 5); + +-- queries with reference tables should not be counted +SELECT count(*)>=0 FROM ref_tbl WHERE a = 1; + +\c - - - :worker_1_port +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants ORDER BY tenant_attribute; +\c - - - :master_port +SET search_path TO citus_stats_tenants; + +-- queries with multiple tables but one tenant should be counted +SELECT count(*)>=0 FROM dist_tbl, dist_tbl_2 WHERE dist_tbl.a = 1 AND dist_tbl_2.a = 1; +SELECT count(*)>=0 FROM dist_tbl JOIN dist_tbl_2 ON dist_tbl.a = dist_tbl_2.a WHERE dist_tbl.a = 1; + +\c - - - :worker_1_port +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants WHERE tenant_attribute = '1'; +\c - - - :master_port +SET search_path TO citus_stats_tenants; + +-- test scoring +-- all of these distribution column values are from second worker +SELECT count(*)>=0 FROM dist_tbl WHERE a = 2; +SELECT count(*)>=0 FROM dist_tbl WHERE a = 3; +SELECT count(*)>=0 FROM dist_tbl WHERE a = 4; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; + +\c - - - :worker_2_port +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; +\c - - - :master_port +SET search_path TO citus_stats_tenants; + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'cdef'; + +\c - - - :worker_2_port +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; +\c - - - :master_port +SET search_path TO citus_stats_tenants; + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'defg'; + +\c - - - :worker_2_port +SELECT 
tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; +\c - - - :master_port +SET search_path TO citus_stats_tenants; + +-- test period passing +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); + +SELECT count(*)>=0 FROM dist_tbl WHERE a = 1; +INSERT INTO dist_tbl VALUES (5, 'abcd'); + +\c - - - :worker_1_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; + +-- simulate passing the period +SET citus.stats_tenants_period TO 2; +SELECT sleep_until_next_period(); + +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +\c - - - :master_port +SET search_path TO citus_stats_tenants; + +SET client_min_messages TO ERROR; +DROP SCHEMA citus_stats_tenants CASCADE; diff --git a/src/test/regress/sql/failure_multi_dml.sql b/src/test/regress/sql/failure_multi_dml.sql index 390c01461..f62ede4d5 100644 --- a/src/test/regress/sql/failure_multi_dml.sql +++ b/src/test/regress/sql/failure_multi_dml.sql @@ -21,7 +21,7 @@ SELECT citus.clear_network_traffic(); ---- at each significant point. These transactions are 2pc -- fail at DELETE -SELECT citus.mitmproxy('conn.onQuery(query="^DELETE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE").kill()'); BEGIN; DELETE FROM dml_test WHERE id = 1; @@ -35,7 +35,7 @@ COMMIT; SELECT * FROM dml_test ORDER BY id ASC; -- cancel at DELETE -SELECT citus.mitmproxy('conn.onQuery(query="^DELETE").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE").cancel(' || pg_backend_pid() || ')'); BEGIN; DELETE FROM dml_test WHERE id = 1; @@ -49,7 +49,7 @@ COMMIT; SELECT * FROM dml_test ORDER BY id ASC; -- fail at INSERT -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); BEGIN; DELETE FROM dml_test WHERE id = 1; @@ -63,7 +63,7 @@ COMMIT; SELECT * FROM dml_test ORDER BY id ASC; -- cancel at INSERT -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").cancel(' || pg_backend_pid() || ')'); BEGIN; DELETE FROM dml_test WHERE id = 1; @@ -77,7 +77,7 @@ COMMIT; SELECT * FROM dml_test ORDER BY id ASC; -- fail at UPDATE -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); BEGIN; DELETE FROM dml_test WHERE id = 1; @@ -91,7 +91,7 @@ COMMIT; SELECT * FROM dml_test ORDER BY id ASC; -- cancel at UPDATE -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").cancel(' || pg_backend_pid() || ')'); BEGIN; DELETE FROM dml_test WHERE id = 1; diff --git a/src/test/regress/sql/failure_multi_row_insert.sql b/src/test/regress/sql/failure_multi_row_insert.sql index 53ab8a84d..cfc98f719 100644 --- a/src/test/regress/sql/failure_multi_row_insert.sql +++ b/src/test/regress/sql/failure_multi_row_insert.sql @@ -30,7 +30,7 @@ SELECT create_reference_table('reference_table'); -- Failure and cancellation on multi-row INSERT that hits the same shard with the same value -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT 
citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); INSERT INTO distributed_table VALUES (1,1), (1,2), (1,3); -- this test is broken, see https://github.com/citusdata/citus/issues/2460 diff --git a/src/test/regress/sql/failure_ref_tables.sql b/src/test/regress/sql/failure_ref_tables.sql index 0088a375e..29b90dc22 100644 --- a/src/test/regress/sql/failure_ref_tables.sql +++ b/src/test/regress/sql/failure_ref_tables.sql @@ -17,19 +17,19 @@ SELECT citus.clear_network_traffic(); SELECT COUNT(*) FROM ref_table; -- verify behavior of single INSERT; should fail to execute -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); INSERT INTO ref_table VALUES (5, 6); SELECT COUNT(*) FROM ref_table WHERE key=5; -- verify behavior of UPDATE ... RETURNING; should not execute -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); UPDATE ref_table SET key=7 RETURNING value; SELECT COUNT(*) FROM ref_table WHERE key=7; -- verify fix to #2214; should raise error and fail to execute -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); BEGIN; DELETE FROM ref_table WHERE key=5; diff --git a/src/test/regress/sql/failure_replicated_partitions.sql b/src/test/regress/sql/failure_replicated_partitions.sql index 1ea79fc83..fbe6ec7a0 100644 --- a/src/test/regress/sql/failure_replicated_partitions.sql +++ b/src/test/regress/sql/failure_replicated_partitions.sql @@ -19,7 +19,7 @@ CREATE TABLE partitioned_table_0 INSERT INTO partitioned_table VALUES (0, 0); -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); INSERT INTO partitioned_table VALUES (0, 0); diff --git a/src/test/regress/sql/failure_single_mod.sql b/src/test/regress/sql/failure_single_mod.sql index e4dfc8f9f..48fdddcc6 100644 --- a/src/test/regress/sql/failure_single_mod.sql +++ b/src/test/regress/sql/failure_single_mod.sql @@ -8,7 +8,7 @@ CREATE TABLE mod_test (key int, value text); SELECT create_distributed_table('mod_test', 'key'); -- verify behavior of single INSERT; should mark shard as failed -SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT").kill()'); INSERT INTO mod_test VALUES (2, 6); SELECT COUNT(*) FROM mod_test WHERE key=2; @@ -24,7 +24,7 @@ TRUNCATE mod_test; SELECT citus.mitmproxy('conn.allow()'); INSERT INTO mod_test VALUES (2, 6); -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); UPDATE mod_test SET value='ok' WHERE key=2 RETURNING key; SELECT COUNT(*) FROM mod_test WHERE value='ok'; @@ -38,7 +38,7 @@ TRUNCATE mod_test; -- verify behavior of multi-statement modifications to a single shard -- should fail the transaction and never mark placements inactive -SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE").kill()'); BEGIN; INSERT INTO mod_test VALUES (2, 6); diff --git a/src/test/regress/sql/failure_single_select.sql b/src/test/regress/sql/failure_single_select.sql index 8dfb33d3e..c8218c950 100644 --- a/src/test/regress/sql/failure_single_select.sql +++ b/src/test/regress/sql/failure_single_select.sql @@ -13,13 +13,13 @@ SELECT create_distributed_table('select_test', 'key'); -- put data in shard for which mitm node is first placement 
INSERT INTO select_test VALUES (3, 'test data'); -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").kill()'); SELECT * FROM select_test WHERE key = 3; SELECT * FROM select_test WHERE key = 3; -- kill after first SELECT; txn should fail as INSERT triggers -- 2PC (and placementis not marked bad) -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").kill()'); BEGIN; INSERT INTO select_test VALUES (3, 'more data'); @@ -35,12 +35,12 @@ TRUNCATE select_test; -- put data in shard for which mitm node is first placement INSERT INTO select_test VALUES (3, 'test data'); -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").cancel(' || pg_backend_pid() || ')'); SELECT * FROM select_test WHERE key = 3; SELECT * FROM select_test WHERE key = 3; -- cancel after first SELECT; txn should fail and nothing should be marked as invalid -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").cancel(' || pg_backend_pid() || ')'); BEGIN; INSERT INTO select_test VALUES (3, 'more data'); @@ -58,7 +58,7 @@ TRUNCATE select_test; -- cancel the second query -- error after second SELECT; txn should fail -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')'); BEGIN; INSERT INTO select_test VALUES (3, 'more data'); @@ -68,7 +68,7 @@ SELECT * FROM select_test WHERE key = 3; COMMIT; -- error after second SELECT; txn should fails the transaction -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).reset()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").after(1).reset()'); BEGIN; INSERT INTO select_test VALUES (3, 'more data'); @@ -77,7 +77,7 @@ INSERT INTO select_test VALUES (3, 'even more data'); SELECT * FROM select_test WHERE key = 3; COMMIT; -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*pg_prepared_xacts").after(2).kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*pg_prepared_xacts").after(2).kill()'); SELECT recover_prepared_transactions(); SELECT recover_prepared_transactions(); @@ -93,12 +93,12 @@ SELECT create_distributed_table('select_test', 'key'); SET citus.max_cached_conns_per_worker TO 1; -- allow connection to be cached INSERT INTO select_test VALUES (1, 'test data'); -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).kill()'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").after(1).kill()'); SELECT * FROM select_test WHERE key = 1; SELECT * FROM select_test WHERE key = 1; -- now the same test with query cancellation -SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')'); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')'); SELECT * FROM select_test WHERE key = 1; SELECT * FROM select_test WHERE key = 1; From b989e8872cfa8af22fbd5ccb7a301796e1e6496c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Halil=20Ozan=20Akg=C3=BCl?= Date: Mon, 27 Mar 2023 17:42:22 +0300 Subject: [PATCH 24/58] Citus stats tenants 
collector view (#6761) Add a view that collects statistics from all nodes --- .../distributed/sql/citus--11.2-1--11.3-1.sql | 1 + .../sql/downgrades/citus--11.3-1--11.2-1.sql | 3 + .../sql/udfs/citus_stats_tenants/11.3-1.sql | 55 ++++++++++++++++--- .../sql/udfs/citus_stats_tenants/latest.sql | 55 ++++++++++++++++--- .../udfs/citus_stats_tenants_local/11.3-1.sql | 27 +++++++++ .../udfs/citus_stats_tenants_local/latest.sql | 27 +++++++++ src/backend/distributed/utils/attribute.c | 6 +- .../regress/expected/citus_stats_tenants.out | 46 ++++------------ src/test/regress/expected/multi_extension.out | 4 +- .../expected/upgrade_list_citus_objects.out | 4 +- src/test/regress/sql/citus_stats_tenants.sql | 38 ++++--------- 11 files changed, 183 insertions(+), 83 deletions(-) create mode 100644 src/backend/distributed/sql/udfs/citus_stats_tenants_local/11.3-1.sql create mode 100644 src/backend/distributed/sql/udfs/citus_stats_tenants_local/latest.sql diff --git a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql index 73a2bf8a9..30df05d49 100644 --- a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql +++ b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql @@ -2,4 +2,5 @@ -- bump version to 11.3-1 +#include "udfs/citus_stats_tenants_local/11.3-1.sql" #include "udfs/citus_stats_tenants/11.3-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql index 47d2701ac..28052b7fa 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql @@ -1,4 +1,7 @@ -- citus--11.3-1--11.2-1 +DROP VIEW pg_catalog.citus_stats_tenants_local; +DROP FUNCTION pg_catalog.citus_stats_tenants_local(boolean); + DROP VIEW pg_catalog.citus_stats_tenants; DROP FUNCTION pg_catalog.citus_stats_tenants(boolean); diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql index f476a9c28..d85f90d66 100644 --- a/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql @@ -1,27 +1,66 @@ -CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants( +-- cts in the query is an abbreviation for citus_stats_tenants +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants ( return_all_tenants BOOLEAN DEFAULT FALSE, + OUT nodeid INT, OUT colocation_id INT, OUT tenant_attribute TEXT, OUT read_count_in_this_period INT, OUT read_count_in_last_period INT, OUT query_count_in_this_period INT, OUT query_count_in_last_period INT, - OUT score BIGINT) -RETURNS SETOF RECORD -LANGUAGE C -AS 'citus', $$citus_stats_tenants$$; - + OUT score BIGINT +) + RETURNS SETOF record + LANGUAGE plpgsql + AS $function$ +BEGIN + RETURN QUERY + SELECT * + FROM jsonb_to_recordset(( + SELECT + jsonb_agg(all_cst_rows_as_jsonb.cst_row_as_jsonb)::jsonb + FROM ( + SELECT + jsonb_array_elements(run_command_on_all_nodes.result::jsonb)::jsonb || + ('{"nodeid":' || run_command_on_all_nodes.nodeid || '}')::jsonb AS cst_row_as_jsonb + FROM + run_command_on_all_nodes ( + $$ + SELECT + coalesce(to_jsonb (array_agg(cstl.*)), '[]'::jsonb) + FROM citus_stats_tenants_local($$||return_all_tenants||$$) cstl; + $$, + parallel:= TRUE, + give_warning_for_connection_errors:= TRUE) + WHERE + success = 't') + AS all_cst_rows_as_jsonb)) +AS ( + nodeid INT, + colocation_id INT, + tenant_attribute 
TEXT, + read_count_in_this_period INT, + read_count_in_last_period INT, + query_count_in_this_period INT, + query_count_in_last_period INT, + score BIGINT +) + ORDER BY score DESC + LIMIT CASE WHEN NOT return_all_tenants THEN current_setting('citus.stats_tenants_limit')::BIGINT END; +END; +$function$; CREATE OR REPLACE VIEW citus.citus_stats_tenants AS SELECT + nodeid, colocation_id, tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period -FROM pg_catalog.citus_stats_tenants() -ORDER BY score DESC; +FROM pg_catalog.citus_stats_tenants(FALSE); ALTER VIEW citus.citus_stats_tenants SET SCHEMA pg_catalog; + GRANT SELECT ON pg_catalog.citus_stats_tenants TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql index f476a9c28..d85f90d66 100644 --- a/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql @@ -1,27 +1,66 @@ -CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants( +-- cts in the query is an abbreviation for citus_stats_tenants +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants ( return_all_tenants BOOLEAN DEFAULT FALSE, + OUT nodeid INT, OUT colocation_id INT, OUT tenant_attribute TEXT, OUT read_count_in_this_period INT, OUT read_count_in_last_period INT, OUT query_count_in_this_period INT, OUT query_count_in_last_period INT, - OUT score BIGINT) -RETURNS SETOF RECORD -LANGUAGE C -AS 'citus', $$citus_stats_tenants$$; - + OUT score BIGINT +) + RETURNS SETOF record + LANGUAGE plpgsql + AS $function$ +BEGIN + RETURN QUERY + SELECT * + FROM jsonb_to_recordset(( + SELECT + jsonb_agg(all_cst_rows_as_jsonb.cst_row_as_jsonb)::jsonb + FROM ( + SELECT + jsonb_array_elements(run_command_on_all_nodes.result::jsonb)::jsonb || + ('{"nodeid":' || run_command_on_all_nodes.nodeid || '}')::jsonb AS cst_row_as_jsonb + FROM + run_command_on_all_nodes ( + $$ + SELECT + coalesce(to_jsonb (array_agg(cstl.*)), '[]'::jsonb) + FROM citus_stats_tenants_local($$||return_all_tenants||$$) cstl; + $$, + parallel:= TRUE, + give_warning_for_connection_errors:= TRUE) + WHERE + success = 't') + AS all_cst_rows_as_jsonb)) +AS ( + nodeid INT, + colocation_id INT, + tenant_attribute TEXT, + read_count_in_this_period INT, + read_count_in_last_period INT, + query_count_in_this_period INT, + query_count_in_last_period INT, + score BIGINT +) + ORDER BY score DESC + LIMIT CASE WHEN NOT return_all_tenants THEN current_setting('citus.stats_tenants_limit')::BIGINT END; +END; +$function$; CREATE OR REPLACE VIEW citus.citus_stats_tenants AS SELECT + nodeid, colocation_id, tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period -FROM pg_catalog.citus_stats_tenants() -ORDER BY score DESC; +FROM pg_catalog.citus_stats_tenants(FALSE); ALTER VIEW citus.citus_stats_tenants SET SCHEMA pg_catalog; + GRANT SELECT ON pg_catalog.citus_stats_tenants TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants_local/11.3-1.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants_local/11.3-1.sql new file mode 100644 index 000000000..5a47835e7 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants_local/11.3-1.sql @@ -0,0 +1,27 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants_local( + return_all_tenants BOOLEAN DEFAULT FALSE, + OUT colocation_id INT, + OUT tenant_attribute 
TEXT, + OUT read_count_in_this_period INT, + OUT read_count_in_last_period INT, + OUT query_count_in_this_period INT, + OUT query_count_in_last_period INT, + OUT score BIGINT) +RETURNS SETOF RECORD +LANGUAGE C +AS 'citus', $$citus_stats_tenants_local$$; + + +CREATE OR REPLACE VIEW citus.citus_stats_tenants_local AS +SELECT + colocation_id, + tenant_attribute, + read_count_in_this_period, + read_count_in_last_period, + query_count_in_this_period, + query_count_in_last_period +FROM pg_catalog.citus_stats_tenants_local() +ORDER BY score DESC; + +ALTER VIEW citus.citus_stats_tenants_local SET SCHEMA pg_catalog; +GRANT SELECT ON pg_catalog.citus_stats_tenants_local TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants_local/latest.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants_local/latest.sql new file mode 100644 index 000000000..5a47835e7 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants_local/latest.sql @@ -0,0 +1,27 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants_local( + return_all_tenants BOOLEAN DEFAULT FALSE, + OUT colocation_id INT, + OUT tenant_attribute TEXT, + OUT read_count_in_this_period INT, + OUT read_count_in_last_period INT, + OUT query_count_in_this_period INT, + OUT query_count_in_last_period INT, + OUT score BIGINT) +RETURNS SETOF RECORD +LANGUAGE C +AS 'citus', $$citus_stats_tenants_local$$; + + +CREATE OR REPLACE VIEW citus.citus_stats_tenants_local AS +SELECT + colocation_id, + tenant_attribute, + read_count_in_this_period, + read_count_in_last_period, + query_count_in_this_period, + query_count_in_last_period +FROM pg_catalog.citus_stats_tenants_local() +ORDER BY score DESC; + +ALTER VIEW citus.citus_stats_tenants_local SET SCHEMA pg_catalog; +GRANT SELECT ON pg_catalog.citus_stats_tenants_local TO PUBLIC; diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 9b2d65e95..19d290cfd 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -68,16 +68,16 @@ int CitusStatsTenantsPeriod = (time_t) 60; int CitusStatsTenantsLimit = 10; -PG_FUNCTION_INFO_V1(citus_stats_tenants); +PG_FUNCTION_INFO_V1(citus_stats_tenants_local); PG_FUNCTION_INFO_V1(clean_citus_stats_tenants); PG_FUNCTION_INFO_V1(sleep_until_next_period); /* - * citus_stats_tenants finds, updates and returns the statistics for tenants. + * citus_stats_tenants_local finds, updates and returns the statistics for tenants. 
*/ Datum -citus_stats_tenants(PG_FUNCTION_ARGS) +citus_stats_tenants_local(PG_FUNCTION_ARGS) { CheckCitusVersion(ERROR); diff --git a/src/test/regress/expected/citus_stats_tenants.out b/src/test/regress/expected/citus_stats_tenants.out index dbd525aab..783f38240 100644 --- a/src/test/regress/expected/citus_stats_tenants.out +++ b/src/test/regress/expected/citus_stats_tenants.out @@ -68,24 +68,16 @@ INSERT INTO dist_tbl VALUES (2, 'abcd'); UPDATE dist_tbl SET b = a + 1 WHERE a = 3; UPDATE dist_tbl SET b = a + 1 WHERE a = 4; DELETE FROM dist_tbl WHERE a = 5; -\c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants(true) ORDER BY tenant_attribute; tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period --------------------------------------------------------------------- 1 | 0 | 0 | 1 | 0 - 5 | 0 | 0 | 1 | 0 -(2 rows) - -\c - - - :worker_2_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; - tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period ---------------------------------------------------------------------- 2 | 0 | 0 | 1 | 0 3 | 0 | 0 | 1 | 0 -(2 rows) + 4 | 0 | 0 | 1 | 0 + 5 | 0 | 0 | 1 | 0 +(5 rows) -\c - - - :master_port -SET search_path TO citus_stats_tenants; SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); result --------------------------------------------------------------------- @@ -108,14 +100,11 @@ SELECT count(*)>=0 FROM ref_tbl WHERE a = 1; t (1 row) -\c - - - :worker_1_port -SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants ORDER BY tenant_attribute; +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants(true) ORDER BY tenant_attribute; tenant_attribute | query_count_in_this_period --------------------------------------------------------------------- (0 rows) -\c - - - :master_port -SET search_path TO citus_stats_tenants; -- queries with multiple tables but one tenant should be counted SELECT count(*)>=0 FROM dist_tbl, dist_tbl_2 WHERE dist_tbl.a = 1 AND dist_tbl_2.a = 1; ?column? @@ -129,17 +118,15 @@ SELECT count(*)>=0 FROM dist_tbl JOIN dist_tbl_2 ON dist_tbl.a = dist_tbl_2.a WH t (1 row) -\c - - - :worker_1_port -SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants WHERE tenant_attribute = '1'; +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants(true) WHERE tenant_attribute = '1'; tenant_attribute | query_count_in_this_period --------------------------------------------------------------------- 1 | 2 (1 row) -\c - - - :master_port -SET search_path TO citus_stats_tenants; -- test scoring -- all of these distribution column values are from second worker +SELECT nodeid AS worker_2_nodeid FROM pg_dist_node WHERE nodeport = :worker_2_port \gset SELECT count(*)>=0 FROM dist_tbl WHERE a = 2; ?column? 
--------------------------------------------------------------------- @@ -164,8 +151,7 @@ SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; t (1 row) -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; tenant_attribute | query_count_in_this_period | score --------------------------------------------------------------------- 2 | 1 | 1000000000 @@ -174,8 +160,6 @@ SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tena abcd | 1 | 1000000000 (4 rows) -\c - - - :master_port -SET search_path TO citus_stats_tenants; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; ?column? --------------------------------------------------------------------- @@ -200,8 +184,7 @@ SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'cdef'; t (1 row) -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; tenant_attribute | query_count_in_this_period | score --------------------------------------------------------------------- abcd | 3 | 3000000000 @@ -212,8 +195,6 @@ SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tena cdef | 1 | 1000000000 (6 rows) -\c - - - :master_port -SET search_path TO citus_stats_tenants; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; ?column? --------------------------------------------------------------------- @@ -232,8 +213,7 @@ SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'defg'; t (1 row) -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; tenant_attribute | query_count_in_this_period | score --------------------------------------------------------------------- abcd | 3 | 3000000000 @@ -243,8 +223,6 @@ SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tena defg | 1 | 1000000000 (5 rows) -\c - - - :master_port -SET search_path TO citus_stats_tenants; -- test period passing SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); result @@ -262,7 +240,7 @@ SELECT count(*)>=0 FROM dist_tbl WHERE a = 1; INSERT INTO dist_tbl VALUES (5, 'abcd'); \c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants_local ORDER BY tenant_attribute; tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period --------------------------------------------------------------------- 1 | 1 | 0 | 1 | 0 @@ -277,7 +255,7 @@ SELECT sleep_until_next_period(); (1 row) -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, 
query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants_local ORDER BY tenant_attribute; tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period --------------------------------------------------------------------- 1 | 0 | 1 | 0 | 1 diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 2bb0c29e7..95768bbcb 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1363,8 +1363,10 @@ SELECT * FROM multi_extension.print_extension_changes(); previous_object | current_object --------------------------------------------------------------------- | function citus_stats_tenants(boolean) SETOF record + | function citus_stats_tenants_local(boolean) SETOF record | view citus_stats_tenants -(2 rows) + | view citus_stats_tenants_local +(4 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 8005c0c42..12fc4c17b 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -122,6 +122,7 @@ ORDER BY 1; function citus_stat_statements() function citus_stat_statements_reset() function citus_stats_tenants(boolean) + function citus_stats_tenants_local(boolean) function citus_table_is_visible(oid) function citus_table_size(regclass) function citus_task_wait(bigint,citus_task_status) @@ -318,7 +319,8 @@ ORDER BY 1; view citus_stat_activity view citus_stat_statements view citus_stats_tenants + view citus_stats_tenants_local view pg_dist_shard_placement view time_partitions -(312 rows) +(314 rows) diff --git a/src/test/regress/sql/citus_stats_tenants.sql b/src/test/regress/sql/citus_stats_tenants.sql index 056f1902b..981533a6e 100644 --- a/src/test/regress/sql/citus_stats_tenants.sql +++ b/src/test/regress/sql/citus_stats_tenants.sql @@ -37,12 +37,7 @@ UPDATE dist_tbl SET b = a + 1 WHERE a = 3; UPDATE dist_tbl SET b = a + 1 WHERE a = 4; DELETE FROM dist_tbl WHERE a = 5; -\c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :worker_2_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants(true) ORDER BY tenant_attribute; SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); @@ -52,50 +47,37 @@ SELECT count(*)>=0 FROM dist_tbl WHERE a IN (1, 5); -- queries with reference tables should not be counted SELECT count(*)>=0 FROM ref_tbl WHERE a = 1; -\c - - - :worker_1_port -SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT 
tenant_attribute, query_count_in_this_period FROM citus_stats_tenants(true) ORDER BY tenant_attribute; -- queries with multiple tables but one tenant should be counted SELECT count(*)>=0 FROM dist_tbl, dist_tbl_2 WHERE dist_tbl.a = 1 AND dist_tbl_2.a = 1; SELECT count(*)>=0 FROM dist_tbl JOIN dist_tbl_2 ON dist_tbl.a = dist_tbl_2.a WHERE dist_tbl.a = 1; -\c - - - :worker_1_port -SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants WHERE tenant_attribute = '1'; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants(true) WHERE tenant_attribute = '1'; -- test scoring -- all of these distribution column values are from second worker +SELECT nodeid AS worker_2_nodeid FROM pg_dist_node WHERE nodeport = :worker_2_port \gset + SELECT count(*)>=0 FROM dist_tbl WHERE a = 2; SELECT count(*)>=0 FROM dist_tbl WHERE a = 3; SELECT count(*)>=0 FROM dist_tbl WHERE a = 4; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'cdef'; -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'defg'; -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; -- test period passing SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); @@ -104,13 +86,13 @@ SELECT count(*)>=0 FROM dist_tbl WHERE a = 1; INSERT INTO dist_tbl VALUES (5, 'abcd'); \c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants_local ORDER BY tenant_attribute; -- simulate passing the period SET citus.stats_tenants_period TO 2; SELECT sleep_until_next_period(); -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, 
query_count_in_last_period FROM citus_stats_tenants_local ORDER BY tenant_attribute; \c - - - :master_port SET search_path TO citus_stats_tenants; From 9d2d97fe6785762faa022fd5e7d514d69a2272e4 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 9 Mar 2023 11:20:32 +0300 Subject: [PATCH 25/58] Add ExtractFieldInt32(..) to jsonbutils --- src/backend/distributed/utils/jsonbutils.c | 19 +++++++++++++++++++ src/include/distributed/jsonbutils.h | 1 + 2 files changed, 20 insertions(+) diff --git a/src/backend/distributed/utils/jsonbutils.c b/src/backend/distributed/utils/jsonbutils.c index 22fa4f568..4855ee004 100644 --- a/src/backend/distributed/utils/jsonbutils.c +++ b/src/backend/distributed/utils/jsonbutils.c @@ -83,6 +83,25 @@ ExtractFieldBoolean(Datum jsonbDoc, const char *fieldName, bool defaultValue) } +/* + * ExtractFieldInt32 gets value of fieldName from jsonbDoc, or returns + * defaultValue if it doesn't exist. + */ +int32 +ExtractFieldInt32(Datum jsonbDoc, const char *fieldName, int32 defaultValue) +{ + Datum jsonbDatum = 0; + bool found = ExtractFieldJsonb(jsonbDoc, fieldName, &jsonbDatum, false); + if (!found) + { + return defaultValue; + } + + Datum int32Datum = DirectFunctionCall1(jsonb_int4, jsonbDatum); + return DatumGetInt32(int32Datum); +} + + /* * ExtractFieldTextP gets value of fieldName as text* from jsonbDoc, or * returns NULL if it doesn't exist. diff --git a/src/include/distributed/jsonbutils.h b/src/include/distributed/jsonbutils.h index 3e37fa38e..d44044fcb 100644 --- a/src/include/distributed/jsonbutils.h +++ b/src/include/distributed/jsonbutils.h @@ -16,5 +16,6 @@ bool ExtractFieldJsonbDatum(Datum jsonbDoc, const char *fieldName, Datum *result); text * ExtractFieldTextP(Datum jsonbDoc, const char *fieldName); bool ExtractFieldBoolean(Datum jsonbDoc, const char *fieldName, bool defaultValue); +int32 ExtractFieldInt32(Datum jsonbDoc, const char *fieldName, int32 defaultValue); #endif /* CITUS_JSONBUTILS_H */ From 024526ab2fa591c9e2bb502e3b9a7ce30b21bd6e Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 9 Mar 2023 11:20:55 +0300 Subject: [PATCH 26/58] Introduce JSON based annotation parsing --- src/backend/distributed/utils/attribute.c | 101 ++++++++++++---------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 19d290cfd..aa04aebc6 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -15,6 +15,8 @@ #include "distributed/log_utils.h" #include "distributed/listutils.h" #include "distributed/metadata_cache.h" +#include "distributed/jsonbutils.h" +#include "distributed/colocation_utils.h" #include "distributed/tuplestore.h" #include "executor/execdesc.h" #include "storage/ipc.h" @@ -22,7 +24,7 @@ #include "storage/shmem.h" #include #include "utils/builtins.h" - +#include "utils/json.h" #include "distributed/utils/attribute.h" #include @@ -31,8 +33,8 @@ static void AttributeMetricsIfApplicable(void); ExecutorEnd_hook_type prev_ExecutorEnd = NULL; -#define ATTRIBUTE_PREFIX "/* attributeTo: " -#define ATTRIBUTE_STRING_FORMAT "/* attributeTo: %s,%d */" +#define ATTRIBUTE_PREFIX "/*{" +#define ATTRIBUTE_STRING_FORMAT "/*{\"tId\":%s,\"cId\":%d}*/" #define CITUS_STATS_TENANTS_COLUMNS 7 #define ONE_QUERY_SCORE 1000000000 @@ -62,6 +64,7 @@ static void MultiTenantMonitorSMInit(void); static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int FindTenantStats(MultiTenantMonitor *monitor); 
static size_t MultiTenantMonitorshmemSize(void); +static char * extractTopComment(const char *inputString); int MultiTenantMonitoringLogLevel = CITUS_LOG_LEVEL_OFF; int CitusStatsTenantsPeriod = (time_t) 60; @@ -198,54 +201,27 @@ AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) return; } - if (strncmp(ATTRIBUTE_PREFIX, query_string, strlen(ATTRIBUTE_PREFIX)) == 0) + char *annotation = extractTopComment(query_string); + if (annotation != NULL) { - /* TODO create a function to safely parse the tenant identifier from the query comment */ - /* query is attributed to a tenant */ - char *tenantId = (char *) query_string + strlen(ATTRIBUTE_PREFIX); - char *tenantEnd = tenantId; - while (true && tenantEnd[0] != '\0') - { - if (tenantEnd[0] == ' ' && tenantEnd[1] == '*' && tenantEnd[2] == '/') - { - break; - } + Datum jsonbDatum = DirectFunctionCall1(jsonb_in, PointerGetDatum(annotation)); - tenantEnd++; - } - tenantEnd--; - - colocationGroupId = 0; - while (*tenantEnd != ',') + text *tenantIdTextP = ExtractFieldTextP(jsonbDatum, "tId"); + if (tenantIdTextP != NULL) { - colocationGroupId *= 10; - colocationGroupId += *tenantEnd - '0'; - tenantEnd--; + char *tenantId = text_to_cstring(tenantIdTextP); + strcpy_s(attributeToTenant, sizeof(attributeToTenant), tenantId); } - int t = colocationGroupId; - colocationGroupId = 0; - while (t) - { - colocationGroupId *= 10; - colocationGroupId += t % 10; - t /= 10; - } - - /* hack to get a clean copy of the tenant id string */ - char tenantEndTmp = *tenantEnd; - *tenantEnd = '\0'; - tenantId = pstrdup(tenantId); - *tenantEnd = tenantEndTmp; + colocationGroupId = ExtractFieldInt32(jsonbDatum, "cId", INVALID_COLOCATION_ID); if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF) { - ereport(NOTICE, (errmsg("attributing query to tenant: %s", - quote_literal_cstr(tenantId)))); + ereport(NOTICE, (errmsg( + "attributing query to tenant: %s, colocationGroupId: %d", + quote_literal_cstr(attributeToTenant), + colocationGroupId))); } - - strcpy_s(attributeToTenant, sizeof(attributeToTenant), tenantId); - attributeToTenantStart = clock(); } else { @@ -253,6 +229,7 @@ AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) } /*DetachSegment(); */ + attributeToTenantStart = clock(); } @@ -266,9 +243,13 @@ AnnotateQuery(char *queryString, char *partitionColumn, int colocationId) { return queryString; } - StringInfo newQuery = makeStringInfo(); - appendStringInfo(newQuery, ATTRIBUTE_STRING_FORMAT, partitionColumn, colocationId); + StringInfo escapedSourceName = makeStringInfo(); + escape_json(escapedSourceName, partitionColumn); + + StringInfo newQuery = makeStringInfo(); + appendStringInfo(newQuery, ATTRIBUTE_STRING_FORMAT, escapedSourceName->data, + colocationId); appendStringInfoString(newQuery, queryString); return newQuery->data; @@ -686,3 +667,35 @@ MultiTenantMonitorshmemSize(void) return size; } + + +/* + * extractTopComment extracts the top-level multi-line comment from a given input string. 
+ */ +static char * +extractTopComment(const char *inputString) +{ + int i = 0; + + /* If query starts with a comment */ + if (inputString[i] == '/' && inputString[i + 1] == '*') + { + /* Skip the comment start characters */ + i += 2; + while (inputString[i] && (inputString[i] != '*' && inputString[i + 1] != '/')) + { + i++; + } + } + + if (i > 2) + { + char *result = (char *) malloc(sizeof(char) * (i - 1)); + strncpy(result, inputString + 2, i - 2); + return result; + } + else + { + return NULL; + } +} From fda680d22e14eaab1028a9eaf3bed7aabddc7899 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 13 Mar 2023 10:13:28 +0300 Subject: [PATCH 27/58] Use palloc instead of malloc --- src/backend/distributed/utils/attribute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index aa04aebc6..ae77ed792 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -690,7 +690,7 @@ extractTopComment(const char *inputString) if (i > 2) { - char *result = (char *) malloc(sizeof(char) * (i - 1)); + char *result = (char *) palloc(sizeof(char) * (i - 1)); strncpy(result, inputString + 2, i - 2); return result; } From 517ceb2d22532b36d3080fea14ebb47682228cc3 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 13 Mar 2023 11:36:54 +0300 Subject: [PATCH 28/58] Use strncpy_s instead of strncpy --- src/backend/distributed/utils/attribute.c | 27 ++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index ae77ed792..4674b800e 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -65,6 +65,7 @@ static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int FindTenantStats(MultiTenantMonitor *monitor); static size_t MultiTenantMonitorshmemSize(void); static char * extractTopComment(const char *inputString); +static char* get_substring(const char* str, int start, int end); int MultiTenantMonitoringLogLevel = CITUS_LOG_LEVEL_OFF; int CitusStatsTenantsPeriod = (time_t) 60; @@ -690,12 +691,32 @@ extractTopComment(const char *inputString) if (i > 2) { - char *result = (char *) palloc(sizeof(char) * (i - 1)); - strncpy(result, inputString + 2, i - 2); - return result; + return get_substring(inputString, 2, i); } else { return NULL; } } + +static char* +get_substring(const char* str, int start, int end) { + int len = strlen(str); + char* substr = NULL; + + // Ensure start and end are within the bounds of the string + if (start < 0 || end > len || start > end) { + return NULL; + } + + // Allocate memory for the substring + substr = (char*) palloc((end - start + 1) * sizeof(char)); + + // Copy the substring to the new memory location + strncpy_s(substr, end - start + 1, str + start, end - start); + + // Add null terminator to end the substring + substr[end - start] = '\0'; + + return substr; +} From eaa896e744297dfad4239cb38d832b6acd773a16 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 13 Mar 2023 12:36:02 +0300 Subject: [PATCH 29/58] Normalize multiline sql comment statements --- src/test/regress/bin/normalize.sed | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 33a35f286..423319611 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ 
-308,4 +308,4 @@ s/(NOTICE: issuing SET LOCAL application_name TO 'citus_rebalancer gpid=)[0-9]+
 # shard_rebalancer output, flaky improvement number
 s/improvement of 0.1[0-9]* is lower/improvement of 0.1xxxxx is lower/g
-s/\/\* attributeTo.*\*\///g
+s/\/\*.*\*\///g

From 6d8cd8a9a011901859071326fedd6da60b9bc2f5 Mon Sep 17 00:00:00 2001
From: Gokhan Gulbiz
Date: Mon, 13 Mar 2023 12:44:57 +0300
Subject: [PATCH 30/58] Validate input string length

---
 src/backend/distributed/utils/attribute.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c
index 4674b800e..593e503d9 100644
--- a/src/backend/distributed/utils/attribute.c
+++ b/src/backend/distributed/utils/attribute.c
@@ -676,8 +676,13 @@ MultiTenantMonitorshmemSize(void)
 static char *
 extractTopComment(const char *inputString)
 {
- int i = 0;
+ int commentStartCharsLength = 2;
+ if (strlen(inputString) < commentStartCharsLength )
+ {
+ return NULL;
+ }
+ int i = 0;

 /* If query starts with a comment */
 if (inputString[i] == '/' && inputString[i + 1] == '*')
 {
@@ -689,9 +694,9 @@ extractTopComment(const char *inputString)
 }
 }

- if (i > 2)
+ if (i > commentStartCharsLength)
 {
- return get_substring(inputString, 2, i);
+ return get_substring(inputString, commentStartCharsLength, i);
 }
 else
 {

From 21298f66615555481fd78c9f4aafb272052edac3 Mon Sep 17 00:00:00 2001
From: Gokhan Gulbiz
Date: Mon, 13 Mar 2023 13:24:27 +0300
Subject: [PATCH 31/58] Validate attribute prefix existence on query string

---
 src/backend/distributed/utils/attribute.c | 40 +++++++++++------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c
index 593e503d9..2b20a4050 100644
--- a/src/backend/distributed/utils/attribute.c
+++ b/src/backend/distributed/utils/attribute.c
@@ -33,7 +33,7 @@ static void AttributeMetricsIfApplicable(void);

 ExecutorEnd_hook_type prev_ExecutorEnd = NULL;

-#define ATTRIBUTE_PREFIX "/*{"
+#define ATTRIBUTE_PREFIX "{\"tId\":"
 #define ATTRIBUTE_STRING_FORMAT "/*{\"tId\":%s,\"cId\":%d}*/"
 #define CITUS_STATS_TENANTS_COLUMNS 7
 #define ONE_QUERY_SCORE 1000000000
@@ -202,34 +202,32 @@ AttributeQueryIfAnnotated(const char *query_string, CmdType commandType)
 return;
 }

- char *annotation = extractTopComment(query_string);
- if (annotation != NULL)
+ if (strncmp(ATTRIBUTE_PREFIX, query_string, strlen(ATTRIBUTE_PREFIX)) == 0)
 {
- Datum jsonbDatum = DirectFunctionCall1(jsonb_in, PointerGetDatum(annotation));
-
- text *tenantIdTextP = ExtractFieldTextP(jsonbDatum, "tId");
- if (tenantIdTextP != NULL)
+ char *annotation = extractTopComment(query_string);
+ if (annotation != NULL)
 {
- char *tenantId = text_to_cstring(tenantIdTextP);
- strcpy_s(attributeToTenant, sizeof(attributeToTenant), tenantId);
- }
+ Datum jsonbDatum = DirectFunctionCall1(jsonb_in, PointerGetDatum(annotation));
+
+ text *tenantIdTextP = ExtractFieldTextP(jsonbDatum, "tId");
+ if (tenantIdTextP != NULL)
+ {
+ char *tenantId = text_to_cstring(tenantIdTextP);
+ strcpy_s(attributeToTenant, sizeof(attributeToTenant), tenantId);
+ }

 colocationGroupId = ExtractFieldInt32(jsonbDatum, "cId", INVALID_COLOCATION_ID);

- if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF)
- {
- ereport(NOTICE, (errmsg(
+ if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF)
+ {
+
ereport(NOTICE, (errmsg( + "attributing query to tenant: %s, colocationGroupId: %d", + quote_literal_cstr(attributeToTenant), + colocationGroupId))); + } } } - else - { - strcpy_s(attributeToTenant, sizeof(attributeToTenant), ""); - } - /*DetachSegment(); */ attributeToTenantStart = clock(); } From e9a6f8a7c57e16091f30da11be651d034b55119c Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 13 Mar 2023 14:31:41 +0300 Subject: [PATCH 32/58] Indent --- src/backend/distributed/utils/attribute.c | 43 ++++++++++++----------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 2b20a4050..d028e1360 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -65,7 +65,7 @@ static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int FindTenantStats(MultiTenantMonitor *monitor); static size_t MultiTenantMonitorshmemSize(void); static char * extractTopComment(const char *inputString); -static char* get_substring(const char* str, int start, int end); +static char * get_substring(const char *str, int start, int end); int MultiTenantMonitoringLogLevel = CITUS_LOG_LEVEL_OFF; int CitusStatsTenantsPeriod = (time_t) 60; @@ -221,9 +221,9 @@ AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF) { ereport(NOTICE, (errmsg( - "attributing query to tenant: %s, colocationGroupId: %d", - quote_literal_cstr(attributeToTenant), - colocationGroupId))); + "attributing query to tenant: %s, colocationGroupId: %d", + quote_literal_cstr(attributeToTenant), + colocationGroupId))); } } } @@ -675,12 +675,13 @@ static char * extractTopComment(const char *inputString) { int commentStartCharsLength = 2; - if (strlen(inputString) < commentStartCharsLength ) + if (strlen(inputString) < commentStartCharsLength) { return NULL; } int i = 0; + /* If query starts with a comment */ if (inputString[i] == '/' && inputString[i + 1] == '*') { @@ -702,24 +703,26 @@ extractTopComment(const char *inputString) } } -static char* -get_substring(const char* str, int start, int end) { - int len = strlen(str); - char* substr = NULL; - // Ensure start and end are within the bounds of the string - if (start < 0 || end > len || start > end) { - return NULL; - } +static char * +get_substring(const char *str, int start, int end) +{ + int len = strlen(str); - // Allocate memory for the substring - substr = (char*) palloc((end - start + 1) * sizeof(char)); + /* Ensure start and end are within the bounds of the string */ + if (start < 0 || end > len || start > end) + { + return NULL; + } - // Copy the substring to the new memory location - strncpy_s(substr, end - start + 1, str + start, end - start); + /* Allocate memory for the substring */ + char *substr = (char *) palloc((end - start + 1) * sizeof(char)); - // Add null terminator to end the substring - substr[end - start] = '\0'; + /* Copy the substring to the new memory location */ + strncpy_s(substr, end - start + 1, str + start, end - start); - return substr; + /* Add null terminator to end the substring */ + substr[end - start] = '\0'; + + return substr; } From bb4aacb92fbe81b38a7717bc13fa76f4fcd91a60 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 13 Mar 2023 14:32:16 +0300 Subject: [PATCH 33/58] Fix tenant statistics annotations normalization --- src/test/regress/bin/normalize.sed | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
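For context on the rule being tightened here: the tenant annotation that AnnotateQuery prepends is a C-style comment of the shape produced by ATTRIBUTE_STRING_FORMAT, so anchoring the sed pattern on the literal `/*{"tId":` prefix strips only injected annotations and leaves any other comment in regression test output intact. The snippet below is a minimal standalone sketch (not server code) of the string shape the new pattern matches; the tenant id "42" and colocation id 7 are made-up example values.

```c
#include <stdio.h>

/* same format string the server-side AnnotateQuery uses */
#define ATTRIBUTE_STRING_FORMAT "/*{\"tId\":%s,\"cId\":%d}*/"

int
main(void)
{
	char annotated[256];

	/* "\"42\"" stands in for an already-JSON-quoted tenant identifier */
	snprintf(annotated, sizeof(annotated),
	         ATTRIBUTE_STRING_FORMAT "SELECT count(*) FROM dist_tbl;",
	         "\"42\"", 7);

	/* the printed query starts with the {"tId":...} comment the sed rule strips */
	puts(annotated);
	return 0;
}
```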
diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 423319611..65692e1c9 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -307,5 +307,5 @@ s/(NOTICE: issuing SET LOCAL application_name TO 'citus_rebalancer gpid=)[0-9]+ # shard_rebalancer output, flaky improvement number s/improvement of 0.1[0-9]* is lower/improvement of 0.1xxxxx is lower/g - -s/\/\*.*\*\///g +# normalize tenants statistics annotations +s/\/\*\{"tId":.*\*\///g From 89e66239600ca5bb4688d365d116ce5c02529f54 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Wed, 15 Mar 2023 08:08:33 +0300 Subject: [PATCH 34/58] Fix attribute prefix --- src/backend/distributed/utils/attribute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index d028e1360..3a76c682d 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -33,7 +33,7 @@ static void AttributeMetricsIfApplicable(void); ExecutorEnd_hook_type prev_ExecutorEnd = NULL; -#define ATTRIBUTE_PREFIX "{\"tId\":" +#define ATTRIBUTE_PREFIX "/*{\"tId\":" #define ATTRIBUTE_STRING_FORMAT "/*{\"tId\":%s,\"cId\":%d}*/" #define CITUS_STATS_TENANTS_COLUMNS 7 #define ONE_QUERY_SCORE 1000000000 From dbc26cacb5d1d7d7353b75a8b70fa09f5bcd5fee Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Wed, 15 Mar 2023 08:09:24 +0300 Subject: [PATCH 35/58] Add comment chars escaping --- src/backend/distributed/utils/attribute.c | 51 ++++++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 3a76c682d..69162e372 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -64,8 +64,8 @@ static void MultiTenantMonitorSMInit(void); static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int FindTenantStats(MultiTenantMonitor *monitor); static size_t MultiTenantMonitorshmemSize(void); -static char * extractTopComment(const char *inputString); -static char * get_substring(const char *str, int start, int end); +static char * EscapeCommentChars(const char *str); +static char * UnescapeCommentChars(const char *str); int MultiTenantMonitoringLogLevel = CITUS_LOG_LEVEL_OFF; int CitusStatsTenantsPeriod = (time_t) 60; @@ -726,3 +726,50 @@ get_substring(const char *str, int start, int end) return substr; } + + +/* EscapeCommentChars adds a backslash before each occurrence of '*' or '/' in the input string */ +static char * +EscapeCommentChars(const char *str) +{ + int len = strlen(str); + char *new_str = (char *) malloc(len * 2 + 1); + int j = 0; + + for (int i = 0; i < len; i++) + { + if (str[i] == '*' || str[i] == '/') + { + new_str[j++] = '\\'; + } + new_str[j++] = str[i]; + } + new_str[j] = '\0'; + + return new_str; +} + + +/* UnescapeCommentChars removes the backslash that precedes '*' or '/' in the input string. 
*/ +static char * +UnescapeCommentChars(const char *str) +{ + int len = strlen(str); + char *new_str = (char *) malloc(len + 1); + int j = 0; + + for (int i = 0; i < len; i++) + { + if (str[i] == '\\' && i < len - 1) + { + if (str[i + 1] == '*' || str[i + 1] == '/') + { + i++; + } + } + new_str[j++] = str[i]; + } + new_str[j] = '\0'; + + return new_str; +} From 80dd73711eb6601d670ecb1cf67ef304c321da02 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Wed, 15 Mar 2023 08:10:45 +0300 Subject: [PATCH 36/58] Minor renamings and refactorings --- src/backend/distributed/utils/attribute.c | 31 ++++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 69162e372..2f604a609 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -64,6 +64,8 @@ static void MultiTenantMonitorSMInit(void); static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int FindTenantStats(MultiTenantMonitor *monitor); static size_t MultiTenantMonitorshmemSize(void); +static char * ExtractTopComment(const char *inputString); +static char * Substring(const char *str, int start, int end); static char * EscapeCommentChars(const char *str); static char * UnescapeCommentChars(const char *str); @@ -204,7 +206,7 @@ AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) if (strncmp(ATTRIBUTE_PREFIX, query_string, strlen(ATTRIBUTE_PREFIX)) == 0) { - char *annotation = extractTopComment(query_string); + char *annotation = ExtractTopComment(query_string); if (annotation != NULL) { Datum jsonbDatum = DirectFunctionCall1(jsonb_in, PointerGetDatum(annotation)); @@ -669,33 +671,37 @@ MultiTenantMonitorshmemSize(void) /* - * extractTopComment extracts the top-level multi-line comment from a given input string. + * ExtractTopComment extracts the top-level multi-line comment from a given input string. 
*/ static char * -extractTopComment(const char *inputString) +ExtractTopComment(const char *inputString) { int commentStartCharsLength = 2; - if (strlen(inputString) < commentStartCharsLength) + int inputStringLen = strlen(inputString); + if (inputStringLen < commentStartCharsLength) { return NULL; } - int i = 0; + int commentEndCharsIndex = 0; /* If query starts with a comment */ - if (inputString[i] == '/' && inputString[i + 1] == '*') + if (inputString[commentEndCharsIndex] == '/' && inputString[commentEndCharsIndex + + 1] == '*') { /* Skip the comment start characters */ - i += 2; - while (inputString[i] && (inputString[i] != '*' && inputString[i + 1] != '/')) + commentEndCharsIndex += commentStartCharsLength; + while (inputString[commentEndCharsIndex] && commentEndCharsIndex < + inputStringLen && !(inputString[commentEndCharsIndex] == '*' && + inputString[commentEndCharsIndex + 1] == '/')) { - i++; + commentEndCharsIndex++; } } - if (i > commentStartCharsLength) + if (commentEndCharsIndex > commentStartCharsLength) { - return get_substring(inputString, commentStartCharsLength, i); + return Substring(inputString, commentStartCharsLength, commentEndCharsIndex); } else { @@ -704,8 +710,9 @@ extractTopComment(const char *inputString) } +/* Extracts a substring from the input string between the specified start and end indices.*/ static char * -get_substring(const char *str, int start, int end) +Substring(const char *str, int start, int end) { int len = strlen(str); From 3cfa197f69ba3fb52a88ab7957299df034a2b1e3 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Wed, 15 Mar 2023 08:11:07 +0300 Subject: [PATCH 37/58] Escape/Unescape sql comment chars --- src/backend/distributed/utils/attribute.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 2f604a609..bea15618d 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -26,6 +26,7 @@ #include "utils/builtins.h" #include "utils/json.h" #include "distributed/utils/attribute.h" +#include "common/base64.h" #include @@ -214,7 +215,7 @@ AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) text *tenantIdTextP = ExtractFieldTextP(jsonbDatum, "tId"); if (tenantIdTextP != NULL) { - char *tenantId = text_to_cstring(tenantIdTextP); + char *tenantId = UnescapeCommentChars(text_to_cstring(tenantIdTextP)); strcpy_s(attributeToTenant, sizeof(attributeToTenant), tenantId); } @@ -245,12 +246,15 @@ AnnotateQuery(char *queryString, char *partitionColumn, int colocationId) return queryString; } + char *commentCharsEscaped = EscapeCommentChars(partitionColumn); StringInfo escapedSourceName = makeStringInfo(); - escape_json(escapedSourceName, partitionColumn); + + escape_json(escapedSourceName, commentCharsEscaped); StringInfo newQuery = makeStringInfo(); appendStringInfo(newQuery, ATTRIBUTE_STRING_FORMAT, escapedSourceName->data, colocationId); + appendStringInfoString(newQuery, queryString); return newQuery->data; From da24f2fd62bb5bf66df016dddf11d7df396eae1b Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Wed, 15 Mar 2023 08:36:43 +0300 Subject: [PATCH 38/58] Indent --- src/backend/distributed/utils/attribute.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index bea15618d..89d75d2fa 100644 --- a/src/backend/distributed/utils/attribute.c +++ 
b/src/backend/distributed/utils/attribute.c @@ -697,7 +697,8 @@ ExtractTopComment(const char *inputString) commentEndCharsIndex += commentStartCharsLength; while (inputString[commentEndCharsIndex] && commentEndCharsIndex < inputStringLen && !(inputString[commentEndCharsIndex] == '*' && - inputString[commentEndCharsIndex + 1] == '/')) + inputString + [commentEndCharsIndex + 1] == '/')) { commentEndCharsIndex++; } From c60de03d6a18c535346758a6bc2818a1b992746f Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Wed, 15 Mar 2023 09:01:41 +0300 Subject: [PATCH 39/58] Indent --- src/backend/distributed/utils/attribute.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 89d75d2fa..a42e492d5 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -690,15 +690,15 @@ ExtractTopComment(const char *inputString) int commentEndCharsIndex = 0; /* If query starts with a comment */ - if (inputString[commentEndCharsIndex] == '/' && inputString[commentEndCharsIndex + - 1] == '*') + if (inputString[commentEndCharsIndex] == '/' && + inputString[commentEndCharsIndex + 1] == '*') { /* Skip the comment start characters */ commentEndCharsIndex += commentStartCharsLength; - while (inputString[commentEndCharsIndex] && commentEndCharsIndex < - inputStringLen && !(inputString[commentEndCharsIndex] == '*' && - inputString - [commentEndCharsIndex + 1] == '/')) + while (inputString[commentEndCharsIndex] && + commentEndCharsIndex < inputStringLen && + !(inputString[commentEndCharsIndex] == '*' && + inputString [commentEndCharsIndex + 1] == '/')) { commentEndCharsIndex++; } From a355825bfe21a0fff7f39ec4094babb1bb13dee0 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 16 Mar 2023 10:40:02 +0300 Subject: [PATCH 40/58] Set INVALID_COLOCATION_ID if colocationId doesn't exist in the annotation. 
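An annotation can legitimately carry only a tenant id, in which case the "cId" key is absent and colocationGroupId must fall back to a well-defined sentinel instead of keeping a stale value. The sketch below illustrates that fallback contract in a self-contained way; the plain string scan is only a stand-in for the jsonb-based ExtractFieldInt32, the helper name is hypothetical, and the assumption that INVALID_COLOCATION_ID is 0 follows the usual Citus convention.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define INVALID_COLOCATION_ID 0 /* assumed sentinel value */

/* hypothetical helper mimicking ExtractFieldInt32's default-value behavior */
static int
ColocationIdOrDefault(const char *annotation, int defaultValue)
{
	const char *key = strstr(annotation, "\"cId\":");
	if (key == NULL)
	{
		/* key missing: return the caller-provided default */
		return defaultValue;
	}
	return atoi(key + strlen("\"cId\":"));
}

int
main(void)
{
	/* prints 7, then 0 */
	printf("%d\n", ColocationIdOrDefault("{\"tId\":\"42\",\"cId\":7}",
	                                     INVALID_COLOCATION_ID));
	printf("%d\n", ColocationIdOrDefault("{\"tId\":\"42\"}",
	                                     INVALID_COLOCATION_ID));
	return 0;
}
```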
--- src/backend/distributed/utils/attribute.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index a42e492d5..bddf77b08 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -18,6 +18,7 @@ #include "distributed/jsonbutils.h" #include "distributed/colocation_utils.h" #include "distributed/tuplestore.h" +#include "distributed/colocation_utils.h" #include "executor/execdesc.h" #include "storage/ipc.h" #include "storage/lwlock.h" @@ -219,7 +220,7 @@ AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) strcpy_s(attributeToTenant, sizeof(attributeToTenant), tenantId); } - colocationGroupId = ExtractFieldInt32(jsonbDatum, "cId", INVALID_COLOCATION_ID); + colocationGroupId = ExtractFieldInt32(jsonbDatum, "cId", INVALID_COLOCATION_ID); if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF) { From 5e6ac25885d6b4264f62d5e7dd3876cf97dd6d24 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 16 Mar 2023 10:54:07 +0300 Subject: [PATCH 41/58] Renamings --- src/backend/distributed/utils/attribute.c | 46 ++++++++++++----------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index bddf77b08..84d56f1d2 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -220,7 +220,8 @@ AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) strcpy_s(attributeToTenant, sizeof(attributeToTenant), tenantId); } - colocationGroupId = ExtractFieldInt32(jsonbDatum, "cId", INVALID_COLOCATION_ID); + colocationGroupId = ExtractFieldInt32(jsonbDatum, "cId", + INVALID_COLOCATION_ID); if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF) { @@ -745,21 +746,22 @@ Substring(const char *str, int start, int end) static char * EscapeCommentChars(const char *str) { - int len = strlen(str); - char *new_str = (char *) malloc(len * 2 + 1); - int j = 0; + int originalStringLength = strlen(str); + char *escapedString = (char *) malloc(originalStringLength * 2 + 1); + int escapedStringIndex = 0; - for (int i = 0; i < len; i++) + for (int originalStringIndex = 0; originalStringIndex < originalStringLength; + originalStringIndex++) { - if (str[i] == '*' || str[i] == '/') + if (str[originalStringIndex] == '*' || str[originalStringIndex] == '/') { - new_str[j++] = '\\'; + escapedString[escapedStringIndex++] = '\\'; } - new_str[j++] = str[i]; + escapedString[escapedStringIndex++] = str[originalStringIndex]; } - new_str[j] = '\0'; + escapedString[escapedStringIndex] = '\0'; - return new_str; + return escapedString; } @@ -767,22 +769,22 @@ EscapeCommentChars(const char *str) static char * UnescapeCommentChars(const char *str) { - int len = strlen(str); - char *new_str = (char *) malloc(len + 1); - int j = 0; + int originalStringLength = strlen(str); + char *unescapedString = (char *) malloc(originalStringLength + 1); + int unescapedStringIndex = 0; - for (int i = 0; i < len; i++) + for (int originalStringindex = 0; originalStringindex < originalStringLength; + originalStringindex++) { - if (str[i] == '\\' && i < len - 1) + if (str[originalStringindex] == '\\' && originalStringindex < + originalStringLength - 1 && (str[originalStringindex + 1] == '*' || + str[originalStringindex + 1] == '/')) { - if (str[i + 1] == '*' || str[i + 1] == '/') - { - i++; - } + originalStringindex++; } - new_str[j++] = 
str[i]; + unescapedString[unescapedStringIndex++] = str[originalStringindex]; } - new_str[j] = '\0'; + unescapedString[unescapedStringIndex] = '\0'; - return new_str; + return unescapedString; } From 8e1e827242f6ce6d798c33b9034a5ba8c4a7dc0d Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 16 Mar 2023 11:00:37 +0300 Subject: [PATCH 42/58] Remove unnecessary check --- src/backend/distributed/utils/attribute.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 84d56f1d2..9aef7b156 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -697,8 +697,7 @@ ExtractTopComment(const char *inputString) { /* Skip the comment start characters */ commentEndCharsIndex += commentStartCharsLength; - while (inputString[commentEndCharsIndex] && - commentEndCharsIndex < inputStringLen && + while (commentEndCharsIndex < inputStringLen && !(inputString[commentEndCharsIndex] == '*' && inputString [commentEndCharsIndex + 1] == '/')) { @@ -778,7 +777,8 @@ UnescapeCommentChars(const char *str) { if (str[originalStringindex] == '\\' && originalStringindex < originalStringLength - 1 && (str[originalStringindex + 1] == '*' || - str[originalStringindex + 1] == '/')) + str + [originalStringindex + 1] == '/')) { originalStringindex++; } From 486e2a622a3152e4a44f4c6a1d966918d47eb6d4 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 16 Mar 2023 11:02:52 +0300 Subject: [PATCH 43/58] Refactoring to reduce nesting --- src/backend/distributed/utils/attribute.c | 24 ++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 9aef7b156..55dffffec 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -691,18 +691,20 @@ ExtractTopComment(const char *inputString) int commentEndCharsIndex = 0; - /* If query starts with a comment */ - if (inputString[commentEndCharsIndex] == '/' && - inputString[commentEndCharsIndex + 1] == '*') + /* If query doesn't start with a comment, return NULL */ + if (inputString[commentEndCharsIndex] != '/' || + inputString[commentEndCharsIndex + 1] != '*') { - /* Skip the comment start characters */ - commentEndCharsIndex += commentStartCharsLength; - while (commentEndCharsIndex < inputStringLen && - !(inputString[commentEndCharsIndex] == '*' && - inputString [commentEndCharsIndex + 1] == '/')) - { - commentEndCharsIndex++; - } + return NULL; + } + + /* Skip the comment start characters */ + commentEndCharsIndex += commentStartCharsLength; + while (commentEndCharsIndex < inputStringLen && + !(inputString[commentEndCharsIndex] == '*' && + inputString [commentEndCharsIndex + 1] == '/')) + { + commentEndCharsIndex++; } if (commentEndCharsIndex > commentStartCharsLength) From 3a5c7c32800b220bca787d7cb42672484b131070 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 16 Mar 2023 11:31:12 +0300 Subject: [PATCH 44/58] Indent --- src/backend/distributed/utils/attribute.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 55dffffec..e4e4a9c5b 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -701,8 +701,8 @@ ExtractTopComment(const char *inputString) /* Skip the comment start characters */ 
commentEndCharsIndex += commentStartCharsLength; while (commentEndCharsIndex < inputStringLen && - !(inputString[commentEndCharsIndex] == '*' && - inputString [commentEndCharsIndex + 1] == '/')) + !(inputString[commentEndCharsIndex] == '*' && + inputString [commentEndCharsIndex + 1] == '/')) { commentEndCharsIndex++; } @@ -780,7 +780,8 @@ UnescapeCommentChars(const char *str) if (str[originalStringindex] == '\\' && originalStringindex < originalStringLength - 1 && (str[originalStringindex + 1] == '*' || str - [originalStringindex + 1] == '/')) + [ + originalStringindex + 1] == '/')) { originalStringindex++; } From 3ec2994abdbae31b99d7d0a6610c840460946e85 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 16 Mar 2023 14:52:36 +0300 Subject: [PATCH 45/58] Indent --- src/backend/distributed/utils/attribute.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index e4e4a9c5b..39ca58d1a 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -777,11 +777,10 @@ UnescapeCommentChars(const char *str) for (int originalStringindex = 0; originalStringindex < originalStringLength; originalStringindex++) { - if (str[originalStringindex] == '\\' && originalStringindex < - originalStringLength - 1 && (str[originalStringindex + 1] == '*' || - str - [ - originalStringindex + 1] == '/')) + if (str[originalStringindex] == '\\' && + originalStringindex < originalStringLength - 1 && + (str[originalStringindex + 1] == '*' || + str[originalStringindex + 1] == '/')) { originalStringindex++; } From 905cc5b4f3545316466fdc77ef6d668ae44be908 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 20 Mar 2023 08:15:11 +0300 Subject: [PATCH 46/58] Use palloc instead of malloc --- src/backend/distributed/utils/attribute.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 39ca58d1a..c0a0a1fae 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -748,7 +748,7 @@ static char * EscapeCommentChars(const char *str) { int originalStringLength = strlen(str); - char *escapedString = (char *) malloc(originalStringLength * 2 + 1); + char *escapedString = (char *) palloc(originalStringLength * 2 + 1); int escapedStringIndex = 0; for (int originalStringIndex = 0; originalStringIndex < originalStringLength; @@ -771,7 +771,7 @@ static char * UnescapeCommentChars(const char *str) { int originalStringLength = strlen(str); - char *unescapedString = (char *) malloc(originalStringLength + 1); + char *unescapedString = (char *) palloc(originalStringLength + 1); int unescapedStringIndex = 0; for (int originalStringindex = 0; originalStringindex < originalStringLength; From fc23fd50611639d059c33bf7609186f36bbf2c17 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 20 Mar 2023 08:46:22 +0300 Subject: [PATCH 47/58] Use text_substr for getting top comment --- src/backend/distributed/utils/attribute.c | 32 +++-------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index c0a0a1fae..51df6aaae 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -67,7 +67,6 @@ static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int 
FindTenantStats(MultiTenantMonitor *monitor); static size_t MultiTenantMonitorshmemSize(void); static char * ExtractTopComment(const char *inputString); -static char * Substring(const char *str, int start, int end); static char * EscapeCommentChars(const char *str); static char * UnescapeCommentChars(const char *str); @@ -709,7 +708,10 @@ ExtractTopComment(const char *inputString) if (commentEndCharsIndex > commentStartCharsLength) { - return Substring(inputString, commentStartCharsLength, commentEndCharsIndex); + Datum substringTextDatum = DirectFunctionCall3(text_substr, PointerGetDatum(inputString), + Int32GetDatum(commentStartCharsLength), + Int32GetDatum(commentEndCharsIndex - commentStartCharsLength)); + return TextDatumGetCString(substringTextDatum); } else { @@ -717,32 +719,6 @@ ExtractTopComment(const char *inputString) } } - -/* Extracts a substring from the input string between the specified start and end indices.*/ -static char * -Substring(const char *str, int start, int end) -{ - int len = strlen(str); - - /* Ensure start and end are within the bounds of the string */ - if (start < 0 || end > len || start > end) - { - return NULL; - } - - /* Allocate memory for the substring */ - char *substr = (char *) palloc((end - start + 1) * sizeof(char)); - - /* Copy the substring to the new memory location */ - strncpy_s(substr, end - start + 1, str + start, end - start); - - /* Add null terminator to end the substring */ - substr[end - start] = '\0'; - - return substr; -} - - /* EscapeCommentChars adds a backslash before each occurrence of '*' or '/' in the input string */ static char * EscapeCommentChars(const char *str) From 8b09a4f8c0bb1dbc1d261f3d69dfb4de729b3a62 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 20 Mar 2023 08:46:52 +0300 Subject: [PATCH 48/58] Handle no comment end chars --- src/backend/distributed/utils/attribute.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 51df6aaae..2ffd1e416 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -706,6 +706,13 @@ ExtractTopComment(const char *inputString) commentEndCharsIndex++; } + /* If there is no end of comment chars , return NULL */ + if (inputString[commentEndCharsIndex] != '*' && + inputString[commentEndCharsIndex + 1] != '/') + { + return NULL; + } + if (commentEndCharsIndex > commentStartCharsLength) { Datum substringTextDatum = DirectFunctionCall3(text_substr, PointerGetDatum(inputString), From 426cfd3ce5c3f2b2f74042fc18d4ee428cf43066 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 20 Mar 2023 08:49:39 +0300 Subject: [PATCH 49/58] Indent --- src/backend/distributed/utils/attribute.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 2ffd1e416..b7014b5cc 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -715,9 +715,13 @@ ExtractTopComment(const char *inputString) if (commentEndCharsIndex > commentStartCharsLength) { - Datum substringTextDatum = DirectFunctionCall3(text_substr, PointerGetDatum(inputString), - Int32GetDatum(commentStartCharsLength), - Int32GetDatum(commentEndCharsIndex - commentStartCharsLength)); + Datum substringTextDatum = DirectFunctionCall3(text_substr, PointerGetDatum( + inputString), + Int32GetDatum( + commentStartCharsLength), + Int32GetDatum( + 
commentEndCharsIndex - + commentStartCharsLength)); return TextDatumGetCString(substringTextDatum); } else @@ -726,6 +730,7 @@ ExtractTopComment(const char *inputString) } } + /* EscapeCommentChars adds a backslash before each occurrence of '*' or '/' in the input string */ static char * EscapeCommentChars(const char *str) From 81953d7ac669eb512f7deb77d91645ab2f18472b Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 20 Mar 2023 09:40:08 +0300 Subject: [PATCH 50/58] Convert char* to text for text_substr call --- src/backend/distributed/utils/attribute.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index b7014b5cc..b233b3fe3 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -716,9 +716,9 @@ ExtractTopComment(const char *inputString) if (commentEndCharsIndex > commentStartCharsLength) { Datum substringTextDatum = DirectFunctionCall3(text_substr, PointerGetDatum( - inputString), + cstring_to_text(inputString)), Int32GetDatum( - commentStartCharsLength), + commentStartCharsLength + 1), Int32GetDatum( commentEndCharsIndex - commentStartCharsLength)); @@ -736,7 +736,7 @@ static char * EscapeCommentChars(const char *str) { int originalStringLength = strlen(str); - char *escapedString = (char *) palloc(originalStringLength * 2 + 1); + char *escapedString = (char *) palloc0((originalStringLength * 2 + 1) * sizeof(char)); int escapedStringIndex = 0; for (int originalStringIndex = 0; originalStringIndex < originalStringLength; @@ -759,7 +759,7 @@ static char * UnescapeCommentChars(const char *str) { int originalStringLength = strlen(str); - char *unescapedString = (char *) palloc(originalStringLength + 1); + char *unescapedString = (char *) palloc0((originalStringLength + 1) * sizeof(char)); int unescapedStringIndex = 0; for (int originalStringindex = 0; originalStringindex < originalStringLength; From 2ac56ef9551f5f9175416fe2e9e209551f3b25e5 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Mon, 20 Mar 2023 11:02:34 +0300 Subject: [PATCH 51/58] Revert "Use text_substr for getting top comment" This reverts commit 9531cfd3bf4d9950e3593eed90f092418bb6ff4b. 
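text_substr() operates on a text datum with 1-based, character-oriented
indexing, so the call site needed a cstring_to_text() conversion plus a +1
offset fixup (see the previous commit), while the surrounding comment scanner
computes byte offsets; for multibyte input those two indexings diverge. Going
back to plain byte-wise substring extraction keeps the parsing self-contained
and avoids the mismatch.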
--- src/backend/distributed/utils/attribute.c | 35 +++++++++++++++++------ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index b233b3fe3..2f0bcab02 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -67,6 +67,7 @@ static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int FindTenantStats(MultiTenantMonitor *monitor); static size_t MultiTenantMonitorshmemSize(void); static char * ExtractTopComment(const char *inputString); +static char * Substring(const char *str, int start, int end); static char * EscapeCommentChars(const char *str); static char * UnescapeCommentChars(const char *str); @@ -715,14 +716,7 @@ ExtractTopComment(const char *inputString) if (commentEndCharsIndex > commentStartCharsLength) { - Datum substringTextDatum = DirectFunctionCall3(text_substr, PointerGetDatum( - cstring_to_text(inputString)), - Int32GetDatum( - commentStartCharsLength + 1), - Int32GetDatum( - commentEndCharsIndex - - commentStartCharsLength)); - return TextDatumGetCString(substringTextDatum); + return Substring(inputString, commentStartCharsLength, commentEndCharsIndex); } else { @@ -731,6 +725,31 @@ ExtractTopComment(const char *inputString) } +/* Extracts a substring from the input string between the specified start and end indices.*/ +static char * +Substring(const char *str, int start, int end) +{ + int len = strlen(str); + + /* Ensure start and end are within the bounds of the string */ + if (start < 0 || end > len || start > end) + { + return NULL; + } + + /* Allocate memory for the substring */ + char *substr = (char *) palloc((end - start + 1) * sizeof(char)); + + /* Copy the substring to the new memory location */ + strncpy_s(substr, end - start + 1, str + start, end - start); + + /* Add null terminator to end the substring */ + substr[end - start] = '\0'; + + return substr; +} + + /* EscapeCommentChars adds a backslash before each occurrence of '*' or '/' in the input string */ static char * EscapeCommentChars(const char *str) From 5e9dd3c8949d054d4c809e0db6c98014da6908cb Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Tue, 21 Mar 2023 11:22:08 +0300 Subject: [PATCH 52/58] Add an additional comment Co-authored-by: Jelte Fennema --- src/backend/distributed/utils/attribute.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 2f0bcab02..b63f3ae85 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -700,6 +700,8 @@ ExtractTopComment(const char *inputString) /* Skip the comment start characters */ commentEndCharsIndex += commentStartCharsLength; + + /* Find the first comment end character */ while (commentEndCharsIndex < inputStringLen && !(inputString[commentEndCharsIndex] == '*' && inputString [commentEndCharsIndex + 1] == '/')) From 0744384bac7951a3545f14240f156d576b30203b Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Tue, 21 Mar 2023 11:22:29 +0300 Subject: [PATCH 53/58] Update comment Co-authored-by: Jelte Fennema --- src/backend/distributed/utils/attribute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index b63f3ae85..35fd67333 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -745,7 +745,7 @@ 
Substring(const char *str, int start, int end) /* Copy the substring to the new memory location */ strncpy_s(substr, end - start + 1, str + start, end - start); - /* Add null terminator to end the substring */ + /* Add null terminator to end of the substring */ substr[end - start] = '\0'; return substr; From 0b06e64c3f2174c1e305e15beef63ebc126252a5 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Tue, 21 Mar 2023 09:58:31 +0300 Subject: [PATCH 54/58] Use stringinfo for escaping/unescaping --- src/backend/distributed/utils/attribute.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index 35fd67333..ebf073d19 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -28,6 +28,7 @@ #include "utils/json.h" #include "distributed/utils/attribute.h" #include "common/base64.h" +#include "miscadmin.h" #include @@ -757,31 +758,28 @@ static char * EscapeCommentChars(const char *str) { int originalStringLength = strlen(str); - char *escapedString = (char *) palloc0((originalStringLength * 2 + 1) * sizeof(char)); - int escapedStringIndex = 0; + StringInfo escapedString = makeStringInfo(); for (int originalStringIndex = 0; originalStringIndex < originalStringLength; originalStringIndex++) { if (str[originalStringIndex] == '*' || str[originalStringIndex] == '/') { - escapedString[escapedStringIndex++] = '\\'; + appendStringInfoChar(escapedString, '\\'); } - escapedString[escapedStringIndex++] = str[originalStringIndex]; - } - escapedString[escapedStringIndex] = '\0'; - return escapedString; + appendStringInfoChar(escapedString, str[originalStringIndex]); + } + + return escapedString->data; } - /* UnescapeCommentChars removes the backslash that precedes '*' or '/' in the input string. 
*/ static char * UnescapeCommentChars(const char *str) { int originalStringLength = strlen(str); - char *unescapedString = (char *) palloc0((originalStringLength + 1) * sizeof(char)); - int unescapedStringIndex = 0; + StringInfo unescapedString = makeStringInfo(); for (int originalStringindex = 0; originalStringindex < originalStringLength; originalStringindex++) @@ -793,9 +791,8 @@ UnescapeCommentChars(const char *str) { originalStringindex++; } - unescapedString[unescapedStringIndex++] = str[originalStringindex]; + appendStringInfoChar(unescapedString, str[originalStringindex]); } - unescapedString[unescapedStringIndex] = '\0'; - return unescapedString; + return unescapedString->data; } From 508ee987e64195a78cfb49de1ea93056b5337658 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Tue, 21 Mar 2023 12:34:05 +0300 Subject: [PATCH 55/58] Rewrite ExtractTopComment by using strstr() and stringinfo --- src/backend/distributed/utils/attribute.c | 96 +++++++---------------- 1 file changed, 30 insertions(+), 66 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index ebf073d19..b4ed43e57 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -27,8 +27,7 @@ #include "utils/builtins.h" #include "utils/json.h" #include "distributed/utils/attribute.h" -#include "common/base64.h" -#include "miscadmin.h" + #include @@ -68,7 +67,6 @@ static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int FindTenantStats(MultiTenantMonitor *monitor); static size_t MultiTenantMonitorshmemSize(void); static char * ExtractTopComment(const char *inputString); -static char * Substring(const char *str, int start, int end); static char * EscapeCommentChars(const char *str); static char * UnescapeCommentChars(const char *str); @@ -683,76 +681,42 @@ MultiTenantMonitorshmemSize(void) static char * ExtractTopComment(const char *inputString) { - int commentStartCharsLength = 2; - int inputStringLen = strlen(inputString); - if (inputStringLen < commentStartCharsLength) - { - return NULL; - } + int commentCharsLength = 2; + int inputStringLen = strlen(inputString); + if (inputStringLen < commentCharsLength) + { + return NULL; + } - int commentEndCharsIndex = 0; + const char *commentStartChars = "/*"; + const char *commentEndChars = "*/"; - /* If query doesn't start with a comment, return NULL */ - if (inputString[commentEndCharsIndex] != '/' || - inputString[commentEndCharsIndex + 1] != '*') - { - return NULL; - } + /* If query doesn't start with a comment, return NULL */ + if (strstr(inputString, commentStartChars) != inputString) + { + return NULL; + } - /* Skip the comment start characters */ - commentEndCharsIndex += commentStartCharsLength; - - /* Find the first comment end character */ - while (commentEndCharsIndex < inputStringLen && - !(inputString[commentEndCharsIndex] == '*' && - inputString [commentEndCharsIndex + 1] == '/')) - { - commentEndCharsIndex++; - } + StringInfo commentData = makeStringInfo(); - /* If there is no end of comment chars , return NULL */ - if (inputString[commentEndCharsIndex] != '*' && - inputString[commentEndCharsIndex + 1] != '/') - { - return NULL; - } + /* Skip the comment start characters */ + const char *commentStart = inputString + commentCharsLength; - if (commentEndCharsIndex > commentStartCharsLength) - { - return Substring(inputString, commentStartCharsLength, commentEndCharsIndex); - } - else - { - return NULL; - } + /* Find the first comment 
end character */ + const char *commentEnd = strstr(commentStart, commentEndChars); + if (commentEnd == NULL) + { + return NULL; + } + + /* Append the comment to the StringInfo buffer */ + int commentLength = commentEnd - commentStart; + appendStringInfo(commentData, "%.*s", commentLength, commentStart); + + /* Return the extracted comment */ + return commentData->data; } - -/* Extracts a substring from the input string between the specified start and end indices.*/ -static char * -Substring(const char *str, int start, int end) -{ - int len = strlen(str); - - /* Ensure start and end are within the bounds of the string */ - if (start < 0 || end > len || start > end) - { - return NULL; - } - - /* Allocate memory for the substring */ - char *substr = (char *) palloc((end - start + 1) * sizeof(char)); - - /* Copy the substring to the new memory location */ - strncpy_s(substr, end - start + 1, str + start, end - start); - - /* Add null terminator to end of the substring */ - substr[end - start] = '\0'; - - return substr; -} - - /* EscapeCommentChars adds a backslash before each occurrence of '*' or '/' in the input string */ static char * EscapeCommentChars(const char *str) From b86f2b5607274d8794c38e9f613a357c5a25d13d Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 23 Mar 2023 10:20:50 +0300 Subject: [PATCH 56/58] Indent --- src/backend/distributed/utils/attribute.c | 58 ++++++++++++----------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index b4ed43e57..9c6871598 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -681,42 +681,43 @@ MultiTenantMonitorshmemSize(void) static char * ExtractTopComment(const char *inputString) { - int commentCharsLength = 2; - int inputStringLen = strlen(inputString); - if (inputStringLen < commentCharsLength) - { - return NULL; - } + int commentCharsLength = 2; + int inputStringLen = strlen(inputString); + if (inputStringLen < commentCharsLength) + { + return NULL; + } - const char *commentStartChars = "/*"; - const char *commentEndChars = "*/"; + const char *commentStartChars = "/*"; + const char *commentEndChars = "*/"; - /* If query doesn't start with a comment, return NULL */ - if (strstr(inputString, commentStartChars) != inputString) - { - return NULL; - } + /* If query doesn't start with a comment, return NULL */ + if (strstr(inputString, commentStartChars) != inputString) + { + return NULL; + } - StringInfo commentData = makeStringInfo(); + StringInfo commentData = makeStringInfo(); - /* Skip the comment start characters */ - const char *commentStart = inputString + commentCharsLength; + /* Skip the comment start characters */ + const char *commentStart = inputString + commentCharsLength; - /* Find the first comment end character */ - const char *commentEnd = strstr(commentStart, commentEndChars); - if (commentEnd == NULL) - { - return NULL; - } + /* Find the first comment end character */ + const char *commentEnd = strstr(commentStart, commentEndChars); + if (commentEnd == NULL) + { + return NULL; + } - /* Append the comment to the StringInfo buffer */ - int commentLength = commentEnd - commentStart; - appendStringInfo(commentData, "%.*s", commentLength, commentStart); + /* Append the comment to the StringInfo buffer */ + int commentLength = commentEnd - commentStart; + appendStringInfo(commentData, "%.*s", commentLength, commentStart); - /* Return the extracted comment */ - return 
commentData->data; + /* Return the extracted comment */ + return commentData->data; } + /* EscapeCommentChars adds a backslash before each occurrence of '*' or '/' in the input string */ static char * EscapeCommentChars(const char *str) @@ -733,11 +734,12 @@ EscapeCommentChars(const char *str) } appendStringInfoChar(escapedString, str[originalStringIndex]); - } + } return escapedString->data; } + /* UnescapeCommentChars removes the backslash that precedes '*' or '/' in the input string. */ static char * UnescapeCommentChars(const char *str) From bac337debda4c88fbfc20b7eddbe138ab2c9b139 Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 23 Mar 2023 11:10:29 +0300 Subject: [PATCH 57/58] Increase citus.stats_tenants_limit to 10 --- src/test/regress/pg_regress_multi.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index f4e85ab61..edee0eef4 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -487,7 +487,7 @@ push(@pgOptions, "citus.explain_analyze_sort_method='taskId'"); push(@pgOptions, "citus.enable_manual_changes_to_shards=on"); push(@pgOptions, "citus.allow_unsafe_locks_from_workers=on"); push(@pgOptions, "citus.stat_statements_track = 'all'"); -push(@pgOptions, "citus.stats_tenants_limit = 2"); +push(@pgOptions, "citus.stats_tenants_limit = 10"); # Some tests look at shards in pg_class, make sure we can usually see them: push(@pgOptions, "citus.show_shards_for_app_name_prefixes='pg_regress'"); From 96cf7d00e2db00cc0291f7eea59495950f522a4c Mon Sep 17 00:00:00 2001 From: Gokhan Gulbiz Date: Thu, 23 Mar 2023 11:12:16 +0300 Subject: [PATCH 58/58] Test special and multibyte characters in tenant attribute --- .../regress/expected/citus_stats_tenants.out | 116 +++++++++++++++++- src/test/regress/sql/citus_stats_tenants.sql | 25 ++++ 2 files changed, 140 insertions(+), 1 deletion(-) diff --git a/src/test/regress/expected/citus_stats_tenants.out b/src/test/regress/expected/citus_stats_tenants.out index 783f38240..48004b0d7 100644 --- a/src/test/regress/expected/citus_stats_tenants.out +++ b/src/test/regress/expected/citus_stats_tenants.out @@ -220,8 +220,10 @@ SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tena bcde | 3 | 3000000000 2 | 1 | 1000000000 3 | 1 | 1000000000 + 4 | 1 | 1000000000 + cdef | 1 | 1000000000 defg | 1 | 1000000000 -(5 rows) +(7 rows) -- test period passing SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); @@ -262,6 +264,118 @@ SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, q 5 | 0 | 0 | 0 | 1 (2 rows) +\c - - - :worker_2_port +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; + tenant_attribute | query_count_in_this_period | score +--------------------------------------------------------------------- + 1 | 0 | 500000000 + 5 | 0 | 500000000 +(2 rows) + +\c - - - :master_port +SET search_path TO citus_stats_tenants; +-- test special and multibyte characters in tenant attribute +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); + result +--------------------------------------------------------------------- + + + +(3 rows) + +TRUNCATE TABLE dist_tbl_text; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/bcde'; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/*bcde'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/b*cde'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/b*c/de'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'b/*//cde'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/b/*/cde'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/b/**/cde'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde*'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde*/'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = U&'\0061\0308bc'; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +\c - - - :worker_1_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; + tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period +--------------------------------------------------------------------- + /*bcde | 1 | 0 | 1 | 0 + /b*c/de | 1 | 0 | 1 | 0 + /b*cde | 1 | 0 | 1 | 0 + /b/**/cde | 1 | 0 | 1 | 0 + /b/*/cde | 1 | 0 | 1 | 0 + /bcde | 1 | 0 | 1 | 0 + äbc | 1 | 0 | 1 | 0 + b/*//cde | 1 | 0 | 1 | 0 + bcde* | 1 | 0 | 1 | 0 + bcde*/ | 1 | 0 | 1 | 0 +(10 rows) + +\c - - - :worker_2_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; + tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period +--------------------------------------------------------------------- + /*bcde | 1 | 0 | 1 | 0 + /b*c/de | 1 | 0 | 1 | 0 + /b*cde | 1 | 0 | 1 | 0 + /b/**/cde | 1 | 0 | 1 | 0 + /b/*/cde | 1 | 0 | 1 | 0 + /bcde | 1 | 0 | 1 | 0 + äbc | 1 | 0 | 1 | 0 + b/*//cde | 1 | 0 | 1 | 0 + bcde* | 1 | 0 | 1 | 0 + bcde*/ | 1 | 0 | 1 | 0 +(10 rows) + \c - - - :master_port SET search_path TO citus_stats_tenants; SET client_min_messages TO ERROR; diff --git a/src/test/regress/sql/citus_stats_tenants.sql b/src/test/regress/sql/citus_stats_tenants.sql index 981533a6e..1e72cd6bd 100644 --- a/src/test/regress/sql/citus_stats_tenants.sql +++ b/src/test/regress/sql/citus_stats_tenants.sql @@ -93,6 +93,31 @@ SET citus.stats_tenants_period TO 2; SELECT sleep_until_next_period(); SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants_local ORDER BY tenant_attribute; +\c - - - :worker_2_port +SELECT tenant_attribute, 
query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; + +\c - - - :master_port +SET search_path TO citus_stats_tenants; + +-- test special and multibyte characters in tenant attribute +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); +TRUNCATE TABLE dist_tbl_text; + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/bcde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/*bcde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/b*cde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/b*c/de'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'b/*//cde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/b/*/cde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = '/b/**/cde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde*'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde*/'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = U&'\0061\0308bc'; + +\c - - - :worker_1_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +\c - - - :worker_2_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; \c - - - :master_port SET search_path TO citus_stats_tenants;
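
As a sanity check on the escaping scheme these tests exercise, here is a
minimal standalone sketch (not part of the patch set):
escape_comment_chars() and unescape_comment_chars() are hypothetical plain-C,
malloc-based re-implementations of the static EscapeCommentChars() /
UnescapeCommentChars() helpers from attribute.c, and the sample strings
mirror the tenant attributes tested above. The property being verified is
that an escaped attribute can never contain a bare "*/" (so embedding it in
the annotation comment cannot terminate the comment early) and that
unescaping restores the original value:

```
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for the static EscapeCommentChars() in attribute.c:
 * put a backslash before every '*' and '/' in the input. */
static char *
escape_comment_chars(const char *str)
{
	size_t len = strlen(str);
	char *escaped = malloc(len * 2 + 1); /* worst case: every char escaped */
	size_t j = 0;

	for (size_t i = 0; i < len; i++)
	{
		if (str[i] == '*' || str[i] == '/')
		{
			escaped[j++] = '\\';
		}
		escaped[j++] = str[i];
	}
	escaped[j] = '\0';
	return escaped;
}

/* Hypothetical stand-in for the static UnescapeCommentChars():
 * drop the backslash that precedes a '*' or '/'. */
static char *
unescape_comment_chars(const char *str)
{
	size_t len = strlen(str);
	char *unescaped = malloc(len + 1);
	size_t j = 0;

	for (size_t i = 0; i < len; i++)
	{
		if (str[i] == '\\' && i + 1 < len &&
			(str[i + 1] == '*' || str[i + 1] == '/'))
		{
			i++; /* skip the backslash, keep the escaped character */
		}
		unescaped[j++] = str[i];
	}
	unescaped[j] = '\0';
	return unescaped;
}

int
main(void)
{
	/* A few of the tenant attributes the new regression tests exercise. */
	const char *samples[] = { "/bcde", "/*bcde", "b/*//cde", "/b/**/cde", "bcde*/" };

	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
	{
		char *escaped = escape_comment_chars(samples[i]);
		char *roundtrip = unescape_comment_chars(escaped);

		/* No bare comment terminator may survive escaping... */
		assert(strstr(escaped, "*/") == NULL);

		/* ...and unescaping must restore the original attribute. */
		assert(strcmp(roundtrip, samples[i]) == 0);

		printf("%-12s -> %s\n", samples[i], escaped);
		free(escaped);
		free(roundtrip);
	}
	return 0;
}
```

The asymmetric allocations (len * 2 + 1 for escaping, len + 1 for
unescaping) mirror the worst-case sizes used by the original palloc-based
versions of these helpers before they were switched to StringInfo.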