From 98ac3aa93806fdb90f696a9bd9ff224664b28ca6 Mon Sep 17 00:00:00 2001
From: paragjain <paragikjain786@gmail.com>
Date: Sat, 13 Jul 2024 06:32:24 +0000
Subject: [PATCH] addressing review comments

---
 src/backend/distributed/commands/multi_copy.c |   2 +-
 .../distributed/planner/merge_planner.c       |  35 +-
 src/test/regress/expected/merge_vcore.out     | 480 ++++++++++++++++++
 src/test/regress/multi_schedule               |   1 +
 src/test/regress/sql/merge_vcore.sql          | 313 ++++++++++++
 5 files changed, 814 insertions(+), 17 deletions(-)
 create mode 100644 src/test/regress/expected/merge_vcore.out
 create mode 100644 src/test/regress/sql/merge_vcore.sql

diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c
index 23847ac01..cb64ef7f5 100644
--- a/src/backend/distributed/commands/multi_copy.c
+++ b/src/backend/distributed/commands/multi_copy.c
@@ -2568,7 +2568,7 @@ ShardIdForTuple(CitusCopyDestReceiver *copyDest, Datum *columnValues, bool *colu
 	 * Find the shard interval and id for the partition column value for
 	 * non-reference tables.
 	 *
-	 * For reference table, this function blindly returns the tables single
+	 * For reference table, and single shard distributed table this function blindly returns the tables single
 	 * shard.
 	 */
 	ShardInterval *shardInterval = FindShardInterval(partitionColumnValue, cacheEntry);
diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c
index 417996310..1f9d17c43 100644
--- a/src/backend/distributed/planner/merge_planner.c
+++ b/src/backend/distributed/planner/merge_planner.c
@@ -243,18 +243,26 @@ CreateNonPushableMergePlan(Oid targetRelationId, uint64 planId, Query *originalQ
 
 	CitusTableCacheEntry *targetRelation = GetCitusTableCacheEntry(targetRelationId);
 
-	bool repartitioned = IsSupportedRedistributionTarget(targetRelationId);
 
-	if(repartitioned)
+	if (IsCitusTableType(targetRelation->relationId, SINGLE_SHARD_DISTRIBUTED))
 	{
 		/*
-		* Get the index of the column in the source query that will be utilized
-		* to repartition the source rows, ensuring colocation with the target
-		*/
+		 * if target table is SINGLE_SHARD_DISTRIBUTED let's set this to invalid -1
+		 * so later in execution phase we don't rely on this value and try to find single shard of target instead.
+		 */
+		distributedPlan->sourceResultRepartitionColumnIndex = -1;
+	}
+	else
+	{
+		/*
+		 * Get the index of the column in the source query that will be utilized
+		 * to repartition the source rows, ensuring colocation with the target
+		 */
+
 		distributedPlan->sourceResultRepartitionColumnIndex =
 			SourceResultPartitionColumnIndex(mergeQuery,
-											sourceQuery->targetList,
-											targetRelation);
+											 sourceQuery->targetList,
+											 targetRelation);
 	}
 
 	/*
@@ -267,9 +275,11 @@ CreateNonPushableMergePlan(Oid targetRelationId, uint64 planId, Query *originalQ
 	int cursorOptions = CURSOR_OPT_PARALLEL_OK;
 	PlannedStmt *sourceRowsPlan = pg_plan_query(sourceQueryCopy, NULL, cursorOptions,
 												boundParams);
-	repartitioned = repartitioned && IsRedistributablePlan(sourceRowsPlan->planTree);
+	bool isRepartitionAllowed = IsRedistributablePlan(sourceRowsPlan->planTree) &&
+								IsSupportedRedistributionTarget(targetRelationId);
+
 	/* If plan is distributed, no work at the coordinator */
-	if (repartitioned)
+	if (isRepartitionAllowed)
 	{
 		distributedPlan->modifyWithSelectMethod = MODIFY_WITH_SELECT_REPARTITION;
 	}
@@ -1276,13 +1286,6 @@ static int
 SourceResultPartitionColumnIndex(Query *mergeQuery, List *sourceTargetList,
 								 CitusTableCacheEntry *targetRelation)
 {
-	if (IsCitusTableType(targetRelation->relationId, SINGLE_SHARD_DISTRIBUTED))
-	{
-		ereport(ERROR, (errmsg("MERGE operation across distributed schemas "
-							   "or with a row-based distributed table is "
-							   "not yet supported")));
-	}
-
 	/* Get all the Join conditions from the ON clause */
 	List *mergeJoinConditionList = WhereClauseList(mergeQuery->jointree);
 	Var *targetColumn = targetRelation->partitionColumn;
diff --git a/src/test/regress/expected/merge_vcore.out b/src/test/regress/expected/merge_vcore.out
new file mode 100644
index 000000000..91fd49330
--- /dev/null
+++ b/src/test/regress/expected/merge_vcore.out
@@ -0,0 +1,480 @@
+SHOW server_version \gset
+SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15
+\gset
+\if :server_version_ge_15
+\else
+\q
+\endif
+-- MERGE command performs a join from data_source to target_table_name
+DROP SCHEMA IF EXISTS merge_schema CASCADE;
+NOTICE:  schema "merge_schema" does not exist, skipping
+--MERGE INTO target
+--USING source
+--WHEN NOT MATCHED
+--WHEN MATCHED AND <condition>
+--WHEN MATCHED
+CREATE SCHEMA merge_schema;
+SET search_path TO merge_schema;
+SET citus.shard_count TO 4;
+SET citus.next_shard_id TO 4000000;
+SET citus.explain_all_tasks TO true;
+SET citus.shard_replication_factor TO 1;
+SET citus.max_adaptive_executor_pool_size TO 1;
+SET client_min_messages = warning;
+SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0);
+ ?column?
+---------------------------------------------------------------------
+        1
+(1 row)
+
+RESET client_min_messages;
+-- ****************************************** CASE 1 : Both are singleSharded***************************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+SELECT create_distributed_table('source', null, colocate_with=>'none');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('target', null, colocate_with=>'none');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"a" : 1}
+  2 | {"a" : 2}
+(2 rows)
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"b" : 1}
+  2 | {"b" : 1}
+(2 rows)
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus MERGE INTO ...)  (cost=0.00..0.00 rows=0 width=0)
+   MERGE INTO target method: pull to coordinator
+   ->  Custom Scan (Citus Adaptive)  (cost=0.00..0.00 rows=0 width=0)
+         Task Count: 1
+         Tasks Shown: All
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000000 source  (cost=0.00..22.00 rows=1200 width=40)
+(8 rows)
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+-- *************** CASE 2 : source is single sharded and target is distributed *******************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+SELECT create_distributed_table('source', null, colocate_with=>'none');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('target', 'id');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"a" : 1}
+  2 | {"a" : 2}
+(2 rows)
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"b" : 1}
+  2 | {"b" : 1}
+(2 rows)
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus MERGE INTO ...)  (cost=0.00..0.00 rows=0 width=0)
+   MERGE INTO target method: pull to coordinator
+   ->  Custom Scan (Citus Adaptive)  (cost=0.00..0.00 rows=0 width=0)
+         Task Count: 1
+         Tasks Shown: All
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000002 source  (cost=0.00..22.00 rows=1200 width=40)
+(8 rows)
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+-- *************** CASE 3 : source is distributed  and target is single sharded *******************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+SELECT create_distributed_table('source', 'id');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('target', null);
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"a" : 1}
+  2 | {"a" : 2}
+(2 rows)
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"b" : 1}
+  2 | {"b" : 1}
+(2 rows)
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus MERGE INTO ...)  (cost=0.00..0.00 rows=0 width=0)
+   MERGE INTO target method: pull to coordinator
+   ->  Custom Scan (Citus Adaptive)  (cost=0.00..0.00 rows=100000 width=40)
+         Task Count: 4
+         Tasks Shown: All
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000007 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000008 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000009 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000010 source  (cost=0.00..22.00 rows=1200 width=40)
+(17 rows)
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+-- *************** CASE 4 : both are distributed  *******************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+SELECT create_distributed_table('source', 'id');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('target', 'id');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"a" : 1}
+  2 | {"a" : 2}
+(2 rows)
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"b" : 1}
+  2 | {"b" : 1}
+(2 rows)
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus MERGE INTO ...)  (cost=0.00..0.00 rows=0 width=0)
+   MERGE INTO target method: repartition
+   ->  Custom Scan (Citus Adaptive)  (cost=0.00..0.00 rows=100000 width=40)
+         Task Count: 4
+         Tasks Shown: All
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000012 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000013 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000014 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000015 source  (cost=0.00..22.00 rows=1200 width=40)
+(17 rows)
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+-- *************** CASE 5 : both are distributed & colocated *******************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+SELECT create_distributed_table('source', 'id');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('target', 'id', colocate_with=>'source');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"a" : 1}
+  2 | {"a" : 2}
+(2 rows)
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"b" : 1}
+  2 | {"b" : 1}
+(2 rows)
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus MERGE INTO ...)  (cost=0.00..0.00 rows=0 width=0)
+   MERGE INTO target method: repartition
+   ->  Custom Scan (Citus Adaptive)  (cost=0.00..0.00 rows=100000 width=40)
+         Task Count: 4
+         Tasks Shown: All
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000020 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000021 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000022 source  (cost=0.00..22.00 rows=1200 width=40)
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on source_4000023 source  (cost=0.00..22.00 rows=1200 width=40)
+(17 rows)
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+-- *************** CASE 6 : both are singlesharded & colocated *******************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+SELECT create_distributed_table('source', null);
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('target', null, colocate_with=>'source');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"a" : 1}
+  2 | {"a" : 2}
+(2 rows)
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+SELECT * FROM target;
+ id |    doc
+---------------------------------------------------------------------
+  2 | {"b" : 1}
+  2 | {"b" : 1}
+(2 rows)
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+                                             QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive)  (cost=0.00..0.00 rows=0 width=0)
+   Task Count: 1
+   Tasks Shown: All
+   ->  Task
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Merge on target_4000029 target  (cost=0.00..137.01 rows=0 width=0)
+               ->  Nested Loop  (cost=0.00..137.01 rows=7200 width=6)
+                     ->  Seq Scan on source_4000028 source  (cost=0.00..22.00 rows=1200 width=0)
+                     ->  Materialize  (cost=0.00..25.03 rows=6 width=6)
+                           ->  Seq Scan on target_4000029 target  (cost=0.00..25.00 rows=6 width=6)
+                                 Filter: ('2'::bigint = id)
+(11 rows)
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule
index 7f0c7ca57..220ce1964 100644
--- a/src/test/regress/multi_schedule
+++ b/src/test/regress/multi_schedule
@@ -120,6 +120,7 @@ test: merge pgmerge
 test: merge_repartition2
 test: merge_repartition1 merge_schema_sharding
 test: merge_partition_tables
+test: merge_vcore
 
 # ---------
 # test that no tests leaked intermediate results. This should always be last
diff --git a/src/test/regress/sql/merge_vcore.sql b/src/test/regress/sql/merge_vcore.sql
new file mode 100644
index 000000000..f4ef4cb80
--- /dev/null
+++ b/src/test/regress/sql/merge_vcore.sql
@@ -0,0 +1,313 @@
+SHOW server_version \gset
+SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15
+\gset
+\if :server_version_ge_15
+\else
+\q
+\endif
+
+-- MERGE command performs a join from data_source to target_table_name
+DROP SCHEMA IF EXISTS merge_schema CASCADE;
+--MERGE INTO target
+--USING source
+--WHEN NOT MATCHED
+--WHEN MATCHED AND <condition>
+--WHEN MATCHED
+
+CREATE SCHEMA merge_schema;
+SET search_path TO merge_schema;
+SET citus.shard_count TO 4;
+SET citus.next_shard_id TO 4000000;
+SET citus.explain_all_tasks TO true;
+SET citus.shard_replication_factor TO 1;
+SET citus.max_adaptive_executor_pool_size TO 1;
+SET client_min_messages = warning;
+SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0);
+RESET client_min_messages;
+
+
+-- ****************************************** CASE 1 : Both are singleSharded***************************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+
+SELECT create_distributed_table('source', null, colocate_with=>'none');
+SELECT create_distributed_table('target', null, colocate_with=>'none');
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+
+
+-- *************** CASE 2 : source is single sharded and target is distributed *******************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+
+SELECT create_distributed_table('source', null, colocate_with=>'none');
+SELECT create_distributed_table('target', 'id');
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+
+
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+
+
+-- *************** CASE 3 : source is distributed  and target is single sharded *******************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+
+SELECT create_distributed_table('source', 'id');
+SELECT create_distributed_table('target', null);
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+
+
+-- *************** CASE 4 : both are distributed  *******************************
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+
+SELECT create_distributed_table('source', 'id');
+SELECT create_distributed_table('target', 'id');
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+
+
+-- *************** CASE 5 : both are distributed & colocated *******************************
+
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+
+SELECT create_distributed_table('source', 'id');
+SELECT create_distributed_table('target', 'id', colocate_with=>'source');
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;
+
+
+-- *************** CASE 6 : both are singlesharded & colocated *******************************
+
+CREATE TABLE source (
+    id bigint,
+    doc text
+);
+
+CREATE TABLE target (
+    id bigint,
+    doc text
+);
+
+SELECT create_distributed_table('source', null);
+SELECT create_distributed_table('target', null, colocate_with=>'source');
+
+INSERT INTO source (id, doc) VALUES (1, '{"a" : 1}'), (1, '{"a" : 2}');
+
+-- insert
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- update
+MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id) AND src.doc = target.doc
+WHEN MATCHED THEN
+UPDATE SET doc = '{"b" : 1}'
+WHEN NOT MATCHED THEN
+INSERT (id, doc)
+VALUES (src.t_id, doc);
+
+SELECT * FROM target;
+
+-- Explain
+EXPLAIN MERGE INTO ONLY target USING (SELECT 2::bigint AS t_id, doc FROM source) src 
+ON (src.t_id = target.id)
+WHEN MATCHED THEN DO NOTHING;
+
+DROP TABLE IF EXISTS source;
+DROP TABLE IF EXISTS target;