Merge branch 'main' into sqlancer-test-gha

pull/6697/head
Gokhan Gulbiz 2023-04-04 16:00:28 +03:00 committed by GitHub
commit 201d976a3b
290 changed files with 24462 additions and 5360 deletions

View File

@ -6,7 +6,7 @@ orbs:
parameters:
image_suffix:
type: string
default: '-vc4b1573'
default: '-v087ecd7'
pg13_version:
type: string
default: '13.10'
@ -201,6 +201,9 @@ jobs:
- run:
name: 'Check if all GUCs are sorted alphabetically'
command: ci/check_gucs_are_alphabetically_sorted.sh
- run:
name: 'Check for missing downgrade scripts'
command: ci/check_migration_files.sh
check-sql-snapshots:
docker:
@ -266,6 +269,41 @@ jobs:
- coverage:
flags: 'test_<< parameters.old_pg_major >>_<< parameters.new_pg_major >>,upgrade'
test-pytest:
description: Runs pytest-based tests
parameters:
pg_major:
description: 'postgres major version'
type: integer
image:
description: 'docker image to use for the tests'
type: string
default: citus/failtester
image_tag:
description: 'docker image tag to use'
type: string
docker:
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
working_directory: /home/circleci/project
steps:
- checkout
- attach_workspace:
at: .
- install_extension:
pg_major: << parameters.pg_major >>
- configure
- enable_core
- run:
name: 'Run pytest'
command: |
gosu circleci \
make -C src/test/regress check-pytest
no_output_timeout: 2m
- stack_trace
- coverage:
flags: 'test_<< parameters.pg_major >>,pytest'
test-arbitrary-configs:
description: Runs tests on arbitrary configs
parallelism: 6
@ -452,6 +490,10 @@ jobs:
pg_major: << parameters.pg_major >>
- configure
- enable_core
- run:
name: 'Install DBI.pm'
command: |
apt-get update && apt-get install libdbi-perl && apt-get install libdbd-pg-perl
- run:
name: 'Run Test'
command: |
@ -551,7 +593,7 @@ jobs:
testForDebugging="<< parameters.test >>"
if [ -z "$testForDebugging" ]; then
detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*.sql\|src/test/regress/spec/.*.spec' || true))
detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*\.sql\|src/test/regress/spec/.*\.spec\|src/test/regress/citus_tests/test/test_.*\.py' || true))
tests=${detected_changes}
else
tests=$testForDebugging;
@ -854,38 +896,30 @@ workflows:
image: citus/failtester
make: check-failure
- tap-test-citus: &tap-test-citus-13
name: 'test-13_tap-recovery'
suite: recovery
- test-pytest:
name: 'test-13_pytest'
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
requires: [build-13]
- tap-test-citus:
<<: *tap-test-citus-13
name: 'test-13_tap-columnar-freezing'
suite: columnar_freezing
- tap-test-citus: &tap-test-citus-14
name: 'test-14_tap-recovery'
suite: recovery
- test-pytest:
name: 'test-14_pytest'
pg_major: 14
image_tag: '<< pipeline.parameters.pg14_version >>'
requires: [build-14]
- tap-test-citus:
<<: *tap-test-citus-14
name: 'test-14_tap-columnar-freezing'
suite: columnar_freezing
- tap-test-citus: &tap-test-citus-15
name: 'test-15_tap-recovery'
suite: recovery
- test-pytest:
name: 'test-15_pytest'
pg_major: 15
image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15]
- tap-test-citus:
<<: *tap-test-citus-15
name: 'test-15_tap-columnar-freezing'
suite: columnar_freezing
name: 'test-15_tap-cdc'
suite: cdc
pg_major: 15
image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15]
- test-arbitrary-configs:
name: 'test-13_check-arbitrary-configs'
@ -936,8 +970,6 @@ workflows:
- test-13_check-follower-cluster
- test-13_check-columnar
- test-13_check-columnar-isolation
- test-13_tap-recovery
- test-13_tap-columnar-freezing
- test-13_check-failure
- test-13_check-enterprise
- test-13_check-enterprise-isolation
@ -956,8 +988,6 @@ workflows:
- test-14_check-follower-cluster
- test-14_check-columnar
- test-14_check-columnar-isolation
- test-14_tap-recovery
- test-14_tap-columnar-freezing
- test-14_check-failure
- test-14_check-enterprise
- test-14_check-enterprise-isolation
@ -976,8 +1006,6 @@ workflows:
- test-15_check-follower-cluster
- test-15_check-columnar
- test-15_check-columnar-isolation
- test-15_tap-recovery
- test-15_tap-columnar-freezing
- test-15_check-failure
- test-15_check-enterprise
- test-15_check-enterprise-isolation

View File

@ -17,7 +17,7 @@ trim_trailing_whitespace = true
insert_final_newline = unset
trim_trailing_whitespace = unset
[*.{sql,sh,py}]
[*.{sql,sh,py,toml}]
indent_style = space
indent_size = 4
tab_width = 4

View File

@ -1,7 +1,6 @@
[flake8]
# E203 is ignored for black
# E402 is ignored because of the way we do relative imports
extend-ignore = E203, E402
extend-ignore = E203
# black will truncate to 88 characters usually, but long string literals it
# might keep. That's fine in most cases unless it gets really excessive.
max-line-length = 150

View File

@ -157,7 +157,6 @@ jobs:
apt-get update -y
## Install required packages to execute packaging tools for deb based distros
apt install python3-dev python3-pip -y
sudo apt-get purge -y python3-yaml
python3 -m pip install --upgrade pip setuptools==57.5.0
apt-get install python3-dev python3-pip -y
apt-get purge -y python3-yaml
./.github/packaging/validate_build_output.sh "deb"

View File

@ -283,6 +283,14 @@ actually run in CI. This is most commonly forgotten for newly added CI tests
that the developer only ran locally. It also checks that all CI scripts have a
section in this `README.md` file and that they include `ci/ci_helpers.sh`.
## `check_migration_files.sh`
A branch that touches a set of upgrade scripts is expected to touch the
corresponding downgrade scripts as well. For example, a branch that adds
src/backend/distributed/sql/citus--X--Y.sql is expected to also add
src/backend/distributed/sql/downgrades/citus--Y--X.sql. If this script fails,
read the output and make sure you update the downgrade scripts in the printed
list. If you really don't need a downgrade to run any SQL, you can write a
comment in the file explaining why a downgrade step is not necessary.
## `disallow_c_comments_in_migrations.sh`
We do not use C-style comments in migration files as the stripped

ci/check_migration_files.sh (new executable file, 33 lines)
View File

@ -0,0 +1,33 @@
#! /bin/bash
set -euo pipefail
# shellcheck disable=SC1091
source ci/ci_helpers.sh
# This file checks for the existence of downgrade scripts for every upgrade script that is changed in the branch.
# create list of migration files for upgrades
upgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/citus--.*sql" || exit 0 ; })
downgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/downgrades/citus--.*sql" || exit 0 ; })
ret_value=0
for file in $upgrade_files
do
# There should always be 2 matches, and no need to avoid splitting here
# shellcheck disable=SC2207
versions=($(grep --only-matching --extended-regexp "[0-9]+\.[0-9]+[-.][0-9]+" <<< "$file"))
from_version=${versions[0]};
to_version=${versions[1]};
downgrade_migration_file="src/backend/distributed/sql/downgrades/citus--$to_version--$from_version.sql"
# check for the existence of migration scripts
if [[ $(grep --line-regexp --count "$downgrade_migration_file" <<< "$downgrade_files") == 0 ]]
then
echo "$file is updated, but $downgrade_migration_file is not updated in branch"
ret_value=1
fi
done
exit $ret_value;

View File

@ -3,3 +3,35 @@ profile = 'black'
[tool.black]
include = '(src/test/regress/bin/diff-filter|\.pyi?|\.ipynb)$'
[tool.pytest.ini_options]
addopts = [
"--import-mode=importlib",
"--showlocals",
"--tb=short",
]
pythonpath = 'src/test/regress/citus_tests'
asyncio_mode = 'auto'
# Make test discovery quicker from the root dir of the repo
testpaths = ['src/test/regress/citus_tests/test']
# Make test discovery quicker from other directories than root directory
norecursedirs = [
'*.egg',
'.*',
'build',
'venv',
'ci',
'vendor',
'backend',
'bin',
'include',
'tmp_*',
'results',
'expected',
'sql',
'spec',
'data',
'__pycache__',
]

src/backend/columnar/.gitignore (vendored normal file, 3 lines)
View File

@ -0,0 +1,3 @@
# The directory used to store columnar sql files after pre-processing them
# with 'cpp' in build-time, see src/backend/columnar/Makefile.
/build/

View File

@ -10,14 +10,51 @@ OBJS += \
MODULE_big = citus_columnar
EXTENSION = citus_columnar
columnar_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql))
columnar_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql))
DATA = $(columnar_sql_files) \
$(columnar_downgrade_sql_files)
template_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql))
template_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/sql/downgrades/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql))
generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_files))
generated_downgrade_sql_files += $(patsubst %,$(citus_abs_srcdir)/build/sql/%,$(template_downgrade_sql_files))
DATA_built = $(generated_sql_files)
PG_CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include
include $(citus_top_builddir)/Makefile.global
.PHONY: install-all
SQL_DEPDIR=.deps/sql
SQL_BUILDDIR=build/sql
$(generated_sql_files): $(citus_abs_srcdir)/build/%: %
@mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR)
@# -MF is used to store dependency files(.Po) in another directory for separation
@# -MT is used to change the target of the rule emitted by dependency generation.
@# -P is used to inhibit generation of linemarkers in the output from the preprocessor.
@# -undef is used to not predefine any system-specific or GCC-specific macros.
@# `man cpp` for further information
cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
$(generated_downgrade_sql_files): $(citus_abs_srcdir)/build/sql/%: sql/downgrades/%
@mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR)
@# -MF is used to store dependency files(.Po) in another directory for separation
@# -MT is used to change the target of the rule emitted by dependency generation.
@# -P is used to inhibit generation of linemarkers in the output from the preprocessor.
@# -undef is used to not predefine any system-specific or GCC-specific macros.
@# `man cpp` for further information
cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
.PHONY: install install-downgrades install-all
cleanup-before-install:
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar.control
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/columnar--*
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar--*
install: cleanup-before-install
# install and install-downgrades should be run sequentially
install-all: install
$(MAKE) install-downgrades
install-downgrades: $(generated_downgrade_sql_files)
$(INSTALL_DATA) $(generated_downgrade_sql_files) '$(DESTDIR)$(datadir)/$(datamoduledir)/'

View File

@ -1 +1,19 @@
-- citus_columnar--11.1-1--11.2-1
#include "udfs/columnar_ensure_am_depends_catalog/11.2-1.sql"
DELETE FROM pg_depend
WHERE classid = 'pg_am'::regclass::oid
AND objid IN (select oid from pg_am where amname = 'columnar')
AND objsubid = 0
AND refclassid = 'pg_class'::regclass::oid
AND refobjid IN (
'columnar_internal.stripe_first_row_number_idx'::regclass::oid,
'columnar_internal.chunk_group_pkey'::regclass::oid,
'columnar_internal.chunk_pkey'::regclass::oid,
'columnar_internal.options_pkey'::regclass::oid,
'columnar_internal.stripe_first_row_number_idx'::regclass::oid,
'columnar_internal.stripe_pkey'::regclass::oid
)
AND refobjsubid = 0
AND deptype = 'n';
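For readers following the migration, an illustrative query (not part of the migration itself) shows which dependency edges remain for the columnar access method once the index and primary key edges above are deleted:

-- Illustrative only: list the pg_depend edges that still tie pg_class
-- objects to the columnar access method after this upgrade script runs.
SELECT refobjid::regclass AS referenced_object, deptype
FROM pg_depend
WHERE classid = 'pg_am'::regclass
AND objid = (SELECT oid FROM pg_am WHERE amname = 'columnar')
AND refclassid = 'pg_class'::regclass;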

View File

@ -1 +1,4 @@
-- citus_columnar--11.2-1--11.1-1
-- Note that we intentionally do not re-insert the pg_depend records that we
-- deleted via citus_columnar--11.1-1--11.2-1.sql.

View File

@ -0,0 +1,43 @@
CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
RETURNS void
LANGUAGE plpgsql
SET search_path = pg_catalog
AS $func$
BEGIN
INSERT INTO pg_depend
WITH columnar_schema_members(relid) AS (
SELECT pg_class.oid AS relid FROM pg_class
WHERE relnamespace =
COALESCE(
(SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar_internal'),
(SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar')
)
AND relname IN ('chunk',
'chunk_group',
'options',
'storageid_seq',
'stripe')
)
SELECT -- Define a dependency edge from "columnar table access method" ..
'pg_am'::regclass::oid as classid,
(select oid from pg_am where amname = 'columnar') as objid,
0 as objsubid,
-- ... to some objects registered as regclass that live in the
-- "columnar" schema: the catalog tables and the sequences
-- created in the "columnar" schema.
--
-- Since users might have created their own objects in the columnar
-- schema, we explicitly specify the list of objects that we
-- are interested in.
'pg_class'::regclass::oid as refclassid,
columnar_schema_members.relid as refobjid,
0 as refobjsubid,
'n' as deptype
FROM columnar_schema_members
-- Avoid inserting duplicate entries into pg_depend.
EXCEPT TABLE pg_depend;
END;
$func$;
COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
IS 'internal function responsible for creating dependencies from columnar '
'table access method to the rel objects in columnar schema';
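A minimal usage sketch: because the EXCEPT TABLE pg_depend clause filters out rows that already exist, the function is idempotent and safe to call repeatedly.

-- Illustrative only: the second call inserts nothing, since every edge it
-- would add is already present in pg_depend.
SELECT columnar_internal.columnar_ensure_am_depends_catalog();
SELECT columnar_internal.columnar_ensure_am_depends_catalog();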

View File

@ -1,4 +1,4 @@
CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
RETURNS void
LANGUAGE plpgsql
SET search_path = pg_catalog
@ -14,22 +14,17 @@ BEGIN
)
AND relname IN ('chunk',
'chunk_group',
'chunk_group_pkey',
'chunk_pkey',
'options',
'options_pkey',
'storageid_seq',
'stripe',
'stripe_first_row_number_idx',
'stripe_pkey')
'stripe')
)
SELECT -- Define a dependency edge from "columnar table access method" ..
'pg_am'::regclass::oid as classid,
(select oid from pg_am where amname = 'columnar') as objid,
0 as objsubid,
-- ... to each object that is registered to pg_class and that lives
-- in "columnar" schema. That contains catalog tables, indexes
-- created on them and the sequences created in "columnar" schema.
-- ... to some objects registered as regclass that live in the
-- "columnar" schema: the catalog tables and the sequences
-- created in the "columnar" schema.
--
-- Since users might have created their own objects in the columnar
-- schema, we explicitly specify the list of objects that we
@ -43,6 +38,6 @@ BEGIN
EXCEPT TABLE pg_depend;
END;
$func$;
COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
IS 'internal function responsible for creating dependencies from columnar '
'table access method to the rel objects in columnar schema';

View File

@ -32,7 +32,13 @@ OBJS += \
$(patsubst $(citus_abs_srcdir)/%.c,%.o,$(foreach dir,$(SUBDIRS), $(sort $(wildcard $(citus_abs_srcdir)/$(dir)/*.c))))
# be explicit about the default target
all:
.PHONY: cdc
all: cdc
cdc:
echo "running cdc make"
$(MAKE) DECODER=pgoutput -C cdc all
NO_PGXS = 1
@ -81,11 +87,19 @@ endif
.PHONY: clean-full install install-downgrades install-all
clean: clean-cdc
clean-cdc:
$(MAKE) DECODER=pgoutput -C cdc clean
cleanup-before-install:
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus.control
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus--*
install: cleanup-before-install
install: cleanup-before-install install-cdc
install-cdc:
$(MAKE) DECODER=pgoutput -C cdc install
# install and install-downgrades should be run sequentially
install-all: install
@ -96,4 +110,5 @@ install-downgrades: $(generated_downgrade_sql_files)
clean-full:
$(MAKE) clean
$(MAKE) -C cdc clean-full
rm -rf $(safestringlib_builddir)

View File

@ -0,0 +1,26 @@
ifndef DECODER
DECODER = pgoutput
endif
MODULE_big = citus_$(DECODER)
citus_subdir = src/backend/distributed/cdc
citus_top_builddir = ../../../..
citus_decoders_dir = $(DESTDIR)$(pkglibdir)/citus_decoders
OBJS += cdc_decoder.o cdc_decoder_utils.o
include $(citus_top_builddir)/Makefile.global
override CFLAGS += -DDECODER=\"$(DECODER)\" -I$(citus_abs_top_srcdir)/include
override CPPFLAGS += -DDECODER=\"$(DECODER)\" -I$(citus_abs_top_srcdir)/include
install: install-cdc
clean: clean-cdc
install-cdc:
mkdir -p '$(citus_decoders_dir)'
$(INSTALL_SHLIB) citus_$(DECODER).so '$(citus_decoders_dir)/$(DECODER).so'
clean-cdc:
rm -f '$(DESTDIR)$(datadir)/$(datamoduledir)/citus_decoders/$(DECODER).so'

View File

@ -0,0 +1,500 @@
/*-------------------------------------------------------------------------
*
* cdc_decoder.c
* CDC Decoder plugin for Citus
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "cdc_decoder_utils.h"
#include "postgres.h"
#include "fmgr.h"
#include "access/genam.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_publication.h"
#include "commands/extension.h"
#include "common/hashfn.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/typcache.h"
PG_MODULE_MAGIC;
extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
static LogicalDecodeChangeCB outputPluginChangeCB;
static void InitShardToDistributedTableMap(void);
static void PublishDistributedTableChanges(LogicalDecodingContext *ctx,
ReorderBufferTXN *txn,
Relation relation,
ReorderBufferChange *change);
static bool replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId
origin_id);
static void TranslateChangesIfSchemaChanged(Relation relation, Relation targetRelation,
ReorderBufferChange *change);
static void TranslateAndPublishRelationForCDC(LogicalDecodingContext *ctx,
ReorderBufferTXN *txn,
Relation relation,
ReorderBufferChange *change, Oid shardId,
Oid targetRelationid);
typedef struct
{
uint64 shardId;
Oid distributedTableId;
bool isReferenceTable;
bool isNull;
} ShardIdHashEntry;
static HTAB *shardToDistributedTableMap = NULL;
static void cdc_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change);
/* build time macro for base decoder plugin name for CDC and Shard Split. */
#ifndef DECODER
#define DECODER "pgoutput"
#endif
#define DECODER_INIT_FUNCTION_NAME "_PG_output_plugin_init"
#define CITUS_SHARD_TRANSFER_SLOT_PREFIX "citus_shard_"
#define CITUS_SHARD_TRANSFER_SLOT_PREFIX_SIZE (sizeof(CITUS_SHARD_TRANSFER_SLOT_PREFIX) - \
1)
/*
* Postgres uses 'pgoutput' as default plugin for logical replication.
* We want to reuse Postgres pgoutput's functionality as much as possible.
* Hence we load all the functions of this plugin and override as required.
*/
void
_PG_output_plugin_init(OutputPluginCallbacks *cb)
{
elog(LOG, "Initializing CDC decoder");
/*
* We build custom .so files whose name matches common decoders (pgoutput, wal2json)
* and place them in $libdir/citus_decoders/ such that administrators can configure
* dynamic_library_path to include this directory, and users can then use the
* regular decoder names when creating replications slots.
*
* To load the original decoder, we need to remove citus_decoders/ from the
* dynamic_library_path.
*/
char *originalDLP = Dynamic_library_path;
Dynamic_library_path = RemoveCitusDecodersFromPaths(Dynamic_library_path);
LogicalOutputPluginInit plugin_init =
(LogicalOutputPluginInit) (void *)
load_external_function(DECODER,
DECODER_INIT_FUNCTION_NAME,
false, NULL);
if (plugin_init == NULL)
{
elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
}
/* in case this session is used for different replication slots */
Dynamic_library_path = originalDLP;
/* ask the output plugin to fill the callback struct */
plugin_init(cb);
/* Initialize the Shard Id to Distributed Table id mapping hash table.*/
InitShardToDistributedTableMap();
/* actual pgoutput callback function will be called */
outputPluginChangeCB = cb->change_cb;
cb->change_cb = cdc_change_cb;
cb->filter_by_origin_cb = replication_origin_filter_cb;
}
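Assuming dynamic_library_path has been configured to include $libdir/citus_decoders as the comment above describes, a CDC client creates a slot with the stock decoder name; the slot name below is hypothetical:

-- Hypothetical usage: 'pgoutput' resolves to the Citus wrapper in
-- citus_decoders, which loads and forwards to the real pgoutput plugin.
SELECT pg_create_logical_replication_slot('cdc_demo_slot', 'pgoutput');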
/*
* Check if the replication slot is for Shard transfer by checking for prefix.
*/
inline static
bool
IsShardTransferSlot(char *replicationSlotName)
{
return strncmp(replicationSlotName, CITUS_SHARD_TRANSFER_SLOT_PREFIX,
CITUS_SHARD_TRANSFER_SLOT_PREFIX_SIZE) == 0;
}
/*
* cdc_change_cb is the change callback of the CDC decoder; it translates
* incoming tuple changes on shard relations into changes on the distributed table.
*/
static void
cdc_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change)
{
/*
* If Citus has not been loaded yet, pass the changes
* through to the underlying decoder plugin.
*/
if (!CdcCitusHasBeenLoaded())
{
outputPluginChangeCB(ctx, txn, relation, change);
return;
}
/* check if the relation is publishable.*/
if (!is_publishable_relation(relation))
{
return;
}
char *replicationSlotName = ctx->slot->data.name.data;
if (replicationSlotName == NULL)
{
elog(ERROR, "Replication slot name is NULL!");
return;
}
/* If the slot is for internal shard operations, call the base plugin's call back. */
if (IsShardTransferSlot(replicationSlotName))
{
outputPluginChangeCB(ctx, txn, relation, change);
return;
}
/* Translate the changes from the shard to the distributed table and publish. */
PublishDistributedTableChanges(ctx, txn, relation, change);
}
/*
* InitShardToDistributedTableMap initializes the hash table that is used to
* translate the changes in the shard table to the changes in the distributed table.
*/
static void
InitShardToDistributedTableMap()
{
HASHCTL info;
memset(&info, 0, sizeof(info));
info.keysize = sizeof(uint64);
info.entrysize = sizeof(ShardIdHashEntry);
info.hash = tag_hash;
info.hcxt = CurrentMemoryContext;
int hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION);
shardToDistributedTableMap = hash_create("CDC Decoder translation hash table", 1024,
&info, hashFlags);
}
/*
* AddShardIdToHashTable adds the shardId to the hash table.
*/
static Oid
AddShardIdToHashTable(uint64 shardId, ShardIdHashEntry *entry)
{
entry->shardId = shardId;
entry->distributedTableId = CdcLookupShardRelationFromCatalog(shardId, true);
entry->isReferenceTable = CdcPartitionMethodViaCatalog(entry->distributedTableId) ==
'n';
return entry->distributedTableId;
}
static Oid
LookupDistributedTableIdForShardId(uint64 shardId, bool *isReferenceTable)
{
bool found;
Oid distributedTableId = InvalidOid;
ShardIdHashEntry *entry = (ShardIdHashEntry *) hash_search(shardToDistributedTableMap,
&shardId,
HASH_ENTER,
&found);
if (found)
{
distributedTableId = entry->distributedTableId;
}
else
{
distributedTableId = AddShardIdToHashTable(shardId, entry);
}
*isReferenceTable = entry->isReferenceTable;
return distributedTableId;
}
/*
* replication_origin_filter_cb is a callback function that filters out publication
* of changes that originated from any node other than the current node. This is
* identified by the "origin_id" of the changes. The origin_id is set to
* a non-zero value in the origin node as part of WAL replication for internal
* operations like shard split/moves/create_distributed_table etc.
*/
static bool
replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId origin_id)
{
return (origin_id != InvalidRepOriginId);
}
/*
* This function is responsible for translating the changes in the shard table to
* the changes in the shell table and publishing the changes as a change to the
* distributed table so that CDC clients are not aware of the shard tables. It also
* handles schema changes to the distributed table.
*/
static void
TranslateAndPublishRelationForCDC(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change, Oid
shardId, Oid targetRelationid)
{
/* Get the distributed table's relation for this shard.*/
Relation targetRelation = RelationIdGetRelation(targetRelationid);
/*
* Check if there has been a schema change (such as a dropped column), by comparing
* the number of attributes in the shard table and the shell table.
*/
TranslateChangesIfSchemaChanged(relation, targetRelation, change);
/*
* Publish the change to the shard table as the change in the distributed table,
* so that the CDC client can see the change in the distributed table,
* instead of the shard table, by calling the pgoutput's callback function.
*/
outputPluginChangeCB(ctx, txn, targetRelation, change);
RelationClose(targetRelation);
}
/*
* PublishDistributedTableChanges checks if the relation is a shard of a
* distributed table. If so, it publishes the change as a change to the
* distributed table instead of the shard; otherwise it passes the change
* through to the base plugin. It also skips the Citus metadata tables.
*/
static void
PublishDistributedTableChanges(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change)
{
char *shardRelationName = RelationGetRelationName(relation);
/* Skip publishing CDC changes for any system relations in pg_catalog*/
if (relation->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
{
return;
}
/* Check if the relation is a distributed table by checking for shard name. */
uint64 shardId = CdcExtractShardIdFromTableName(shardRelationName, true);
/* If this relation is not distributed, call the pgoutput's callback and return. */
if (shardId == INVALID_SHARD_ID)
{
outputPluginChangeCB(ctx, txn, relation, change);
return;
}
bool isReferenceTable = false;
Oid distRelationId = LookupDistributedTableIdForShardId(shardId, &isReferenceTable);
if (distRelationId == InvalidOid)
{
outputPluginChangeCB(ctx, txn, relation, change);
return;
}
/* Publish changes for reference table only from the coordinator node. */
if (isReferenceTable && !CdcIsCoordinator())
{
return;
}
/* translate and publish from shard relation to distributed table relation for CDC. */
TranslateAndPublishRelationForCDC(ctx, txn, relation, change, shardId,
distRelationId);
}
/*
* GetTupleForTargetSchemaForCdc returns a heap tuple with the data from sourceRelationTuple
* to match the schema in targetRelDesc. Either or both source and target relations may have
* dropped columns. This function handles it by adding NULL values for dropped columns in
* target relation and skipping dropped columns in source relation. It returns a heap tuple
* adjusted to the current schema of the target relation.
*/
static HeapTuple
GetTupleForTargetSchemaForCdc(HeapTuple sourceRelationTuple,
TupleDesc sourceRelDesc,
TupleDesc targetRelDesc)
{
/* Allocate memory for sourceValues and sourceNulls arrays. */
Datum *sourceValues = (Datum *) palloc0(sourceRelDesc->natts * sizeof(Datum));
bool *sourceNulls = (bool *) palloc0(sourceRelDesc->natts * sizeof(bool));
/* Deform the source tuple to sourceValues and sourceNulls arrays. */
heap_deform_tuple(sourceRelationTuple, sourceRelDesc, sourceValues,
sourceNulls);
/* This is the next field to read in the source relation */
uint32 sourceIndex = 0;
uint32 targetIndex = 0;
/* Allocate memory for sourceValues and sourceNulls arrays. */
Datum *targetValues = (Datum *) palloc0(targetRelDesc->natts * sizeof(Datum));
bool *targetNulls = (bool *) palloc0(targetRelDesc->natts * sizeof(bool));
/* Loop through all source and target attributes one by one and handle any dropped attributes.*/
while (targetIndex < targetRelDesc->natts)
{
/* If this target attribute has been dropped, add a NULL attribute in targetValues and continue.*/
if (TupleDescAttr(targetRelDesc, targetIndex)->attisdropped)
{
Datum nullDatum = (Datum) 0;
targetValues[targetIndex] = nullDatum;
targetNulls[targetIndex] = true;
targetIndex++;
}
/* If this source attribute has been dropped, just skip this source attribute.*/
else if (TupleDescAttr(sourceRelDesc, sourceIndex)->attisdropped)
{
sourceIndex++;
continue;
}
/* If both source and target attributes are not dropped, add the attribute field to targetValues. */
else if (sourceIndex < sourceRelDesc->natts)
{
targetValues[targetIndex] = sourceValues[sourceIndex];
targetNulls[targetIndex] = sourceNulls[sourceIndex];
sourceIndex++;
targetIndex++;
}
else
{
/* If there are no more source fields, add a NULL field in targetValues. */
Datum nullDatum = (Datum) 0;
targetValues[targetIndex] = nullDatum;
targetNulls[targetIndex] = true;
targetIndex++;
}
}
/* Form a new tuple from the target values created by the above loop. */
HeapTuple targetRelationTuple = heap_form_tuple(targetRelDesc, targetValues,
targetNulls);
return targetRelationTuple;
}
/* HasSchemaChanged returns true if there are any schema changes between the source and target relations. */
static bool
HasSchemaChanged(TupleDesc sourceRelationDesc, TupleDesc targetRelationDesc)
{
bool hasSchemaChanged = (sourceRelationDesc->natts != targetRelationDesc->natts);
if (hasSchemaChanged)
{
return true;
}
for (uint32 i = 0; i < sourceRelationDesc->natts; i++)
{
if (TupleDescAttr(sourceRelationDesc, i)->attisdropped ||
TupleDescAttr(targetRelationDesc, i)->attisdropped)
{
hasSchemaChanged = true;
break;
}
}
return hasSchemaChanged;
}
/*
* TranslateChangesIfSchemaChanged translates the tuples ReorderBufferChange
* if there is a schema change between source and target relations.
*/
static void
TranslateChangesIfSchemaChanged(Relation sourceRelation, Relation targetRelation,
ReorderBufferChange *change)
{
TupleDesc sourceRelationDesc = RelationGetDescr(sourceRelation);
TupleDesc targetRelationDesc = RelationGetDescr(targetRelation);
/* if there are no changes between source and target relations, return. */
if (!HasSchemaChanged(sourceRelationDesc, targetRelationDesc))
{
return;
}
/* Check the ReorderBufferChange's action type and handle them accordingly.*/
switch (change->action)
{
case REORDER_BUFFER_CHANGE_INSERT:
{
/* For insert action, only the new tuple needs to be translated. */
HeapTuple sourceRelationNewTuple = &(change->data.tp.newtuple->tuple);
HeapTuple targetRelationNewTuple = GetTupleForTargetSchemaForCdc(
sourceRelationNewTuple, sourceRelationDesc, targetRelationDesc);
change->data.tp.newtuple->tuple = *targetRelationNewTuple;
break;
}
/*
* For update changes both old and new tuples need to be translated for target relation
* if the REPLICA IDENTITY is set to FULL. Otherwise, only the new tuple needs to be
* translated for target relation.
*/
case REORDER_BUFFER_CHANGE_UPDATE:
{
/* For update action, the new tuple should always be translated. */
/* Get the new tuple from the ReorderBufferChange, and translate it to target relation. */
HeapTuple sourceRelationNewTuple = &(change->data.tp.newtuple->tuple);
HeapTuple targetRelationNewTuple = GetTupleForTargetSchemaForCdc(
sourceRelationNewTuple, sourceRelationDesc, targetRelationDesc);
change->data.tp.newtuple->tuple = *targetRelationNewTuple;
/*
* Format oldtuple according to the target relation. If the column values of replica
* identity change, then the old tuple is non-null and needs to be formatted according
* to the target relation schema.
*/
if (change->data.tp.oldtuple != NULL)
{
HeapTuple sourceRelationOldTuple = &(change->data.tp.oldtuple->tuple);
HeapTuple targetRelationOldTuple = GetTupleForTargetSchemaForCdc(
sourceRelationOldTuple,
sourceRelationDesc,
targetRelationDesc);
change->data.tp.oldtuple->tuple = *targetRelationOldTuple;
}
break;
}
case REORDER_BUFFER_CHANGE_DELETE:
{
/* For delete action, only the old tuple needs to be translated. */
HeapTuple sourceRelationOldTuple = &(change->data.tp.oldtuple->tuple);
HeapTuple targetRelationOldTuple = GetTupleForTargetSchemaForCdc(
sourceRelationOldTuple,
sourceRelationDesc,
targetRelationDesc);
change->data.tp.oldtuple->tuple = *targetRelationOldTuple;
break;
}
default:
{
/* Do nothing for other action types. */
break;
}
}
}

View File

@ -0,0 +1,432 @@
/*-------------------------------------------------------------------------
*
* cdc_decoder_utils.c
* CDC Decoder plugin utility functions for Citus
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "commands/extension.h"
#include "fmgr.h"
#include "miscadmin.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "common/hashfn.h"
#include "common/string.h"
#include "utils/fmgroids.h"
#include "utils/typcache.h"
#include "utils/lsyscache.h"
#include "catalog/pg_namespace.h"
#include "cdc_decoder_utils.h"
#include "distributed/pg_dist_partition.h"
#include "distributed/pg_dist_shard.h"
#include "distributed/relay_utility.h"
static int32 LocalGroupId = -1;
static Oid PgDistLocalGroupRelationId = InvalidOid;
static Oid PgDistShardRelationId = InvalidOid;
static Oid PgDistShardShardidIndexId = InvalidOid;
static Oid PgDistPartitionRelationId = InvalidOid;
static Oid PgDistPartitionLogicalrelidIndexId = InvalidOid;
static bool IsCitusExtensionLoaded = false;
#define COORDINATOR_GROUP_ID 0
#define InvalidRepOriginId 0
#define Anum_pg_dist_local_groupid 1
#define GROUP_ID_UPGRADING -2
static Oid DistLocalGroupIdRelationId(void);
static int32 CdcGetLocalGroupId(void);
static HeapTuple CdcPgDistPartitionTupleViaCatalog(Oid relationId);
/*
* DistLocalGroupIdRelationId returns the relation id of the pg_dist_local_group
*/
static Oid
DistLocalGroupIdRelationId(void)
{
if (PgDistLocalGroupRelationId == InvalidOid)
{
PgDistLocalGroupRelationId = get_relname_relid("pg_dist_local_group",
PG_CATALOG_NAMESPACE);
}
return PgDistLocalGroupRelationId;
}
/*
* DistShardRelationId returns the relation id of the pg_dist_shard
*/
static Oid
DistShardRelationId(void)
{
if (PgDistShardRelationId == InvalidOid)
{
PgDistShardRelationId = get_relname_relid("pg_dist_shard", PG_CATALOG_NAMESPACE);
}
return PgDistShardRelationId;
}
/*
* DistShardShardidIndexId returns the relation id of the pg_dist_shard_shardid_index
*/
static Oid
DistShardShardidIndexId(void)
{
if (PgDistShardShardidIndexId == InvalidOid)
{
PgDistShardShardidIndexId = get_relname_relid("pg_dist_shard_shardid_index",
PG_CATALOG_NAMESPACE);
}
return PgDistShardShardidIndexId;
}
/*
* DistPartitionRelationId returns the relation id of the pg_dist_partition
*/
static Oid
DistPartitionRelationId(void)
{
if (PgDistPartitionRelationId == InvalidOid)
{
PgDistPartitionRelationId = get_relname_relid("pg_dist_partition",
PG_CATALOG_NAMESPACE);
}
return PgDistPartitionRelationId;
}
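/*
* DistPartitionLogicalRelidIndexId returns the relation id of
* pg_dist_partition_logicalrelid_index.
*/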
static Oid
DistPartitionLogicalRelidIndexId(void)
{
if (PgDistPartitionLogicalrelidIndexId == InvalidOid)
{
PgDistPartitionLogicalrelidIndexId = get_relname_relid(
"pg_dist_partition_logicalrelid_index", PG_CATALOG_NAMESPACE);
}
return PgDistPartitionLogicalrelidIndexId;
}
/*
* CdcIsCoordinator function returns true if this node is identified as the
* schema/coordinator/master node of the cluster.
*/
bool
CdcIsCoordinator(void)
{
return (CdcGetLocalGroupId() == COORDINATOR_GROUP_ID);
}
/*
* CdcCitusHasBeenLoaded function returns true if the citus extension has been loaded.
*/
bool
CdcCitusHasBeenLoaded()
{
if (!IsCitusExtensionLoaded)
{
IsCitusExtensionLoaded = (get_extension_oid("citus", true) != InvalidOid);
}
return IsCitusExtensionLoaded;
}
/*
* CdcExtractShardIdFromTableName tries to extract the shard id from the given
* table name, and returns the shard id if the table name is formatted as a
* shard name. Otherwise, the function returns INVALID_SHARD_ID.
*/
uint64
CdcExtractShardIdFromTableName(const char *tableName, bool missingOk)
{
char *shardIdStringEnd = NULL;
/* find the last underscore and increment for shardId string */
char *shardIdString = strrchr(tableName, SHARD_NAME_SEPARATOR);
if (shardIdString == NULL && !missingOk)
{
ereport(ERROR, (errmsg("could not extract shardId from table name \"%s\"",
tableName)));
}
else if (shardIdString == NULL && missingOk)
{
return INVALID_SHARD_ID;
}
shardIdString++;
errno = 0;
uint64 shardId = strtoull(shardIdString, &shardIdStringEnd, 0);
if (errno != 0 || (*shardIdStringEnd != '\0'))
{
if (!missingOk)
{
ereport(ERROR, (errmsg("could not extract shardId from table name \"%s\"",
tableName)));
}
else
{
return INVALID_SHARD_ID;
}
}
return shardId;
}
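For intuition, shard relations append a shard id suffix to the distributed table's name, which is the suffix this function parses; the table and shard names below are hypothetical:

-- Hypothetical: a distributed table 'events' has shard relations such as
-- 'events_102008'; pg_dist_shard maps the extracted shard id back to the
-- distributed table, which CdcLookupShardRelationFromCatalog relies on.
SELECT shardid, logicalrelid::regclass AS distributed_table
FROM pg_dist_shard
WHERE logicalrelid = 'events'::regclass;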
/*
* CdcGetLocalGroupId returns the group identifier of the local node, caching it
* after the first successful read. If pg_dist_local_group is temporarily empty,
* as happens during a PostgreSQL upgrade, it returns GROUP_ID_UPGRADING.
*/
static int32
CdcGetLocalGroupId(void)
{
ScanKeyData scanKey[1];
int scanKeyCount = 0;
int32 groupId = 0;
/*
* Already set the group id, no need to read the heap again.
*/
if (LocalGroupId != -1)
{
return LocalGroupId;
}
Oid localGroupTableOid = DistLocalGroupIdRelationId();
if (localGroupTableOid == InvalidOid)
{
return 0;
}
Relation pgDistLocalGroupId = table_open(localGroupTableOid, AccessShareLock);
SysScanDesc scanDescriptor = systable_beginscan(pgDistLocalGroupId,
InvalidOid, false,
NULL, scanKeyCount, scanKey);
TupleDesc tupleDescriptor = RelationGetDescr(pgDistLocalGroupId);
HeapTuple heapTuple = systable_getnext(scanDescriptor);
if (HeapTupleIsValid(heapTuple))
{
bool isNull = false;
Datum groupIdDatum = heap_getattr(heapTuple,
Anum_pg_dist_local_groupid,
tupleDescriptor, &isNull);
groupId = DatumGetInt32(groupIdDatum);
/* set the local cache variable */
LocalGroupId = groupId;
}
else
{
/*
* Upgrade is happening. When upgrading postgres, pg_dist_local_group is
* temporarily empty before citus_finish_pg_upgrade() finishes execution.
*/
groupId = GROUP_ID_UPGRADING;
}
systable_endscan(scanDescriptor);
table_close(pgDistLocalGroupId, AccessShareLock);
return groupId;
}
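The value read here is the same one exposed by the catalog table, which can be checked directly:

-- Illustrative only: groupid is 0 (COORDINATOR_GROUP_ID) on the
-- coordinator and a positive group id on worker nodes.
SELECT groupid FROM pg_dist_local_group;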
/*
* CdcLookupShardRelationFromCatalog returns the logical relation oid a shard belongs to.
*
* Errors out if the shardId does not exist and missingOk is false.
* Returns InvalidOid if the shardId does not exist and missingOk is true.
*/
Oid
CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk)
{
ScanKeyData scanKey[1];
int scanKeyCount = 1;
Form_pg_dist_shard shardForm = NULL;
Relation pgDistShard = table_open(DistShardRelationId(), AccessShareLock);
Oid relationId = InvalidOid;
ScanKeyInit(&scanKey[0], Anum_pg_dist_shard_shardid,
BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(shardId));
SysScanDesc scanDescriptor = systable_beginscan(pgDistShard,
DistShardShardidIndexId(), true,
NULL, scanKeyCount, scanKey);
HeapTuple heapTuple = systable_getnext(scanDescriptor);
if (!HeapTupleIsValid(heapTuple) && !missingOk)
{
ereport(ERROR, (errmsg("could not find valid entry for shard "
UINT64_FORMAT, shardId)));
}
if (!HeapTupleIsValid(heapTuple))
{
relationId = InvalidOid;
}
else
{
shardForm = (Form_pg_dist_shard) GETSTRUCT(heapTuple);
relationId = shardForm->logicalrelid;
}
systable_endscan(scanDescriptor);
table_close(pgDistShard, NoLock);
return relationId;
}
/*
* CdcPgDistPartitionTupleViaCatalog is a helper function that searches
* pg_dist_partition for the given relationId. The caller is responsible
* for ensuring that the returned heap tuple is valid before accessing
* its fields.
*/
static HeapTuple
CdcPgDistPartitionTupleViaCatalog(Oid relationId)
{
const int scanKeyCount = 1;
ScanKeyData scanKey[1];
bool indexOK = true;
Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);
ScanKeyInit(&scanKey[0], Anum_pg_dist_partition_logicalrelid,
BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));
SysScanDesc scanDescriptor = systable_beginscan(pgDistPartition,
DistPartitionLogicalRelidIndexId(),
indexOK, NULL, scanKeyCount, scanKey);
HeapTuple partitionTuple = systable_getnext(scanDescriptor);
if (HeapTupleIsValid(partitionTuple))
{
/* callers should have the tuple in their memory contexts */
partitionTuple = heap_copytuple(partitionTuple);
}
systable_endscan(scanDescriptor);
table_close(pgDistPartition, AccessShareLock);
return partitionTuple;
}
/*
* CdcPartitionMethodViaCatalog gets a relationId and returns the partition
* method column from pg_dist_partition via reading from catalog.
*/
char
CdcPartitionMethodViaCatalog(Oid relationId)
{
HeapTuple partitionTuple = CdcPgDistPartitionTupleViaCatalog(relationId);
if (!HeapTupleIsValid(partitionTuple))
{
return DISTRIBUTE_BY_INVALID;
}
Datum datumArray[Natts_pg_dist_partition];
bool isNullArray[Natts_pg_dist_partition];
Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);
TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);
if (isNullArray[Anum_pg_dist_partition_partmethod - 1])
{
/* partition method cannot be NULL, still let's make sure */
heap_freetuple(partitionTuple);
table_close(pgDistPartition, NoLock);
return DISTRIBUTE_BY_INVALID;
}
Datum partitionMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1];
char partitionMethodChar = DatumGetChar(partitionMethodDatum);
heap_freetuple(partitionTuple);
table_close(pgDistPartition, NoLock);
return partitionMethodChar;
}
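The partmethod character inspected here can also be checked from SQL; a value of 'n' (none) is what the decoder treats as a reference table:

-- Illustrative only: 'h' marks a hash-distributed table, 'n' a reference
-- table.
SELECT logicalrelid::regclass, partmethod FROM pg_dist_partition;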
/*
* RemoveCitusDecodersFromPaths removes a path ending in citus_decoders
* from the given input paths.
*/
char *
RemoveCitusDecodersFromPaths(char *paths)
{
if (strlen(paths) == 0)
{
/* dynamic_library_path is empty */
return paths;
}
StringInfo newPaths = makeStringInfo();
char *remainingPaths = paths;
for (;;)
{
int pathLength = 0;
char *pathStart = first_path_var_separator(remainingPaths);
if (pathStart == remainingPaths)
{
/*
* This will error out in find_in_dynamic_libpath, return
* original value here.
*/
return paths;
}
else if (pathStart == NULL)
{
/* final path */
pathLength = strlen(remainingPaths);
}
else
{
/* more paths remaining */
pathLength = pathStart - remainingPaths;
}
char *currentPath = palloc(pathLength + 1);
strlcpy(currentPath, remainingPaths, pathLength + 1);
canonicalize_path(currentPath);
if (!pg_str_endswith(currentPath, "/citus_decoders"))
{
appendStringInfo(newPaths, "%s%s", newPaths->len > 0 ? ":" : "", currentPath);
}
if (remainingPaths[pathLength] == '\0')
{
/* end of string */
break;
}
remainingPaths += pathLength + 1;
}
return newPaths->data;
}
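Illustratively, the stripping matters under a configuration like the following (a hypothetical but typical CDC setup), where citus_decoders is listed first so regular slot creation picks up the wrappers:

-- Hypothetical configuration: the wrapper removes the citus_decoders entry
-- from this path while loading the real plugin, to avoid loading itself.
ALTER SYSTEM SET dynamic_library_path = '$libdir/citus_decoders:$libdir';
SELECT pg_reload_conf();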

View File

@ -0,0 +1,34 @@
/*-------------------------------------------------------------------------
*
* cdc_decoder_utils.h
* Utility functions and declarations for the CDC decoder.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#ifndef CITUS_CDC_DECODER_H
#define CITUS_CDC_DECODER_H
#include "postgres.h"
#include "fmgr.h"
#include "replication/logical.h"
#include "c.h"
#define InvalidRepOriginId 0
#define INVALID_SHARD_ID 0
bool CdcIsCoordinator(void);
uint64 CdcExtractShardIdFromTableName(const char *tableName, bool missingOk);
Oid CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk);
char CdcPartitionMethodViaCatalog(Oid relationId);
bool CdcCitusHasBeenLoaded(void);
char * RemoveCitusDecodersFromPaths(char *paths);
#endif /* CITUS_CDC_DECODER_H */

View File

@ -55,6 +55,7 @@
#include "distributed/multi_partitioning_utils.h"
#include "distributed/reference_table_utils.h"
#include "distributed/relation_access_tracking.h"
#include "distributed/replication_origin_session_utils.h"
#include "distributed/shared_library_init.h"
#include "distributed/shard_utils.h"
#include "distributed/worker_protocol.h"
@ -183,6 +184,7 @@ static TableConversionReturn * AlterDistributedTable(TableConversionParameters *
static TableConversionReturn * AlterTableSetAccessMethod(
TableConversionParameters *params);
static TableConversionReturn * ConvertTable(TableConversionState *con);
static TableConversionReturn * ConvertTableInternal(TableConversionState *con);
static bool SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName,
char *longestShardName);
static void DropIndexesNotSupportedByColumnar(Oid relationId,
@ -215,7 +217,10 @@ static bool WillRecreateForeignKeyToReferenceTable(Oid relationId,
CascadeToColocatedOption cascadeOption);
static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId);
static void ErrorIfUnsupportedCascadeObjects(Oid relationId);
static List * WrapTableDDLCommands(List *commandStrings);
static bool DoesCascadeDropUnsupportedObject(Oid classId, Oid id, HTAB *nodeMap);
static TableConversionReturn * CopyTableConversionReturnIntoCurrentContext(
TableConversionReturn *tableConversionReturn);
PG_FUNCTION_INFO_V1(undistribute_table);
PG_FUNCTION_INFO_V1(alter_distributed_table);
@ -402,7 +407,11 @@ UndistributeTable(TableConversionParameters *params)
params->conversionType = UNDISTRIBUTE_TABLE;
params->shardCountIsNull = true;
TableConversionState *con = CreateTableConversion(params);
return ConvertTable(con);
SetupReplicationOriginLocalSession();
TableConversionReturn *conv = ConvertTable(con);
ResetReplicationOriginLocalSession();
return conv;
}
@ -441,6 +450,7 @@ AlterDistributedTable(TableConversionParameters *params)
ereport(DEBUG1, (errmsg("setting multi shard modify mode to sequential")));
SetLocalMultiShardModifyModeToSequential();
}
return ConvertTable(con);
}
@ -511,9 +521,9 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
/*
* ConvertTable is used for converting a table into a new table with different properties.
* The conversion is done by creating a new table, moving everything to the new table and
* dropping the old one. So the oid of the table is not preserved.
* ConvertTableInternal is used for converting a table into a new table with different
* properties. The conversion is done by creating a new table, moving everything to the
* new table and dropping the old one. So the oid of the table is not preserved.
*
* The new table will have the same name, columns and rows. It will also have partitions,
* views, sequences of the old table. Finally it will have everything created by
@ -532,7 +542,7 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
* in case you add a new way to return from this function.
*/
TableConversionReturn *
ConvertTable(TableConversionState *con)
ConvertTableInternal(TableConversionState *con)
{
InTableTypeConversionFunctionCall = true;
@ -595,9 +605,18 @@ ConvertTable(TableConversionState *con)
List *justBeforeDropCommands = NIL;
List *attachPartitionCommands = NIL;
postLoadCommands =
list_concat(postLoadCommands,
GetViewCreationTableDDLCommandsOfTable(con->relationId));
List *createViewCommands = GetViewCreationCommandsOfTable(con->relationId);
postLoadCommands = list_concat(postLoadCommands,
WrapTableDDLCommands(createViewCommands));
/* need to add back to publications after dropping the original table */
bool isAdd = true;
List *alterPublicationCommands =
GetAlterPublicationDDLCommandsForTable(con->relationId, isAdd);
postLoadCommands = list_concat(postLoadCommands,
WrapTableDDLCommands(alterPublicationCommands));
List *foreignKeyCommands = NIL;
if (con->conversionType == ALTER_DISTRIBUTED_TABLE)
@ -800,9 +819,21 @@ ConvertTable(TableConversionState *con)
ExecuteQueryViaSPI(tableConstructionSQL, SPI_OK_UTILITY);
}
/*
* when there are many partitions, each call to ProcessUtilityParseTree
* accumulates used memory. Free context after each call.
*/
MemoryContext citusPerPartitionContext =
AllocSetContextCreate(CurrentMemoryContext,
"citus_per_partition_context",
ALLOCSET_DEFAULT_SIZES);
MemoryContext oldContext = MemoryContextSwitchTo(citusPerPartitionContext);
char *attachPartitionCommand = NULL;
foreach_ptr(attachPartitionCommand, attachPartitionCommands)
{
MemoryContextReset(citusPerPartitionContext);
Node *parseTree = ParseTreeNode(attachPartitionCommand);
ProcessUtilityParseTree(parseTree, attachPartitionCommand,
@ -810,6 +841,9 @@ ConvertTable(TableConversionState *con)
NULL, None_Receiver, NULL);
}
MemoryContextSwitchTo(oldContext);
MemoryContextDelete(citusPerPartitionContext);
if (isPartitionTable)
{
ExecuteQueryViaSPI(attachToParentCommand, SPI_OK_UTILITY);
@ -869,10 +903,77 @@ ConvertTable(TableConversionState *con)
SetLocalEnableLocalReferenceForeignKeys(oldEnableLocalReferenceForeignKeys);
InTableTypeConversionFunctionCall = false;
return ret;
}
/*
* CopyTableConversionReturnIntoCurrentContext copies given tableConversionReturn
* into CurrentMemoryContext.
*/
static TableConversionReturn *
CopyTableConversionReturnIntoCurrentContext(TableConversionReturn *tableConversionReturn)
{
TableConversionReturn *tableConversionReturnCopy = NULL;
if (tableConversionReturn)
{
tableConversionReturnCopy = palloc0(sizeof(TableConversionReturn));
List *copyForeignKeyCommands = NIL;
char *foreignKeyCommand = NULL;
foreach_ptr(foreignKeyCommand, tableConversionReturn->foreignKeyCommands)
{
char *copyForeignKeyCommand = MemoryContextStrdup(CurrentMemoryContext,
foreignKeyCommand);
copyForeignKeyCommands = lappend(copyForeignKeyCommands,
copyForeignKeyCommand);
}
tableConversionReturnCopy->foreignKeyCommands = copyForeignKeyCommands;
}
return tableConversionReturnCopy;
}
/*
* ConvertTable is a wrapper for ConvertTableInternal to persist only
* TableConversionReturn and delete all other allocations.
*/
static TableConversionReturn *
ConvertTable(TableConversionState *con)
{
/*
* We do not allow alter_distributed_table and undistribute_table operations
* for tables with identity columns. This is because we do not have a proper way
* of keeping sequence states consistent across the cluster.
*/
ErrorIfTableHasIdentityColumn(con->relationId);
/*
* when there are many partitions or colocated tables, memory usage is
* accumulated. Free context for each call to ConvertTable.
*/
MemoryContext convertTableContext =
AllocSetContextCreate(CurrentMemoryContext,
"citus_convert_table_context",
ALLOCSET_DEFAULT_SIZES);
MemoryContext oldContext = MemoryContextSwitchTo(convertTableContext);
TableConversionReturn *tableConversionReturn = ConvertTableInternal(con);
MemoryContextSwitchTo(oldContext);
/* persist TableConversionReturn in oldContext */
TableConversionReturn *tableConversionReturnCopy =
CopyTableConversionReturnIntoCurrentContext(tableConversionReturn);
/* delete convertTableContext */
MemoryContextDelete(convertTableContext);
return tableConversionReturnCopy;
}
/*
* DropIndexesNotSupportedByColumnar is a helper function used during access
* method conversion to drop the indexes that are not supported by columnarAM.
@ -1268,8 +1369,7 @@ CreateCitusTableLike(TableConversionState *con)
}
else if (IsCitusTableType(con->relationId, REFERENCE_TABLE))
{
CreateDistributedTable(con->newRelationId, NULL, DISTRIBUTE_BY_NONE, 0, false,
NULL);
CreateReferenceTable(con->newRelationId);
}
else if (IsCitusTableType(con->relationId, CITUS_LOCAL_TABLE))
{
@ -1410,17 +1510,16 @@ GetViewCreationCommandsOfTable(Oid relationId)
/*
* GetViewCreationTableDDLCommandsOfTable is the same as GetViewCreationCommandsOfTable,
* but the returned list includes objects of TableDDLCommand's, not strings.
* WrapTableDDLCommands takes a list of command strings and wraps them
* in TableDDLCommand structs.
*/
List *
GetViewCreationTableDDLCommandsOfTable(Oid relationId)
static List *
WrapTableDDLCommands(List *commandStrings)
{
List *commands = GetViewCreationCommandsOfTable(relationId);
List *tableDDLCommands = NIL;
char *command = NULL;
foreach_ptr(command, commands)
foreach_ptr(command, commandStrings)
{
tableDDLCommands = lappend(tableDDLCommands, makeTableDDLCommandString(command));
}
@ -1523,96 +1622,6 @@ CreateMaterializedViewDDLCommand(Oid matViewOid)
}
/*
* This function marks all the identity sequences as distributed on the given table.
*/
static void
MarkIdentitiesAsDistributed(Oid targetRelationId)
{
Relation relation = relation_open(targetRelationId, AccessShareLock);
TupleDesc tupleDescriptor = RelationGetDescr(relation);
relation_close(relation, NoLock);
bool missingSequenceOk = false;
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
attributeIndex++)
{
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
if (attributeForm->attidentity)
{
Oid seqOid = getIdentitySequence(targetRelationId, attributeForm->attnum,
missingSequenceOk);
ObjectAddress seqAddress = { 0 };
ObjectAddressSet(seqAddress, RelationRelationId, seqOid);
MarkObjectDistributed(&seqAddress);
}
}
}
/*
* This function returns SQL statements to rename identities on the given table
*/
static void
PrepareRenameIdentitiesCommands(Oid sourceRelationId, Oid targetRelationId,
List **outCoordinatorCommands, List **outWorkerCommands)
{
Relation targetRelation = relation_open(targetRelationId, AccessShareLock);
TupleDesc targetTupleDescriptor = RelationGetDescr(targetRelation);
relation_close(targetRelation, NoLock);
bool missingSequenceOk = false;
for (int attributeIndex = 0; attributeIndex < targetTupleDescriptor->natts;
attributeIndex++)
{
Form_pg_attribute attributeForm = TupleDescAttr(targetTupleDescriptor,
attributeIndex);
if (attributeForm->attidentity)
{
char *columnName = NameStr(attributeForm->attname);
Oid targetSequenceOid = getIdentitySequence(targetRelationId,
attributeForm->attnum,
missingSequenceOk);
char *targetSequenceName = generate_relation_name(targetSequenceOid, NIL);
Oid sourceSequenceOid = getIdentitySequence(sourceRelationId,
attributeForm->attnum,
missingSequenceOk);
char *sourceSequenceName = generate_relation_name(sourceSequenceOid, NIL);
/* to rename sequence on the coordinator */
*outCoordinatorCommands = lappend(*outCoordinatorCommands, psprintf(
"SET citus.enable_ddl_propagation TO OFF; ALTER SEQUENCE %s RENAME TO %s; RESET citus.enable_ddl_propagation;",
quote_identifier(
targetSequenceName),
quote_identifier(
sourceSequenceName)));
/* update workers to use existing sequence and drop the new one generated by PG */
bool missingTableOk = true;
*outWorkerCommands = lappend(*outWorkerCommands,
GetAlterColumnWithNextvalDefaultCmd(
sourceSequenceOid, sourceRelationId,
columnName,
missingTableOk));
/* drop the sequence generated by identity column */
*outWorkerCommands = lappend(*outWorkerCommands, psprintf(
"DROP SEQUENCE IF EXISTS %s",
quote_identifier(
targetSequenceName)));
}
}
}
/*
* ReplaceTable replaces the source table with the target table.
* It moves all the rows of the source table to target table with INSERT SELECT.
@ -1671,24 +1680,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
ExecuteQueryViaSPI(query->data, SPI_OK_INSERT);
}
/*
* Drop identity dependencies (sequences marked as DEPENDENCY_INTERNAL) on the workers
* to keep their states after the source table is dropped.
*/
List *ownedIdentitySequences = getOwnedSequences_internal(sourceId, 0,
DEPENDENCY_INTERNAL);
if (ownedIdentitySequences != NIL && ShouldSyncTableMetadata(sourceId))
{
char *qualifiedTableName = quote_qualified_identifier(schemaName, sourceName);
StringInfo command = makeStringInfo();
appendStringInfo(command,
"SELECT pg_catalog.worker_drop_sequence_dependency(%s);",
quote_literal_cstr(qualifiedTableName));
SendCommandToWorkersWithMetadata(command->data);
}
/*
* Modify regular sequence dependencies (sequences marked as DEPENDENCY_AUTO)
*/
@ -1748,23 +1739,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
quote_qualified_identifier(schemaName, sourceName))));
}
/*
* We need to prepare rename identities commands before dropping the original table,
* otherwise we can't find the original names of the identity sequences.
* We prepare separate commands for the coordinator and the workers because:
* In the coordinator, we simply need to rename the identity sequences
* to their names on the old table, because right now the identity
* sequences have default names generated by Postgres with the creation of the new table
* In the workers, we have not dropped the original identity sequences,
* so what we do is we alter the columns and set their default to the
* original identity sequences, and after that we drop the new sequences.
*/
List *coordinatorCommandsToRenameIdentites = NIL;
List *workerCommandsToRenameIdentites = NIL;
PrepareRenameIdentitiesCommands(sourceId, targetId,
&coordinatorCommandsToRenameIdentites,
&workerCommandsToRenameIdentites);
resetStringInfo(query);
appendStringInfo(query, "DROP %sTABLE %s CASCADE",
IsForeignTable(sourceId) ? "FOREIGN " : "",
@ -1782,27 +1756,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
quote_qualified_identifier(schemaName, targetName),
quote_identifier(sourceName));
ExecuteQueryViaSPI(query->data, SPI_OK_UTILITY);
char *coordinatorCommand = NULL;
foreach_ptr(coordinatorCommand, coordinatorCommandsToRenameIdentites)
{
ExecuteQueryViaSPI(coordinatorCommand, SPI_OK_UTILITY);
}
char *workerCommand = NULL;
foreach_ptr(workerCommand, workerCommandsToRenameIdentites)
{
SendCommandToWorkersWithMetadata(workerCommand);
}
/*
* To preserve identity sequences states in case of redistributing the table again,
* we don't drop them when we undistribute a table. To maintain consistency and
* avoid future problems if we redistribute the table, we want to apply all changes happening to
* the identity sequence in the coordinator to their corresponding sequences in the workers as well.
* That's why we have to mark identity sequences as distributed
*/
MarkIdentitiesAsDistributed(targetId);
}

View File

@ -85,6 +85,7 @@ static void DropRelationTruncateTriggers(Oid relationId);
static char * GetDropTriggerCommand(Oid relationId, char *triggerName);
static void DropViewsOnTable(Oid relationId);
static void DropIdentitiesOnTable(Oid relationId);
static void DropTableFromPublications(Oid relationId);
static List * GetRenameStatsCommandList(List *statsOidList, uint64 shardId);
static List * ReversedOidList(List *oidList);
static void AppendExplicitIndexIdsToList(Form_pg_index indexForm,
@ -338,6 +339,10 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
List *shellTableDDLEvents = GetShellTableDDLEventsForCitusLocalTable(relationId);
List *tableViewCreationCommands = GetViewCreationCommandsOfTable(relationId);
bool isAdd = true;
List *alterPublicationCommands =
GetAlterPublicationDDLCommandsForTable(relationId, isAdd);
char *relationName = get_rel_name(relationId);
Oid relationSchemaId = get_rel_namespace(relationId);
@ -347,6 +352,12 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
*/
DropIdentitiesOnTable(relationId);
/*
* We do not want the shard to be in the publication (subscribers are
* unlikely to recognize it).
*/
DropTableFromPublications(relationId);
/* below we convert relation with relationId to the shard relation */
uint64 shardId = ConvertLocalTableToShard(relationId);
@ -363,6 +374,11 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
*/
ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(tableViewCreationCommands);
/*
* Execute the ALTER PUBLICATION commands to add the shell table back to the publications.
*/
ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(alterPublicationCommands);
/*
* Set shellRelationId as the relation with relationId now points
* to the shard relation.
@ -1131,7 +1147,7 @@ DropIdentitiesOnTable(Oid relationId)
{
Relation relation = relation_open(relationId, AccessShareLock);
TupleDesc tupleDescriptor = RelationGetDescr(relation);
relation_close(relation, NoLock);
List *dropCommandList = NIL;
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
attributeIndex++)
@ -1151,15 +1167,38 @@ DropIdentitiesOnTable(Oid relationId)
qualifiedTableName,
columnName);
dropCommandList = lappend(dropCommandList, dropCommand->data);
}
}
relation_close(relation, NoLock);
char *dropCommand = NULL;
foreach_ptr(dropCommand, dropCommandList)
{
/*
* We need to disable/enable DDL propagation around this command to prevent
* sending unnecessary ALTER COLUMN commands for partitions to the MX workers.
*/
ExecuteAndLogUtilityCommandList(list_make3(DISABLE_DDL_PROPAGATION,
dropCommand->data,
dropCommand,
ENABLE_DDL_PROPAGATION));
}
}
/*
* DropTableFromPublications drops the table from all of its publications.
*/
static void
DropTableFromPublications(Oid relationId)
{
bool isAdd = false;
List *alterPublicationCommands =
GetAlterPublicationDDLCommandsForTable(relationId, isAdd);
ExecuteAndLogUtilityCommandList(alterPublicationCommands);
}

View File

@ -94,6 +94,28 @@
#include "utils/syscache.h"
#include "utils/inval.h"
/* common params that apply to all Citus table types */
typedef struct
{
char distributionMethod;
char replicationModel;
} CitusTableParams;
/*
* Params that only apply to distributed tables, i.e., the ones that are
* known as DISTRIBUTED_TABLE by Citus metadata.
*/
typedef struct
{
int shardCount;
bool shardCountIsStrict;
char *colocateWithTableName;
char *distributionColumnName;
} DistributedTableParams;
/*
* once every LOG_PER_TUPLE_AMOUNT, the copy will be logged.
*/
@ -106,17 +128,22 @@ static void CreateDistributedTableConcurrently(Oid relationId,
char *colocateWithTableName,
int shardCount,
bool shardCountIsStrict);
static char DecideReplicationModel(char distributionMethod, char *colocateWithTableName);
static char DecideDistTableReplicationModel(char distributionMethod,
char *colocateWithTableName);
static List * HashSplitPointsForShardList(List *shardList);
static List * HashSplitPointsForShardCount(int shardCount);
static List * WorkerNodesForShardList(List *shardList);
static List * RoundRobinWorkerNodeList(List *workerNodeList, int listLength);
static CitusTableParams DecideCitusTableParams(CitusTableType tableType,
DistributedTableParams *
distributedTableParams);
static void CreateCitusTable(Oid relationId, CitusTableType tableType,
DistributedTableParams *distributedTableParams);
static void CreateHashDistributedTableShards(Oid relationId, int shardCount,
Oid colocatedTableId, bool localTableEmpty);
static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
char distributionMethod, char replicationModel,
int shardCount, bool shardCountIsStrict,
char *colocateWithTableName);
static uint32 ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
DistributedTableParams *distributedTableParams,
Var *distributionColumn);
static void EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
char distributionMethod, uint32 colocationId,
char replicationModel);
@ -377,7 +404,7 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName,
EnsureForeignKeysForDistributedTableConcurrently(relationId);
char replicationModel = DecideReplicationModel(distributionMethod,
char replicationModel = DecideDistTableReplicationModel(distributionMethod,
colocateWithTableName);
/*
@ -622,7 +649,7 @@ static void
EnsureColocateWithTableIsValid(Oid relationId, char distributionMethod,
char *distributionColumnName, char *colocateWithTableName)
{
char replicationModel = DecideReplicationModel(distributionMethod,
char replicationModel = DecideDistTableReplicationModel(distributionMethod,
colocateWithTableName);
/*
@ -860,9 +887,6 @@ create_reference_table(PG_FUNCTION_ARGS)
CheckCitusVersion(ERROR);
Oid relationId = PG_GETARG_OID(0);
char *colocateWithTableName = NULL;
char *distributionColumnName = NULL;
EnsureCitusTableCanBeCreated(relationId);
/* enable create_reference_table on an empty node */
@ -895,8 +919,7 @@ create_reference_table(PG_FUNCTION_ARGS)
errdetail("There are no active worker nodes.")));
}
CreateDistributedTable(relationId, distributionColumnName, DISTRIBUTE_BY_NONE,
ShardCount, false, colocateWithTableName);
CreateReferenceTable(relationId);
PG_RETURN_VOID();
}
@ -951,18 +974,90 @@ EnsureRelationExists(Oid relationId)
/*
* CreateDistributedTable creates distributed table in the given configuration.
* CreateDistributedTable is a wrapper around CreateCitusTable that creates a
* distributed table.
*/
void
CreateDistributedTable(Oid relationId, char *distributionColumnName,
char distributionMethod,
int shardCount, bool shardCountIsStrict,
char *colocateWithTableName)
{
CitusTableType tableType;
switch (distributionMethod)
{
case DISTRIBUTE_BY_HASH:
{
tableType = HASH_DISTRIBUTED;
break;
}
case DISTRIBUTE_BY_APPEND:
{
tableType = APPEND_DISTRIBUTED;
break;
}
case DISTRIBUTE_BY_RANGE:
{
tableType = RANGE_DISTRIBUTED;
break;
}
default:
{
ereport(ERROR, (errmsg("unexpected distribution method when "
"deciding Citus table type")));
break;
}
}
DistributedTableParams distributedTableParams = {
.colocateWithTableName = colocateWithTableName,
.shardCount = shardCount,
.shardCountIsStrict = shardCountIsStrict,
.distributionColumnName = distributionColumnName
};
CreateCitusTable(relationId, tableType, &distributedTableParams);
}
/*
* CreateReferenceTable is a wrapper around CreateCitusTable that creates a
* reference table.
*/
void
CreateReferenceTable(Oid relationId)
{
CreateCitusTable(relationId, REFERENCE_TABLE, NULL);
}
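/*
 * A minimal usage sketch (hypothetical relation and column names), showing how
 * the two public wrappers above funnel into CreateCitusTable:
 *
 *   RangeVar *rangeVar = makeRangeVar("public", "orders", -1);
 *   Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, false);
 *   CreateDistributedTable(relationId, "customer_id", DISTRIBUTE_BY_HASH,
 *                          32, false, "default");
 *
 * or, for a reference table: CreateReferenceTable(relationId);
 */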
/*
* CreateCitusTable is the internal method that creates a Citus table in
* given configuration.
*
* DistributedTableParams should be non-null only if we're creating a distributed
* table.
*
* This function contains all the necessary logic to create Citus tables. It
* performs the necessary checks to ensure distributing the table is safe. If it
* is safe to distribute the table, this function creates distributed table
* metadata, creates shards, and copies local data to the shards. This function
* also handles partitioned tables by distributing their partitions as well.
*/
void
CreateDistributedTable(Oid relationId, char *distributionColumnName,
char distributionMethod, int shardCount,
bool shardCountIsStrict, char *colocateWithTableName)
static void
CreateCitusTable(Oid relationId, CitusTableType tableType,
DistributedTableParams *distributedTableParams)
{
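/*
 * Note that comparing the two booleans with != implements an exclusive-or:
 * the check errors out both when distributedTableParams is missing for a
 * distributed table type and when it is provided for any other table type.
 */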
if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED ||
tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL))
{
ereport(ERROR, (errmsg("distributed table params must be provided "
"when creating a distributed table and must "
"not be otherwise")));
}
/*
* EnsureTableNotDistributed errors out when relation is a citus table but
* we don't want to ask user to first undistribute their citus local tables
@ -988,11 +1083,8 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
* that ALTER TABLE hook does the necessary job, which means converting
* local tables to citus local tables to properly support such foreign
* keys.
*
* This function does not expect to create Citus local table, so we blindly
* create reference table when the method is DISTRIBUTE_BY_NONE.
*/
else if (distributionMethod == DISTRIBUTE_BY_NONE &&
else if (tableType == REFERENCE_TABLE &&
ShouldEnableLocalReferenceForeignKeys() &&
HasForeignKeyWithLocalTable(relationId))
{
@ -1022,24 +1114,29 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
PropagatePrerequisiteObjectsForDistributedTable(relationId);
char replicationModel = DecideReplicationModel(distributionMethod,
colocateWithTableName);
Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId,
Var *distributionColumn = NULL;
if (distributedTableParams)
{
distributionColumn = BuildDistributionKeyFromColumnName(relationId,
distributedTableParams->
distributionColumnName,
NoLock);
}
CitusTableParams citusTableParams = DecideCitusTableParams(tableType,
distributedTableParams);
/*
* ColocationIdForNewTable assumes caller acquires lock on relationId. In our case,
* our caller already acquired lock on relationId.
*/
uint32 colocationId = ColocationIdForNewTable(relationId, distributionColumn,
distributionMethod, replicationModel,
shardCount, shardCountIsStrict,
colocateWithTableName);
uint32 colocationId = ColocationIdForNewTable(relationId, tableType,
distributedTableParams,
distributionColumn);
EnsureRelationCanBeDistributed(relationId, distributionColumn, distributionMethod,
colocationId, replicationModel);
EnsureRelationCanBeDistributed(relationId, distributionColumn,
citusTableParams.distributionMethod,
colocationId, citusTableParams.replicationModel);
/*
* Make sure that existing reference tables have been replicated to all the nodes
@ -1068,8 +1165,10 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
bool autoConverted = false;
/* create an entry for distributed table in pg_dist_partition */
InsertIntoPgDistPartition(relationId, distributionMethod, distributionColumn,
colocationId, replicationModel, autoConverted);
InsertIntoPgDistPartition(relationId, citusTableParams.distributionMethod,
distributionColumn,
colocationId, citusTableParams.replicationModel,
autoConverted);
/* foreign tables do not support TRUNCATE trigger */
if (RegularTable(relationId))
@ -1078,17 +1177,14 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
}
/* create shards for hash distributed and reference tables */
if (distributionMethod == DISTRIBUTE_BY_HASH)
if (tableType == HASH_DISTRIBUTED)
{
CreateHashDistributedTableShards(relationId, shardCount, colocatedTableId,
CreateHashDistributedTableShards(relationId, distributedTableParams->shardCount,
colocatedTableId,
localTableEmpty);
}
else if (distributionMethod == DISTRIBUTE_BY_NONE)
else if (tableType == REFERENCE_TABLE)
{
/*
* This function does not expect to create Citus local table, so we blindly
* create reference table when the method is DISTRIBUTE_BY_NONE.
*/
CreateReferenceTableShard(relationId);
}
@ -1116,17 +1212,36 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
char *relationName = get_rel_name(relationId);
char *parentRelationName = quote_qualified_identifier(schemaName, relationName);
/*
* When there are many partitions, each call to CreateCitusTable accumulates
* used memory. Create and free a memory context for each call.
*/
MemoryContext citusPartitionContext =
AllocSetContextCreate(CurrentMemoryContext,
"citus_per_partition_context",
ALLOCSET_DEFAULT_SIZES);
MemoryContext oldContext = MemoryContextSwitchTo(citusPartitionContext);
foreach_oid(partitionRelationId, partitionList)
{
CreateDistributedTable(partitionRelationId, distributionColumnName,
distributionMethod, shardCount, false,
parentRelationName);
MemoryContextReset(citusPartitionContext);
DistributedTableParams childDistributedTableParams = {
.colocateWithTableName = parentRelationName,
.shardCount = distributedTableParams->shardCount,
.shardCountIsStrict = false,
.distributionColumnName = distributedTableParams->distributionColumnName,
};
CreateCitusTable(partitionRelationId, tableType,
&childDistributedTableParams);
}
MemoryContextSwitchTo(oldContext);
MemoryContextDelete(citusPartitionContext);
}
/* copy over data for hash distributed and reference tables */
if (distributionMethod == DISTRIBUTE_BY_HASH ||
distributionMethod == DISTRIBUTE_BY_NONE)
if (tableType == HASH_DISTRIBUTED || tableType == REFERENCE_TABLE)
{
if (RegularTable(relationId))
{
@ -1145,6 +1260,70 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
}
/*
* DecideCitusTableParams decides CitusTableParams based on given CitusTableType
* and DistributedTableParams if it's a distributed table.
*
* DistributedTableParams should be non-null only if CitusTableType corresponds
* to a distributed table.
*/
static
CitusTableParams
DecideCitusTableParams(CitusTableType tableType,
DistributedTableParams *distributedTableParams)
{
CitusTableParams citusTableParams = { 0 };
switch (tableType)
{
case HASH_DISTRIBUTED:
{
citusTableParams.distributionMethod = DISTRIBUTE_BY_HASH;
citusTableParams.replicationModel =
DecideDistTableReplicationModel(DISTRIBUTE_BY_HASH,
distributedTableParams->
colocateWithTableName);
break;
}
case APPEND_DISTRIBUTED:
{
citusTableParams.distributionMethod = DISTRIBUTE_BY_APPEND;
citusTableParams.replicationModel =
DecideDistTableReplicationModel(DISTRIBUTE_BY_APPEND,
distributedTableParams->
colocateWithTableName);
break;
}
case RANGE_DISTRIBUTED:
{
citusTableParams.distributionMethod = DISTRIBUTE_BY_RANGE;
citusTableParams.replicationModel =
DecideDistTableReplicationModel(DISTRIBUTE_BY_RANGE,
distributedTableParams->
colocateWithTableName);
break;
}
case REFERENCE_TABLE:
{
citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE;
citusTableParams.replicationModel = REPLICATION_MODEL_2PC;
break;
}
default:
{
ereport(ERROR, (errmsg("unexpected table type when deciding Citus "
"table params")));
break;
}
}
return citusTableParams;
}
/*
* PropagatePrerequisiteObjectsForDistributedTable ensures we can create shards
* on all nodes by ensuring all dependent objects exist on all node.
@ -1190,7 +1369,7 @@ EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId, Oid ownerRelationId
foreach_oid(citusTableId, citusTableIdList)
{
List *seqInfoList = NIL;
GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0);
GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, DEPENDENCY_AUTO);
SequenceInfo *seqInfo = NULL;
foreach_ptr(seqInfo, seqInfoList)
@ -1267,7 +1446,7 @@ EnsureRelationHasCompatibleSequenceTypes(Oid relationId)
{
List *seqInfoList = NIL;
GetDependentSequencesWithRelation(relationId, &seqInfoList, 0);
GetDependentSequencesWithRelation(relationId, &seqInfoList, 0, DEPENDENCY_AUTO);
EnsureDistributedSequencesHaveOneType(relationId, seqInfoList);
}
@ -1405,17 +1584,15 @@ DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
/*
* DecideReplicationModel function decides which replication model should be
* used depending on given distribution configuration.
* DecideDistTableReplicationModel decides which replication model should be
* used for a distributed table depending on the given distribution configuration.
*/
static char
DecideReplicationModel(char distributionMethod, char *colocateWithTableName)
DecideDistTableReplicationModel(char distributionMethod, char *colocateWithTableName)
{
if (distributionMethod == DISTRIBUTE_BY_NONE)
{
return REPLICATION_MODEL_2PC;
}
else if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 &&
Assert(distributionMethod != DISTRIBUTE_BY_NONE);
if (!IsColocateWithDefault(colocateWithTableName) &&
!IsColocateWithNone(colocateWithTableName))
{
text *colocateWithTableNameText = cstring_to_text(colocateWithTableName);
@ -1491,28 +1668,34 @@ CreateHashDistributedTableShards(Oid relationId, int shardCount,
/*
* ColocationIdForNewTable returns a colocation id for hash-distributed table
* ColocationIdForNewTable returns a colocation id for the given table
* according to the given configuration. If there is no such configuration, it
* creates one and returns the colocation id of the newly created colocation
* group. Note that DistributedTableParams and the distribution column Var
* should be non-null only if CitusTableType corresponds to a distributed table.
*
* For append- and range-distributed tables, this function errors out if the
* colocateWithTableName parameter is set to anything other than the default,
* otherwise it directly returns INVALID_COLOCATION_ID.
*
* For reference tables, it returns the common reference table colocation id.
*
* This function assumes its caller takes the necessary lock on relationId to
* prevent possible changes to it.
*/
static uint32
ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
char distributionMethod, char replicationModel,
int shardCount, bool shardCountIsStrict,
char *colocateWithTableName)
ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
DistributedTableParams *distributedTableParams,
Var *distributionColumn)
{
CitusTableParams citusTableParams = DecideCitusTableParams(tableType,
distributedTableParams);
uint32 colocationId = INVALID_COLOCATION_ID;
if (distributionMethod == DISTRIBUTE_BY_APPEND ||
distributionMethod == DISTRIBUTE_BY_RANGE)
if (tableType == APPEND_DISTRIBUTED || tableType == RANGE_DISTRIBUTED)
{
if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0)
if (!IsColocateWithDefault(distributedTableParams->colocateWithTableName))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot distribute relation"),
@ -1522,7 +1705,7 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
return colocationId;
}
else if (distributionMethod == DISTRIBUTE_BY_NONE)
else if (tableType == REFERENCE_TABLE)
{
return CreateReferenceTableColocationId();
}
@ -1533,27 +1716,29 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
* can be sure that there will be no modifications on the colocation table
* until this transaction is committed.
*/
Assert(distributionMethod == DISTRIBUTE_BY_HASH);
Assert(citusTableParams.distributionMethod == DISTRIBUTE_BY_HASH);
Oid distributionColumnType = distributionColumn->vartype;
Oid distributionColumnCollation = get_typcollation(distributionColumnType);
/* get an advisory lock to serialize concurrent default group creations */
if (IsColocateWithDefault(colocateWithTableName))
if (IsColocateWithDefault(distributedTableParams->colocateWithTableName))
{
AcquireColocationDefaultLock();
}
colocationId = FindColocateWithColocationId(relationId,
replicationModel,
citusTableParams.replicationModel,
distributionColumnType,
distributionColumnCollation,
shardCount,
distributedTableParams->shardCount,
distributedTableParams->
shardCountIsStrict,
distributedTableParams->
colocateWithTableName);
if (IsColocateWithDefault(colocateWithTableName) && (colocationId !=
INVALID_COLOCATION_ID))
if (IsColocateWithDefault(distributedTableParams->colocateWithTableName) &&
(colocationId != INVALID_COLOCATION_ID))
{
/*
* we can release advisory lock if there is already a default entry for given params;
@ -1565,23 +1750,25 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
if (colocationId == INVALID_COLOCATION_ID)
{
if (IsColocateWithDefault(colocateWithTableName))
if (IsColocateWithDefault(distributedTableParams->colocateWithTableName))
{
/*
* Generate a new colocation ID and insert a pg_dist_colocation
* record.
*/
colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor,
colocationId = CreateColocationGroup(distributedTableParams->shardCount,
ShardReplicationFactor,
distributionColumnType,
distributionColumnCollation);
}
else if (IsColocateWithNone(colocateWithTableName))
else if (IsColocateWithNone(distributedTableParams->colocateWithTableName))
{
/*
* Generate a new colocation ID and insert a pg_dist_colocation
* record.
*/
colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor,
colocationId = CreateColocationGroup(distributedTableParams->shardCount,
ShardReplicationFactor,
distributionColumnType,
distributionColumnCollation);
}
@ -1608,6 +1795,8 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
{
Oid parentRelationId = InvalidOid;
ErrorIfTableHasUnsupportedIdentityColumn(relationId);
EnsureLocalTableEmptyIfNecessary(relationId, distributionMethod);
/* user really wants triggers? */
@ -2219,12 +2408,12 @@ CopyLocalDataIntoShards(Oid distributedRelationId)
EState *estate = CreateExecutorState();
ExprContext *econtext = GetPerTupleExprContext(estate);
econtext->ecxt_scantuple = slot;
const bool nonPublishableData = false;
DestReceiver *copyDest =
(DestReceiver *) CreateCitusCopyDestReceiver(distributedRelationId,
columnNameList,
partitionColumnIndex,
estate, NULL);
estate, NULL, nonPublishableData);
/* initialise state for writing to shards, we'll open connections on demand */
copyDest->rStartup(copyDest, 0, tupleDescriptor);

View File

@ -29,16 +29,14 @@
#include "storage/lmgr.h"
#include "utils/lsyscache.h"
typedef bool (*AddressPredicate)(const ObjectAddress *);
static void EnsureDependenciesCanBeDistributed(const ObjectAddress *relationAddress);
static void ErrorIfCircularDependencyExists(const ObjectAddress *objectAddress);
static int ObjectAddressComparator(const void *a, const void *b);
static List * FilterObjectAddressListByPredicate(List *objectAddressList,
AddressPredicate predicate);
static void EnsureDependenciesExistOnAllNodes(const ObjectAddress *target);
static List * GetDependencyCreateDDLCommands(const ObjectAddress *dependency);
static bool ShouldPropagateObject(const ObjectAddress *address);
static char * DropTableIfExistsCommand(Oid relationId);
/*
* EnsureDependenciesExistOnAllNodes finds all the dependencies that we support and makes
@ -325,6 +323,21 @@ GetDistributableDependenciesForObject(const ObjectAddress *target)
}
/*
* DropTableIfExistsCommand returns the command to drop the given table if it exists.
*/
static char *
DropTableIfExistsCommand(Oid relationId)
{
char *qualifiedRelationName = generate_qualified_relation_name(relationId);
StringInfo dropTableCommand = makeStringInfo();
appendStringInfo(dropTableCommand, "DROP TABLE IF EXISTS %s CASCADE",
qualifiedRelationName);
return dropTableCommand->data;
}
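/*
 * For example, for a (hypothetical) relation public.orders this produces
 * "DROP TABLE IF EXISTS public.orders CASCADE".
 */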
/*
* GetDependencyCreateDDLCommands returns a list (potentially empty or NIL) of ddl
* commands to execute on a worker to create the object.
@ -370,7 +383,7 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
bool creatingShellTableOnRemoteNode = true;
List *tableDDLCommands = GetFullTableCreationCommands(relationId,
WORKER_NEXTVAL_SEQUENCE_DEFAULTS,
INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS,
INCLUDE_IDENTITY,
creatingShellTableOnRemoteNode);
TableDDLCommand *tableDDLCommand = NULL;
foreach_ptr(tableDDLCommand, tableDDLCommands)
@ -379,6 +392,10 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
commandList = lappend(commandList, GetTableDDLCommand(
tableDDLCommand));
}
/* we need to drop the table first, if it exists, to make the table creation idempotent */
commandList = lcons(DropTableIfExistsCommand(relationId),
commandList);
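/*
 * lcons prepends, so the resulting list starts with the DROP TABLE IF EXISTS
 * command followed by the CREATE TABLE commands, ensuring the drop runs
 * before the create on the worker.
 */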
}
return commandList;
@ -438,6 +455,11 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
return DDLCommands;
}
case OCLASS_PUBLICATION:
{
return CreatePublicationDDLCommandsIdempotent(dependency);
}
case OCLASS_ROLE:
{
return GenerateCreateOrAlterRoleCommand(dependency->objectId);
@ -527,68 +549,6 @@ GetAllDependencyCreateDDLCommands(const List *dependencies)
}
/*
* ReplicateAllObjectsToNodeCommandList returns commands to replicate all
* previously marked objects to a worker node. The function also sets
* clusterHasDistributedFunction if there are any distributed functions.
*/
List *
ReplicateAllObjectsToNodeCommandList(const char *nodeName, int nodePort)
{
/* since we are executing ddl commands disable propagation first, primarily for mx */
List *ddlCommands = list_make1(DISABLE_DDL_PROPAGATION);
/*
* collect all dependencies in creation order and get their ddl commands
*/
List *dependencies = GetDistributedObjectAddressList();
/*
* Depending on changes in the environment, such as the enable_metadata_sync guc
* there might be objects in the distributed object address list that should currently
* not be propagated by citus as they are 'not supported'.
*/
dependencies = FilterObjectAddressListByPredicate(dependencies,
&SupportedDependencyByCitus);
/*
* When dependency lists are getting longer we see a delay in the creation time on the
* workers. We would like to inform the user. Currently we warn for lists greater than
* 100 items, where 100 is an arbitrarily chosen number. If we find it too high or too
* low we can adjust this based on experience.
*/
if (list_length(dependencies) > 100)
{
ereport(NOTICE, (errmsg("Replicating postgres objects to node %s:%d", nodeName,
nodePort),
errdetail("There are %d objects to replicate, depending on your "
"environment this might take a while",
list_length(dependencies))));
}
dependencies = OrderObjectAddressListInDependencyOrder(dependencies);
ObjectAddress *dependency = NULL;
foreach_ptr(dependency, dependencies)
{
if (IsAnyObjectAddressOwnedByExtension(list_make1(dependency), NULL))
{
/*
* we expect extension-owned objects to be created as a result
* of the extension being created.
*/
continue;
}
ddlCommands = list_concat(ddlCommands,
GetDependencyCreateDDLCommands(dependency));
}
ddlCommands = lappend(ddlCommands, ENABLE_DDL_PROPAGATION);
return ddlCommands;
}
/*
* ShouldPropagate determines if we should be propagating anything
*/
@ -744,7 +704,7 @@ ShouldPropagateAnyObject(List *addresses)
* FilterObjectAddressListByPredicate takes a list of ObjectAddress *'s and returns a list
* only containing the ObjectAddress *'s for which the predicate returned true.
*/
static List *
List *
FilterObjectAddressListByPredicate(List *objectAddressList, AddressPredicate predicate)
{
List *result = NIL;

View File

@ -245,6 +245,15 @@ static DistributeObjectOps Any_CreatePolicy = {
.address = NULL,
.markDistributed = false,
};
static DistributeObjectOps Any_CreatePublication = {
.deparse = DeparseCreatePublicationStmt,
.qualify = QualifyCreatePublicationStmt,
.preprocess = NULL,
.postprocess = PostProcessCreatePublicationStmt,
.operationType = DIST_OPS_CREATE,
.address = CreatePublicationStmtObjectAddress,
.markDistributed = true,
};
static DistributeObjectOps Any_CreateRole = {
.deparse = DeparseCreateRoleStmt,
.qualify = NULL,
@ -707,6 +716,45 @@ static DistributeObjectOps Procedure_Rename = {
.address = RenameFunctionStmtObjectAddress,
.markDistributed = false,
};
static DistributeObjectOps Publication_Alter = {
.deparse = DeparseAlterPublicationStmt,
.qualify = QualifyAlterPublicationStmt,
.preprocess = PreprocessAlterPublicationStmt,
.postprocess = PostprocessAlterDistributedObjectStmt,
.objectType = OBJECT_PUBLICATION,
.operationType = DIST_OPS_ALTER,
.address = AlterPublicationStmtObjectAddress,
.markDistributed = false,
};
static DistributeObjectOps Publication_AlterOwner = {
.deparse = DeparseAlterPublicationOwnerStmt,
.qualify = NULL,
.preprocess = PreprocessAlterDistributedObjectStmt,
.postprocess = PostprocessAlterDistributedObjectStmt,
.objectType = OBJECT_PUBLICATION,
.operationType = DIST_OPS_ALTER,
.address = AlterPublicationOwnerStmtObjectAddress,
.markDistributed = false,
};
static DistributeObjectOps Publication_Drop = {
.deparse = DeparseDropPublicationStmt,
.qualify = NULL,
.preprocess = PreprocessDropDistributedObjectStmt,
.postprocess = NULL,
.operationType = DIST_OPS_DROP,
.address = NULL,
.markDistributed = false,
};
static DistributeObjectOps Publication_Rename = {
.deparse = DeparseRenamePublicationStmt,
.qualify = NULL,
.preprocess = PreprocessAlterDistributedObjectStmt,
.postprocess = NULL,
.objectType = OBJECT_PUBLICATION,
.operationType = DIST_OPS_ALTER,
.address = RenamePublicationStmtObjectAddress,
.markDistributed = false,
};
static DistributeObjectOps Routine_AlterObjectDepends = {
.deparse = DeparseAlterFunctionDependsStmt,
.qualify = QualifyAlterFunctionDependsStmt,
@ -1399,6 +1447,11 @@ GetDistributeObjectOps(Node *node)
return &Procedure_AlterOwner;
}
case OBJECT_PUBLICATION:
{
return &Publication_AlterOwner;
}
case OBJECT_ROUTINE:
{
return &Routine_AlterOwner;
@ -1436,6 +1489,11 @@ GetDistributeObjectOps(Node *node)
return &Any_AlterPolicy;
}
case T_AlterPublicationStmt:
{
return &Publication_Alter;
}
case T_AlterRoleStmt:
{
return &Any_AlterRole;
@ -1610,6 +1668,11 @@ GetDistributeObjectOps(Node *node)
return &Any_CreatePolicy;
}
case T_CreatePublicationStmt:
{
return &Any_CreatePublication;
}
case T_CreateRoleStmt:
{
return &Any_CreateRole;
@ -1722,6 +1785,11 @@ GetDistributeObjectOps(Node *node)
return &Procedure_Drop;
}
case OBJECT_PUBLICATION:
{
return &Publication_Drop;
}
case OBJECT_ROUTINE:
{
return &Routine_Drop;
@ -1901,6 +1969,11 @@ GetDistributeObjectOps(Node *node)
return &Procedure_Rename;
}
case OBJECT_PUBLICATION:
{
return &Publication_Rename;
}
case OBJECT_ROUTINE:
{
return &Routine_Rename;

View File

@ -221,7 +221,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
if (!referencedIsCitus && !selfReferencingTable)
{
if (IsCitusLocalTableByDistParams(referencingDistMethod,
referencingReplicationModel))
referencingReplicationModel,
referencingColocationId))
{
ErrorOutForFKeyBetweenPostgresAndCitusLocalTable(referencedTableId);
}
@ -245,8 +246,7 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
if (!selfReferencingTable)
{
referencedDistMethod = PartitionMethod(referencedTableId);
referencedDistKey = IsCitusTableType(referencedTableId,
CITUS_TABLE_WITH_NO_DIST_KEY) ?
referencedDistKey = !HasDistributionKey(referencedTableId) ?
NULL :
DistPartitionKey(referencedTableId);
referencedColocationId = TableColocationId(referencedTableId);
@ -278,9 +278,17 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
}
bool referencingIsCitusLocalOrRefTable =
(referencingDistMethod == DISTRIBUTE_BY_NONE);
IsCitusLocalTableByDistParams(referencingDistMethod,
referencingReplicationModel,
referencingColocationId) ||
IsReferenceTableByDistParams(referencingDistMethod,
referencingReplicationModel);
bool referencedIsCitusLocalOrRefTable =
(referencedDistMethod == DISTRIBUTE_BY_NONE);
IsCitusLocalTableByDistParams(referencedDistMethod,
referencedReplicationModel,
referencedColocationId) ||
IsReferenceTableByDistParams(referencedDistMethod,
referencedReplicationModel);
if (referencingIsCitusLocalOrRefTable && referencedIsCitusLocalOrRefTable)
{
EnsureSupportedFKeyBetweenCitusLocalAndRefTable(constraintForm,
@ -313,7 +321,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
* reference table is referenced.
*/
bool referencedIsReferenceTable =
(referencedReplicationModel == REPLICATION_MODEL_2PC);
IsReferenceTableByDistParams(referencedDistMethod,
referencedReplicationModel);
if (!referencedIsReferenceTable && (
referencingColocationId == INVALID_COLOCATION_ID ||
referencingColocationId != referencedColocationId))

View File

@ -1190,7 +1190,7 @@ ErrorIfUnsupportedIndexStmt(IndexStmt *createIndexStatement)
* Non-distributed tables do not have a partition key, and unique constraints
* are allowed for them. Thus, we added a short-circuit for non-distributed tables.
*/
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKey(relationId))
{
return;
}

View File

@ -36,6 +36,7 @@
#include "distributed/local_multi_copy.h"
#include "distributed/shard_utils.h"
#include "distributed/version_compat.h"
#include "distributed/replication_origin_session_utils.h"
/* managed via GUC, default is 512 kB */
int LocalCopyFlushThresholdByte = 512 * 1024;
@ -46,7 +47,7 @@ static void AddSlotToBuffer(TupleTableSlot *slot, CitusCopyDestReceiver *copyDes
static bool ShouldAddBinaryHeaders(StringInfo buffer, bool isBinary);
static bool ShouldSendCopyNow(StringInfo buffer);
static void DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId,
CopyStmt *copyStatement, bool isEndOfCopy);
CopyStmt *copyStatement, bool isEndOfCopy, bool isPublishable);
static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead);
@ -94,7 +95,7 @@ WriteTupleToLocalShard(TupleTableSlot *slot, CitusCopyDestReceiver *copyDest, in
bool isEndOfCopy = false;
DoLocalCopy(localCopyOutState->fe_msgbuf, copyDest->distributedRelationId,
shardId,
copyDest->copyStatement, isEndOfCopy);
copyDest->copyStatement, isEndOfCopy, copyDest->isPublishable);
resetStringInfo(localCopyOutState->fe_msgbuf);
}
}
@ -133,7 +134,7 @@ FinishLocalCopyToShard(CitusCopyDestReceiver *copyDest, int64 shardId,
}
bool isEndOfCopy = true;
DoLocalCopy(localCopyOutState->fe_msgbuf, copyDest->distributedRelationId, shardId,
copyDest->copyStatement, isEndOfCopy);
copyDest->copyStatement, isEndOfCopy, copyDest->isPublishable);
}
@ -197,7 +198,7 @@ ShouldSendCopyNow(StringInfo buffer)
*/
static void
DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStatement,
bool isEndOfCopy)
bool isEndOfCopy, bool isPublishable)
{
/*
* Set the buffer as a global variable to allow ReadFromLocalBufferCallback
@ -205,6 +206,10 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat
* ReadFromLocalBufferCallback.
*/
LocalCopyBuffer = buffer;
if (!isPublishable)
{
SetupReplicationOriginLocalSession();
}
Oid shardOid = GetTableLocalShardOid(relationId, shardId);
Relation shard = table_open(shardOid, RowExclusiveLock);
@ -219,6 +224,10 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat
EndCopyFrom(cstate);
table_close(shard, NoLock);
if (!isPublishable)
{
ResetReplicationOriginLocalSession();
}
free_parsestate(pState);
}
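/*
 * A note on the mechanism (a hedged sketch of the intent): setting up a
 * replication origin for the session tags the copied rows with a non-default
 * origin, which logical decoding output plugins can filter out. That way,
 * non-publishable copies (e.g. rows moved into shards during a table
 * conversion) are not forwarded to subscribers a second time.
 */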

View File

@ -85,6 +85,7 @@
#include "distributed/relation_access_tracking.h"
#include "distributed/remote_commands.h"
#include "distributed/remote_transaction.h"
#include "distributed/replication_origin_session_utils.h"
#include "distributed/resource_lock.h"
#include "distributed/shard_pruning.h"
#include "distributed/shared_connection_stats.h"
@ -270,7 +271,8 @@ static CopyConnectionState * GetConnectionState(HTAB *connectionStateHash,
static CopyShardState * GetShardState(uint64 shardId, HTAB *shardStateHash,
HTAB *connectionStateHash,
bool *found, bool shouldUseLocalCopy, CopyOutState
copyOutState, bool isColocatedIntermediateResult);
copyOutState, bool isColocatedIntermediateResult,
bool isPublishable);
static MultiConnection * CopyGetPlacementConnection(HTAB *connectionStateHash,
ShardPlacement *placement,
bool colocatedIntermediateResult);
@ -285,7 +287,8 @@ static void InitializeCopyShardState(CopyShardState *shardState,
uint64 shardId,
bool canUseLocalCopy,
CopyOutState copyOutState,
bool colocatedIntermediateResult);
bool colocatedIntermediateResult, bool
isPublishable);
static void StartPlacementStateCopyCommand(CopyPlacementState *placementState,
CopyStmt *copyStatement,
CopyOutState copyOutState);
@ -393,7 +396,7 @@ CitusCopyFrom(CopyStmt *copyStatement, QueryCompletion *completionTag)
if (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
IsCitusTableTypeCacheEntry(cacheEntry, RANGE_DISTRIBUTED) ||
IsCitusTableTypeCacheEntry(cacheEntry, APPEND_DISTRIBUTED) ||
IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
!HasDistributionKeyCacheEntry(cacheEntry))
{
CopyToExistingShards(copyStatement, completionTag);
}
@ -492,9 +495,11 @@ CopyToExistingShards(CopyStmt *copyStatement, QueryCompletion *completionTag)
ExprContext *executorExpressionContext = GetPerTupleExprContext(executorState);
/* set up the destination for the COPY */
const bool publishableData = true;
CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(tableId, columnNameList,
partitionColumnIndex,
executorState, NULL);
executorState, NULL,
publishableData);
/* if the user specified an explicit append-to_shard option, write to it */
uint64 appendShardId = ProcessAppendToShardOption(tableId, copyStatement);
@ -1934,7 +1939,7 @@ CopyFlushOutput(CopyOutState cstate, char *start, char *pointer)
CitusCopyDestReceiver *
CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColumnIndex,
EState *executorState,
char *intermediateResultIdPrefix)
char *intermediateResultIdPrefix, bool isPublishable)
{
CitusCopyDestReceiver *copyDest = (CitusCopyDestReceiver *) palloc0(
sizeof(CitusCopyDestReceiver));
@ -1953,6 +1958,7 @@ CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColu
copyDest->executorState = executorState;
copyDest->colocatedIntermediateResultIdPrefix = intermediateResultIdPrefix;
copyDest->memoryContext = CurrentMemoryContext;
copyDest->isPublishable = isPublishable;
return copyDest;
}
@ -2318,7 +2324,9 @@ CitusSendTupleToPlacements(TupleTableSlot *slot, CitusCopyDestReceiver *copyDest
&cachedShardStateFound,
copyDest->shouldUseLocalCopy,
copyDest->copyOutState,
isColocatedIntermediateResult);
isColocatedIntermediateResult,
copyDest->isPublishable);
if (!cachedShardStateFound)
{
firstTupleInShard = true;
@ -2751,6 +2759,11 @@ ShutdownCopyConnectionState(CopyConnectionState *connectionState,
if (activePlacementState != NULL)
{
EndPlacementStateCopyCommand(activePlacementState, copyOutState);
if (!copyDest->isPublishable)
{
ResetReplicationOriginRemoteSession(
activePlacementState->connectionState->connection);
}
}
dlist_foreach(iter, &connectionState->bufferedPlacementList)
@ -2764,6 +2777,10 @@ ShutdownCopyConnectionState(CopyConnectionState *connectionState,
SendCopyDataToPlacement(placementState->data, shardId,
connectionState->connection);
EndPlacementStateCopyCommand(placementState, copyOutState);
if (!copyDest->isPublishable)
{
ResetReplicationOriginRemoteSession(connectionState->connection);
}
}
}
@ -3436,7 +3453,7 @@ static CopyShardState *
GetShardState(uint64 shardId, HTAB *shardStateHash,
HTAB *connectionStateHash, bool *found, bool
shouldUseLocalCopy, CopyOutState copyOutState,
bool isColocatedIntermediateResult)
bool isColocatedIntermediateResult, bool isPublishable)
{
CopyShardState *shardState = (CopyShardState *) hash_search(shardStateHash, &shardId,
HASH_ENTER, found);
@ -3444,7 +3461,8 @@ GetShardState(uint64 shardId, HTAB *shardStateHash,
{
InitializeCopyShardState(shardState, connectionStateHash,
shardId, shouldUseLocalCopy,
copyOutState, isColocatedIntermediateResult);
copyOutState, isColocatedIntermediateResult,
isPublishable);
}
return shardState;
@ -3461,7 +3479,8 @@ InitializeCopyShardState(CopyShardState *shardState,
HTAB *connectionStateHash, uint64 shardId,
bool shouldUseLocalCopy,
CopyOutState copyOutState,
bool colocatedIntermediateResult)
bool colocatedIntermediateResult,
bool isPublishable)
{
ListCell *placementCell = NULL;
int failedPlacementCount = 0;
@ -3532,6 +3551,11 @@ InitializeCopyShardState(CopyShardState *shardState,
RemoteTransactionBeginIfNecessary(connection);
}
if (!isPublishable)
{
SetupReplicationOriginRemoteSession(connection);
}
CopyPlacementState *placementState = palloc0(sizeof(CopyPlacementState));
placementState->shardState = shardState;
placementState->data = makeStringInfo();

View File

@ -0,0 +1,634 @@
/*-------------------------------------------------------------------------
*
* publication.c
* Commands for creating publications
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "catalog/pg_publication.h"
#include "catalog/pg_publication_rel.h"
#include "distributed/commands.h"
#include "distributed/deparser.h"
#include "distributed/listutils.h"
#include "distributed/metadata_utility.h"
#include "distributed/metadata_sync.h"
#include "distributed/metadata/distobject.h"
#include "distributed/reference_table_utils.h"
#include "distributed/worker_create_or_replace.h"
#include "nodes/makefuncs.h"
#include "nodes/parsenodes.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "pg_version_compat.h"
static CreatePublicationStmt * BuildCreatePublicationStmt(Oid publicationId);
#if (PG_VERSION_NUM >= PG_VERSION_15)
static PublicationObjSpec * BuildPublicationRelationObjSpec(Oid relationId,
Oid publicationId,
bool tableOnly);
#endif
static void AppendPublishOptionList(StringInfo str, List *strings);
static char * AlterPublicationOwnerCommand(Oid publicationId);
static bool ShouldPropagateCreatePublication(CreatePublicationStmt *stmt);
static List * ObjectAddressForPublicationName(char *publicationName, bool missingOk);
/*
* PostProcessCreatePublicationStmt handles CREATE PUBLICATION statements
* that contain distributed tables.
*/
List *
PostProcessCreatePublicationStmt(Node *node, const char *queryString)
{
CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
if (!ShouldPropagateCreatePublication(stmt))
{
/* should not propagate right now */
return NIL;
}
/* call into CreatePublicationStmtObjectAddress */
List *publicationAddresses = GetObjectAddressListFromParseTree(node, false, true);
/* the code-path only supports a single object */
Assert(list_length(publicationAddresses) == 1);
if (IsAnyObjectAddressOwnedByExtension(publicationAddresses, NULL))
{
/* should not propagate publications owned by extensions */
return NIL;
}
EnsureAllObjectDependenciesExistOnAllNodes(publicationAddresses);
const ObjectAddress *pubAddress = linitial(publicationAddresses);
List *commands = NIL;
commands = lappend(commands, DISABLE_DDL_PROPAGATION);
commands = lappend(commands, CreatePublicationDDLCommand(pubAddress->objectId));
commands = lappend(commands, ENABLE_DDL_PROPAGATION);
return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
}
/*
* CreatePublicationDDLCommandsIdempotent returns a list of DDL statements to be
* executed on a node to recreate the publication addressed by the publicationAddress.
*/
List *
CreatePublicationDDLCommandsIdempotent(const ObjectAddress *publicationAddress)
{
Assert(publicationAddress->classId == PublicationRelationId);
char *ddlCommand =
CreatePublicationDDLCommand(publicationAddress->objectId);
char *alterPublicationOwnerSQL =
AlterPublicationOwnerCommand(publicationAddress->objectId);
return list_make2(
WrapCreateOrReplace(ddlCommand),
alterPublicationOwnerSQL);
}
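/*
 * For illustration (hypothetical publication "my_pub" owned by "alice"), and
 * assuming WrapCreateOrReplace wraps the DDL in a
 * worker_create_or_replace_object() call, the resulting list is roughly:
 *   SELECT worker_create_or_replace_object('CREATE PUBLICATION my_pub ...');
 *   ALTER PUBLICATION my_pub OWNER TO alice;
 */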
/*
* CreatePublicationDDLCommand returns the CREATE PUBLICATION string that
* can be used to recreate a given publication.
*/
char *
CreatePublicationDDLCommand(Oid publicationId)
{
CreatePublicationStmt *createPubStmt = BuildCreatePublicationStmt(publicationId);
/* we took the WHERE clause from the catalog where it is already transformed */
bool whereClauseRequiresTransform = false;
/* only propagate Citus tables in publication */
bool includeLocalTables = false;
return DeparseCreatePublicationStmtExtended((Node *) createPubStmt,
whereClauseRequiresTransform,
includeLocalTables);
}
/*
* BuildCreatePublicationStmt constructs a CreatePublicationStmt struct for the
* given publication.
*/
static CreatePublicationStmt *
BuildCreatePublicationStmt(Oid publicationId)
{
CreatePublicationStmt *createPubStmt = makeNode(CreatePublicationStmt);
HeapTuple publicationTuple =
SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(publicationId));
if (!HeapTupleIsValid(publicationTuple))
{
ereport(ERROR, (errmsg("cannot find publication with oid: %d", publicationId)));
}
Form_pg_publication publicationForm =
(Form_pg_publication) GETSTRUCT(publicationTuple);
/* CREATE PUBLICATION <name> */
createPubStmt->pubname = pstrdup(NameStr(publicationForm->pubname));
/* FOR ALL TABLES */
createPubStmt->for_all_tables = publicationForm->puballtables;
ReleaseSysCache(publicationTuple);
#if (PG_VERSION_NUM >= PG_VERSION_15)
List *schemaIds = GetPublicationSchemas(publicationId);
Oid schemaId = InvalidOid;
foreach_oid(schemaId, schemaIds)
{
char *schemaName = get_namespace_name(schemaId);
PublicationObjSpec *publicationObject = makeNode(PublicationObjSpec);
publicationObject->pubobjtype = PUBLICATIONOBJ_TABLES_IN_SCHEMA;
publicationObject->pubtable = NULL;
publicationObject->name = schemaName;
publicationObject->location = -1;
createPubStmt->pubobjects = lappend(createPubStmt->pubobjects, publicationObject);
}
#endif
List *relationIds = GetPublicationRelations(publicationId,
publicationForm->pubviaroot ?
PUBLICATION_PART_ROOT :
PUBLICATION_PART_LEAF);
Oid relationId = InvalidOid;
int citusTableCount PG_USED_FOR_ASSERTS_ONLY = 0;
/* mainly for consistent ordering in test output */
relationIds = SortList(relationIds, CompareOids);
foreach_oid(relationId, relationIds)
{
#if (PG_VERSION_NUM >= PG_VERSION_15)
bool tableOnly = false;
/* since postgres 15, tables can have a column list and filter */
PublicationObjSpec *publicationObject =
BuildPublicationRelationObjSpec(relationId, publicationId, tableOnly);
createPubStmt->pubobjects = lappend(createPubStmt->pubobjects, publicationObject);
#else
/* before postgres 15, only full tables are supported */
char *schemaName = get_namespace_name(get_rel_namespace(relationId));
char *tableName = get_rel_name(relationId);
RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
createPubStmt->tables = lappend(createPubStmt->tables, rangeVar);
#endif
if (IsCitusTable(relationId))
{
citusTableCount++;
}
}
/* WITH (publish_via_partition_root = true) option */
bool publishViaRoot = publicationForm->pubviaroot;
char *publishViaRootString = publishViaRoot ? "true" : "false";
DefElem *pubViaRootOption = makeDefElem("publish_via_partition_root",
(Node *) makeString(publishViaRootString),
-1);
createPubStmt->options = lappend(createPubStmt->options, pubViaRootOption);
/* WITH (publish = 'insert, update, delete, truncate') option */
List *publishList = NIL;
if (publicationForm->pubinsert)
{
publishList = lappend(publishList, makeString("insert"));
}
if (publicationForm->pubupdate)
{
publishList = lappend(publishList, makeString("update"));
}
if (publicationForm->pubdelete)
{
publishList = lappend(publishList, makeString("delete"));
}
if (publicationForm->pubtruncate)
{
publishList = lappend(publishList, makeString("truncate"));
}
if (list_length(publishList) > 0)
{
StringInfo optionValue = makeStringInfo();
AppendPublishOptionList(optionValue, publishList);
DefElem *publishOption = makeDefElem("publish",
(Node *) makeString(optionValue->data), -1);
createPubStmt->options = lappend(createPubStmt->options, publishOption);
}
return createPubStmt;
}
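/*
 * As a rough example (hypothetical names), the statement built above deparses
 * to something like:
 *   CREATE PUBLICATION my_pub FOR TABLE public.dist_table
 *   WITH (publish_via_partition_root = 'false', publish = 'insert, update');
 */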
/*
* AppendPublishOptionList appends a list of publication options in
* comma-separated form.
*/
static void
AppendPublishOptionList(StringInfo str, List *options)
{
ListCell *stringCell = NULL;
foreach(stringCell, options)
{
const char *string = strVal(lfirst(stringCell));
if (stringCell != list_head(options))
{
appendStringInfoString(str, ", ");
}
/* we cannot escape these strings */
appendStringInfoString(str, string);
}
}
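/*
 * For example, given the options list ["insert", "update", "truncate"], the
 * buffer ends up containing "insert, update, truncate".
 */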
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
* BuildPublicationRelationObjSpec returns a PublicationObjSpec that
* can be included in a CREATE or ALTER PUBLICATION statement.
*/
static PublicationObjSpec *
BuildPublicationRelationObjSpec(Oid relationId, Oid publicationId,
bool tableOnly)
{
HeapTuple pubRelationTuple = SearchSysCache2(PUBLICATIONRELMAP,
ObjectIdGetDatum(relationId),
ObjectIdGetDatum(publicationId));
if (!HeapTupleIsValid(pubRelationTuple))
{
ereport(ERROR, (errmsg("cannot find relation with oid %d in publication "
"with oid %d", relationId, publicationId)));
}
List *columnNameList = NIL;
Node *whereClause = NULL;
/* build the column list */
if (!tableOnly)
{
bool isNull = false;
Datum attributesDatum = SysCacheGetAttr(PUBLICATIONRELMAP, pubRelationTuple,
Anum_pg_publication_rel_prattrs,
&isNull);
if (!isNull)
{
ArrayType *attributesArray = DatumGetArrayTypeP(attributesDatum);
int attributeCount = ARR_DIMS(attributesArray)[0];
int16 *elems = (int16 *) ARR_DATA_PTR(attributesArray);
for (int attNumIndex = 0; attNumIndex < attributeCount; attNumIndex++)
{
AttrNumber attributeNumber = elems[attNumIndex];
char *columnName = get_attname(relationId, attributeNumber, false);
columnNameList = lappend(columnNameList, makeString(columnName));
}
}
/* build the WHERE clause */
Datum whereClauseDatum = SysCacheGetAttr(PUBLICATIONRELMAP, pubRelationTuple,
Anum_pg_publication_rel_prqual,
&isNull);
if (!isNull)
{
/*
* We use the already-transformed parse tree form here, which does
* not match regular CreatePublicationStmt
*/
whereClause = stringToNode(TextDatumGetCString(whereClauseDatum));
}
}
ReleaseSysCache(pubRelationTuple);
char *schemaName = get_namespace_name(get_rel_namespace(relationId));
char *tableName = get_rel_name(relationId);
RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
/* build the FOR TABLE */
PublicationTable *publicationTable =
makeNode(PublicationTable);
publicationTable->relation = rangeVar;
publicationTable->whereClause = whereClause;
publicationTable->columns = columnNameList;
PublicationObjSpec *publicationObject = makeNode(PublicationObjSpec);
publicationObject->pubobjtype = PUBLICATIONOBJ_TABLE;
publicationObject->pubtable = publicationTable;
publicationObject->name = NULL;
publicationObject->location = -1;
return publicationObject;
}
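/*
 * For illustration (hypothetical table): with a column list and a row filter,
 * the object spec built above deparses to something like
 *   TABLE public.orders (order_id, status) WHERE (status <> 'draft')
 * within the surrounding CREATE/ALTER PUBLICATION statement.
 */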
#endif
/*
* PreprocessAlterPublicationStmt handles ALTER PUBLICATION statements
* in a way that is mostly similar to PreprocessAlterDistributedObjectStmt,
* except we do not ensure sequential mode (publications do not interact with
* shards) and can handle NULL deparse commands for ALTER PUBLICATION commands
* that only involve local tables.
*/
List *
PreprocessAlterPublicationStmt(Node *stmt, const char *queryString,
ProcessUtilityContext processUtilityContext)
{
List *addresses = GetObjectAddressListFromParseTree(stmt, false, false);
/* the code-path only supports a single object */
Assert(list_length(addresses) == 1);
if (!ShouldPropagateAnyObject(addresses))
{
return NIL;
}
EnsureCoordinator();
QualifyTreeNode(stmt);
const char *sql = DeparseTreeNode((Node *) stmt);
if (sql == NULL)
{
/*
* Deparsing logic decided that there is nothing to propagate, e.g.
* because the command only concerns local tables.
*/
return NIL;
}
List *commands = list_make3(DISABLE_DDL_PROPAGATION,
(void *) sql,
ENABLE_DDL_PROPAGATION);
return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
}
/*
* GetAlterPublicationDDLCommandsForTable gets a list of ALTER PUBLICATION .. ADD/DROP
* commands for the given table.
*
* If isAdd is true, it returns ALTER PUBLICATION .. ADD TABLE commands for all
* publications.
*
* Otherwise, it returns ALTER PUBLICATION .. DROP TABLE commands for all
* publications.
*/
List *
GetAlterPublicationDDLCommandsForTable(Oid relationId, bool isAdd)
{
List *commands = NIL;
List *publicationIds = GetRelationPublications(relationId);
Oid publicationId = InvalidOid;
foreach_oid(publicationId, publicationIds)
{
char *command = GetAlterPublicationTableDDLCommand(publicationId,
relationId, isAdd);
commands = lappend(commands, command);
}
return commands;
}
/*
* GetAlterPublicationTableDDLCommand generates an ALTER PUBLICATION .. ADD/DROP TABLE
* command for the given publication and relation ID.
*
* If isAdd is true, it returns an ALTER PUBLICATION .. ADD TABLE command.
* Otherwise, it returns an ALTER PUBLICATION .. DROP TABLE command.
*/
char *
GetAlterPublicationTableDDLCommand(Oid publicationId, Oid relationId,
bool isAdd)
{
HeapTuple pubTuple = SearchSysCache1(PUBLICATIONOID,
ObjectIdGetDatum(publicationId));
if (!HeapTupleIsValid(pubTuple))
{
ereport(ERROR, (errmsg("cannot find publication with oid: %d",
publicationId)));
}
Form_pg_publication pubForm = (Form_pg_publication) GETSTRUCT(pubTuple);
AlterPublicationStmt *alterPubStmt = makeNode(AlterPublicationStmt);
alterPubStmt->pubname = pstrdup(NameStr(pubForm->pubname));
ReleaseSysCache(pubTuple);
#if (PG_VERSION_NUM >= PG_VERSION_15)
bool tableOnly = !isAdd;
/* since postgres 15, tables can have a column list and filter */
PublicationObjSpec *publicationObject =
BuildPublicationRelationObjSpec(relationId, publicationId, tableOnly);
alterPubStmt->pubobjects = lappend(alterPubStmt->pubobjects, publicationObject);
alterPubStmt->action = isAdd ? AP_AddObjects : AP_DropObjects;
#else
/* before postgres 15, only full tables are supported */
char *schemaName = get_namespace_name(get_rel_namespace(relationId));
char *tableName = get_rel_name(relationId);
RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
alterPubStmt->tables = lappend(alterPubStmt->tables, rangeVar);
alterPubStmt->tableAction = isAdd ? DEFELEM_ADD : DEFELEM_DROP;
#endif
/* we take the WHERE clause from the catalog where it is already transformed */
bool whereClauseNeedsTransform = false;
/*
* We use these commands to restore publications before/after transforming a
* table, including transformations to/from local tables.
*/
bool includeLocalTables = true;
char *command = DeparseAlterPublicationStmtExtended((Node *) alterPubStmt,
whereClauseNeedsTransform,
includeLocalTables);
return command;
}
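/*
 * Example output (hypothetical names): for isAdd = true this yields something
 * like "ALTER PUBLICATION my_pub ADD TABLE public.orders"; for isAdd = false,
 * the corresponding DROP TABLE form.
 */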
/*
* AlterPublicationOwnerCommand returns "ALTER PUBLICATION .. OWNER TO .."
* statement for the specified publication.
*/
static char *
AlterPublicationOwnerCommand(Oid publicationId)
{
HeapTuple publicationTuple =
SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(publicationId));
if (!HeapTupleIsValid(publicationTuple))
{
ereport(ERROR, (errmsg("cannot find publication with oid: %d",
publicationId)));
}
Form_pg_publication publicationForm =
(Form_pg_publication) GETSTRUCT(publicationTuple);
char *publicationName = NameStr(publicationForm->pubname);
Oid publicationOwnerId = publicationForm->pubowner;
char *publicationOwnerName = GetUserNameFromId(publicationOwnerId, false);
StringInfo alterCommand = makeStringInfo();
appendStringInfo(alterCommand, "ALTER PUBLICATION %s OWNER TO %s",
quote_identifier(publicationName),
quote_identifier(publicationOwnerName));
ReleaseSysCache(publicationTuple);
return alterCommand->data;
}
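/*
 * For example (hypothetical owner), this produces
 * "ALTER PUBLICATION my_pub OWNER TO alice".
 */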
/*
* ShouldPropagateCreatePublication tests if we need to propagate a CREATE PUBLICATION
* statement.
*/
static bool
ShouldPropagateCreatePublication(CreatePublicationStmt *stmt)
{
if (!ShouldPropagate())
{
return false;
}
if (!ShouldPropagateCreateInCoordinatedTransction())
{
return false;
}
return true;
}
/*
* AlterPublicationStmtObjectAddress generates the object address for the
* publication altered by a regular ALTER PUBLICATION .. statement.
*/
List *
AlterPublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
{
AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
return ObjectAddressForPublicationName(stmt->pubname, missingOk);
}
/*
* AlterPublicationOwnerStmtObjectAddress generates the object address for the
* publication altered by the given ALTER PUBLICATION .. OWNER TO statement.
*/
List *
AlterPublicationOwnerStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
{
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
return ObjectAddressForPublicationName(strVal(stmt->object), missingOk);
}
/*
* CreatePublicationStmtObjectAddress generates the object address for the
* publication created by the given CREATE PUBLICATION statement.
*/
List *
CreatePublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
{
CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
return ObjectAddressForPublicationName(stmt->pubname, missingOk);
}
/*
* RenamePublicationStmtObjectAddress generates the object address for the
* publication altered by the given ALTER PUBLICATION .. RENAME TO statement.
*/
List *
RenamePublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostprocess)
{
RenameStmt *stmt = castNode(RenameStmt, node);
return ObjectAddressForPublicationName(strVal(stmt->object), missingOk);
}
/*
* ObjectAddressForPublicationName returns the object address for a given publication
* name.
*/
static List *
ObjectAddressForPublicationName(char *publicationName, bool missingOk)
{
Oid publicationId = InvalidOid;
HeapTuple publicationTuple =
SearchSysCache1(PUBLICATIONNAME, CStringGetDatum(publicationName));
if (HeapTupleIsValid(publicationTuple))
{
Form_pg_publication publicationForm =
(Form_pg_publication) GETSTRUCT(publicationTuple);
publicationId = publicationForm->oid;
ReleaseSysCache(publicationTuple);
}
else if (!missingOk)
{
/* it should have just been created */
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("publication \"%s\" does not exist", publicationName)));
}
ObjectAddress *address = palloc0(sizeof(ObjectAddress));
ObjectAddressSet(*address, PublicationRelationId, publicationId);
return list_make1(address);
}
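/*
 * A minimal usage sketch (not part of this patch; the publication name and
 * the guard macro are illustrative): resolving a publication name into the
 * single-element ObjectAddress list returned above, tolerating a missing
 * publication.
 */
#ifdef CITUS_USAGE_SKETCH
static void
ExampleResolvePublicationAddress(void)
{
	bool missingOk = true;
	List *addresses = ObjectAddressForPublicationName("my_pub", missingOk);

	/* the helper always returns a single-element list */
	ObjectAddress *address = linitial(addresses);
	if (!OidIsValid(address->objectId))
	{
		elog(NOTICE, "publication \"my_pub\" does not exist");
	}
}
#endif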

View File

@ -33,7 +33,8 @@
/* Local functions forward declarations for helper functions */
static bool OptionsSpecifyOwnedBy(List *optionList, Oid *ownedByTableId);
static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress);
static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char
depType);
static List * FilterDistributedSequences(GrantStmt *stmt);
@ -183,7 +184,7 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList,
char *columnName = NameStr(attributeForm->attname);
List *columnOwnedSequences =
getOwnedSequences_internal(relationId, attributeIndex + 1, 0);
getOwnedSequences_internal(relationId, attributeIndex + 1, DEPENDENCY_AUTO);
if (attributeForm->atthasdef && list_length(columnOwnedSequences) == 0)
{
@ -453,21 +454,22 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString,
/* the code-path only supports a single object */
Assert(list_length(addresses) == 1);
/* We have already asserted that we have exactly 1 address in the addresses. */
ObjectAddress *address = linitial(addresses);
/* error out if the sequence is distributed */
if (IsAnyObjectDistributed(addresses))
if (IsAnyObjectDistributed(addresses) || SequenceUsedInDistributedTable(address,
DEPENDENCY_INTERNAL))
{
ereport(ERROR, (errmsg(
"Altering a distributed sequence is currently not supported.")));
}
/* We have already asserted that we have exactly 1 address in the addresses. */
ObjectAddress *address = linitial(addresses);
/*
* error out if the sequence is used in a distributed table
* and this is an ALTER SEQUENCE .. AS .. statement
*/
Oid citusTableId = SequenceUsedInDistributedTable(address);
Oid citusTableId = SequenceUsedInDistributedTable(address, DEPENDENCY_AUTO);
if (citusTableId != InvalidOid)
{
List *options = stmt->options;
@ -497,16 +499,19 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString,
* SequenceUsedInDistributedTable returns the ID of the Citus table that uses
* the argument sequence as the default value of a column, or InvalidOid if
* no distributed table uses the sequence.
* See DependencyType for the possible values of depType.
* We use DEPENDENCY_INTERNAL for sequences created by identity columns and
* DEPENDENCY_AUTO for regular sequences.
*/
static Oid
SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress)
SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char depType)
{
List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE);
Oid citusTableId = InvalidOid;
foreach_oid(citusTableId, citusTableIdList)
{
List *seqInfoList = NIL;
GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0);
GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, depType);
SequenceInfo *seqInfo = NULL;
foreach_ptr(seqInfo, seqInfoList)
{

View File

@ -75,7 +75,7 @@ static void DistributePartitionUsingParent(Oid parentRelationId,
static void ErrorIfMultiLevelPartitioning(Oid parentRelationId, Oid partitionRelationId);
static void ErrorIfAttachCitusTableToPgLocalTable(Oid parentRelationId,
Oid partitionRelationId);
static bool AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(
static bool ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(
AlterTableStmt *alterTableStatement);
static bool ShouldMarkConnectedRelationsNotAutoConverted(Oid leftRelationId,
Oid rightRelationId);
@ -1119,7 +1119,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
if (ShouldEnableLocalReferenceForeignKeys() &&
processUtilityContext != PROCESS_UTILITY_SUBCOMMAND &&
AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(alterTableStatement))
ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(alterTableStatement))
{
/*
* We don't process subcommands generated by postgres.
@ -1378,29 +1378,6 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
}
}
/*
* We check for ADD COLUMN .. GENERATED .. AS IDENTITY expr
* since it uses a sequence as an internal dependency
* we should deparse the statement
*/
constraint = NULL;
foreach_ptr(constraint, columnConstraints)
{
if (constraint->contype == CONSTR_IDENTITY)
{
deparseAT = true;
useInitialDDLCommandString = false;
/*
* Since we don't support constraints for AT_AddColumn
* we have to set is_not_null to true explicitly for identity columns
*/
ColumnDef *newColDef = copyObject(columnDefinition);
newColDef->constraints = NULL;
newColDef->is_not_null = true;
newCmd->def = (Node *) newColDef;
}
}
/*
* We check for ADD COLUMN .. SERIAL pseudo-type
@ -1584,12 +1561,12 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
/*
* AlterTableDefinesFKeyBetweenPostgresAndNonDistTable returns true if given
* ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef returns true if given
* alter table command defines foreign key between a postgres table and a
* reference or citus local table.
*/
static bool
AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableStatement)
ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(AlterTableStmt *alterTableStatement)
{
List *foreignKeyConstraintList =
GetAlterTableAddFKeyConstraintList(alterTableStatement);
@ -1607,9 +1584,12 @@ AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableSt
if (!IsCitusTable(leftRelationId))
{
return RelationIdListContainsCitusTableType(rightRelationIdList,
CITUS_TABLE_WITH_NO_DIST_KEY);
CITUS_LOCAL_TABLE) ||
RelationIdListContainsCitusTableType(rightRelationIdList,
REFERENCE_TABLE);
}
else if (IsCitusTableType(leftRelationId, CITUS_TABLE_WITH_NO_DIST_KEY))
else if (IsCitusTableType(leftRelationId, CITUS_LOCAL_TABLE) ||
IsCitusTableType(leftRelationId, REFERENCE_TABLE))
{
return RelationIdListContainsPostgresTable(rightRelationIdList);
}
@ -2539,34 +2519,6 @@ PostprocessAlterTableStmt(AlterTableStmt *alterTableStatement)
}
}
}
/*
* We check for ADD COLUMN .. GENERATED AS IDENTITY expr
* since it uses a seqeunce as an internal dependency
*/
constraint = NULL;
foreach_ptr(constraint, columnConstraints)
{
if (constraint->contype == CONSTR_IDENTITY)
{
AttrNumber attnum = get_attnum(relationId,
columnDefinition->colname);
bool missing_ok = false;
Oid seqOid = getIdentitySequence(relationId, attnum, missing_ok);
if (ShouldSyncTableMetadata(relationId))
{
needMetadataSyncForNewSequences = true;
alterTableDefaultNextvalCmd =
GetAddColumnWithNextvalDefaultCmd(seqOid,
relationId,
columnDefinition
->colname,
columnDefinition
->typeName);
}
}
}
}
/*
* We check for ALTER COLUMN .. SET DEFAULT nextval('user_defined_seq')
@ -3222,6 +3174,17 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
{
if (columnConstraint->contype == CONSTR_IDENTITY)
{
/*
* We currently don't support adding an identity column for an MX table
*/
if (ShouldSyncTableMetadata(relationId))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg(
"cannot execute ADD COLUMN commands involving identity"
" columns when metadata is synchronized to workers")));
}
/*
* Currently we don't support backfilling the new identity column with default values
* if the table is not empty
@ -3352,7 +3315,8 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
*/
AttrNumber attnum = get_attnum(relationId, command->name);
List *seqInfoList = NIL;
GetDependentSequencesWithRelation(relationId, &seqInfoList, attnum);
GetDependentSequencesWithRelation(relationId, &seqInfoList, attnum,
DEPENDENCY_AUTO);
if (seqInfoList != NIL)
{
ereport(ERROR, (errmsg("cannot execute ALTER COLUMN TYPE .. command "
@ -3666,7 +3630,7 @@ SetupExecutionModeForAlterTable(Oid relationId, AlterTableCmd *command)
* sequential mode.
*/
if (executeSequentially &&
!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) &&
HasDistributionKey(relationId) &&
ParallelQueryExecutedInTransaction())
{
char *relationName = get_rel_name(relationId);
@ -4011,3 +3975,59 @@ MakeNameListFromRangeVar(const RangeVar *rel)
return list_make1(makeString(rel->relname));
}
}
/*
* ErrorIfTableHasUnsupportedIdentityColumn errors out if the given table has any identity column other than a bigint identity column.
*/
void
ErrorIfTableHasUnsupportedIdentityColumn(Oid relationId)
{
Relation relation = relation_open(relationId, AccessShareLock);
TupleDesc tupleDescriptor = RelationGetDescr(relation);
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
attributeIndex++)
{
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
if (attributeForm->attidentity && attributeForm->atttypid != INT8OID)
{
char *qualifiedRelationName = generate_qualified_relation_name(relationId);
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg(
"cannot complete operation on %s with smallint/int identity column",
qualifiedRelationName),
errhint(
"Use bigint identity column instead.")));
}
}
relation_close(relation, NoLock);
}
/*
* ErrorIfTableHasIdentityColumn errors out if the given table has an identity column
*/
void
ErrorIfTableHasIdentityColumn(Oid relationId)
{
Relation relation = relation_open(relationId, AccessShareLock);
TupleDesc tupleDescriptor = RelationGetDescr(relation);
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
attributeIndex++)
{
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
if (attributeForm->attidentity)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg(
"cannot complete operation on a table with identity column")));
}
}
relation_close(relation, NoLock);
}
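/*
 * Illustrative sketch (not part of this patch; assumes relationId refers to
 * an existing table and that the caller chooses which check applies): code
 * paths that are about to distribute or alter a table can combine the two
 * checks above.
 */
#ifdef CITUS_USAGE_SKETCH
static void
ExampleIdentityColumnChecks(Oid relationId, bool allowBigintIdentity)
{
	if (allowBigintIdentity)
	{
		/* errors out only for smallint/int identity columns */
		ErrorIfTableHasUnsupportedIdentityColumn(relationId);
	}
	else
	{
		/* errors out for any identity column */
		ErrorIfTableHasIdentityColumn(relationId);
	}
}
#endif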

View File

@ -324,7 +324,7 @@ ExecuteTruncateStmtSequentialIfNecessary(TruncateStmt *command)
{
Oid relationId = RangeVarGetRelid(rangeVar, NoLock, failOK);
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) &&
if (IsCitusTable(relationId) && !HasDistributionKey(relationId) &&
TableReferenced(relationId))
{
char *relationName = get_rel_name(relationId);

View File

@ -53,6 +53,7 @@
#include "distributed/coordinator_protocol.h"
#include "distributed/deparser.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/executor_util.h"
#include "distributed/foreign_key_relationship.h"
#include "distributed/listutils.h"
#include "distributed/local_executor.h"

View File

@ -1202,6 +1202,17 @@ FinishConnectionEstablishment(MultiConnection *connection)
}
/*
* ForceConnectionCloseAtTransactionEnd marks connection to be closed at the end of the
* transaction.
*/
void
ForceConnectionCloseAtTransactionEnd(MultiConnection *connection)
{
connection->forceCloseAtTransactionEnd = true;
}
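/*
 * Usage sketch (hypothetical caller, not part of this patch): after changing
 * session-level state on a worker connection that should not leak into later
 * transactions, mark the connection so ShouldShutdownConnection discards it
 * instead of returning it to the connection cache.
 */
#ifdef CITUS_USAGE_SKETCH
static void
ExampleTaintConnection(MultiConnection *connection)
{
	ForceConnectionCloseAtTransactionEnd(connection);
}
#endif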
/*
* ClaimConnectionExclusively signals that this connection is actively being
* used. That means it'll not be, again, returned by
@ -1484,6 +1495,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit)
* - Current cached connections is already at MaxCachedConnectionsPerWorker
* - Connection is forced to close at the end of transaction
* - Connection is not in OK state
* - Connection has a replication origin setup
* - A transaction is still in progress (usually because we are cancelling a distributed transaction)
* - A connection reached its maximum lifetime
*/
@ -1503,6 +1515,7 @@ ShouldShutdownConnection(MultiConnection *connection, const int cachedConnection
PQstatus(connection->pgConn) != CONNECTION_OK ||
!RemoteTransactionIdle(connection) ||
connection->requiresReplication ||
connection->isReplicationOriginSessionSetup ||
(MaxCachedConnectionLifetime >= 0 &&
MillisecondsToTimeout(connection->connectionEstablishmentStart,
MaxCachedConnectionLifetime) <= 0);

View File

@ -573,6 +573,47 @@ SendRemoteCommand(MultiConnection *connection, const char *command)
}
/*
* ExecuteRemoteCommandAndCheckResult executes the given command on the remote
* node and checks whether the result matches the expected value, returning
* true on a match and false otherwise.
*/
bool
ExecuteRemoteCommandAndCheckResult(MultiConnection *connection, char *command,
char *expected)
{
if (!SendRemoteCommand(connection, command))
{
/* if we cannot connect, we warn and report false */
ReportConnectionError(connection, WARNING);
return false;
}
bool raiseInterrupts = true;
PGresult *queryResult = GetRemoteCommandResult(connection, raiseInterrupts);
/* if remote node throws an error, we also throw an error */
if (!IsResponseOK(queryResult))
{
ReportResultError(connection, queryResult, ERROR);
}
StringInfo queryResultString = makeStringInfo();
/* Evaluate the queryResult and store it into the queryResultString */
bool success = EvaluateSingleQueryResult(connection, queryResult, queryResultString);
bool result = false;
if (success && strcmp(queryResultString->data, expected) == 0)
{
result = true;
}
PQclear(queryResult);
ForgetResults(connection);
return result;
}
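/*
 * Usage sketch (assumptions: an established connection, and a query and
 * expected value that are purely illustrative): verify a remote session
 * setting and discard the connection at transaction end when it differs.
 */
#ifdef CITUS_USAGE_SKETCH
static void
ExampleVerifyRemoteSetting(MultiConnection *connection)
{
	if (!ExecuteRemoteCommandAndCheckResult(connection,
											"SHOW is_superuser;", "on"))
	{
		/* defined in connection_management.c above */
		ForceConnectionCloseAtTransactionEnd(connection);
	}
}
#endif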
/*
* ReadFirstColumnAsText reads the first column of result tuples from the given
* PGresult struct and returns them in a StringInfo list.

View File

@ -304,10 +304,7 @@ pg_get_sequencedef(Oid sequenceRelationId)
* When it's WORKER_NEXTVAL_SEQUENCE_DEFAULTS, the function creates the DEFAULT
* clause using worker_nextval('sequence') and not nextval('sequence')
* When IncludeIdentities is NO_IDENTITY, the function does not include identity column
* specifications. When it's INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, the function
* uses sequences and set them as default values for identity columns by using exactly
* the same approach with worker_nextval('sequence') & nextval('sequence') logic
* desribed above. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses.
* specifications. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTITY clauses.
*/
char *
pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
@ -403,26 +400,9 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
Oid seqOid = getIdentitySequence(RelationGetRelid(relation),
attributeForm->attnum, missing_ok);
char *sequenceName = generate_qualified_relation_name(seqOid);
if (includeIdentityDefaults == INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS)
{
if (pg_get_sequencedef(seqOid)->seqtypid != INT8OID)
{
appendStringInfo(&buffer,
" DEFAULT worker_nextval(%s::regclass)",
quote_literal_cstr(sequenceName));
}
else
{
appendStringInfo(&buffer, " DEFAULT nextval(%s::regclass)",
quote_literal_cstr(sequenceName));
}
}
else if (includeIdentityDefaults == INCLUDE_IDENTITY)
if (includeIdentityDefaults == INCLUDE_IDENTITY)
{
Form_pg_sequence pgSequenceForm = pg_get_sequencedef(seqOid);
uint64 sequenceStart = nextval_internal(seqOid, false);
char *sequenceDef = psprintf(
" GENERATED %s AS IDENTITY (INCREMENT BY " INT64_FORMAT \
" MINVALUE " INT64_FORMAT " MAXVALUE "
@ -433,7 +413,8 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
"ALWAYS" : "BY DEFAULT",
pgSequenceForm->seqincrement,
pgSequenceForm->seqmin,
pgSequenceForm->seqmax, sequenceStart,
pgSequenceForm->seqmax,
pgSequenceForm->seqstart,
pgSequenceForm->seqcache,
pgSequenceForm->seqcycle ? "" : "NO ");
@ -1391,7 +1372,7 @@ convert_aclright_to_string(int aclright)
/*
* contain_nextval_expression_walker walks over expression tree and returns
* true if it contains call to 'nextval' function.
* true if it contains a call to the 'nextval' function or an identity column.
*/
bool
contain_nextval_expression_walker(Node *node, void *context)
@ -1401,6 +1382,13 @@ contain_nextval_expression_walker(Node *node, void *context)
return false;
}
/* identity columns are represented as NextValueExpr nodes */
if (IsA(node, NextValueExpr))
{
return true;
}
/* check if the node contains call to 'nextval' */
if (IsA(node, FuncExpr))
{
FuncExpr *funcExpr = (FuncExpr *) node;

View File

@ -0,0 +1,690 @@
/*-------------------------------------------------------------------------
*
* deparse_publication_stmts.c
* All routines to deparse publication statements.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/relation.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/deparser.h"
#include "distributed/listutils.h"
#include "distributed/namespace_utils.h"
#include "lib/stringinfo.h"
#include "parser/parse_clause.h"
#include "parser/parse_collate.h"
#include "parser/parse_node.h"
#include "parser/parse_relation.h"
#include "nodes/value.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/ruleutils.h"
static void AppendCreatePublicationStmt(StringInfo buf, CreatePublicationStmt *stmt,
bool whereClauseNeedsTransform,
bool includeLocalTables);
#if (PG_VERSION_NUM >= PG_VERSION_15)
static bool AppendPublicationObjects(StringInfo buf, List *publicationObjects,
bool whereClauseNeedsTransform,
bool includeLocalTables);
static void AppendWhereClauseExpression(StringInfo buf, RangeVar *tableName,
Node *whereClause,
bool whereClauseNeedsTransform);
static void AppendAlterPublicationAction(StringInfo buf, AlterPublicationAction action);
#else
static bool AppendTables(StringInfo buf, List *tables, bool includeLocalTables);
static void AppendDefElemAction(StringInfo buf, DefElemAction action);
#endif
static bool AppendAlterPublicationStmt(StringInfo buf, AlterPublicationStmt *stmt,
bool whereClauseNeedsTransform,
bool includeLocalTables);
static void AppendDropPublicationStmt(StringInfo buf, DropStmt *stmt);
static void AppendRenamePublicationStmt(StringInfo buf, RenameStmt *stmt);
static void AppendAlterPublicationOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt);
static void AppendPublicationOptions(StringInfo stringBuffer, List *optionList);
static void AppendIdentifierList(StringInfo buf, List *objects);
/*
* DeparseCreatePublicationStmt builds and returns a string representing a
* CreatePublicationStmt.
*/
char *
DeparseCreatePublicationStmt(Node *node)
{
/* regular deparsing function takes CREATE PUBLICATION from the parser */
bool whereClauseNeedsTransform = false;
/* for regular CREATE PUBLICATION we do not propagate local tables */
bool includeLocalTables = false;
return DeparseCreatePublicationStmtExtended(node, whereClauseNeedsTransform,
includeLocalTables);
}
/*
* DeparseCreatePublicationStmtExtended builds and returns a string representing a
* CreatePublicationStmt, which may have already-transformed expressions.
*/
char *
DeparseCreatePublicationStmtExtended(Node *node, bool whereClauseNeedsTransform,
bool includeLocalTables)
{
CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
StringInfoData str = { 0 };
initStringInfo(&str);
AppendCreatePublicationStmt(&str, stmt, whereClauseNeedsTransform,
includeLocalTables);
return str.data;
}
/*
* AppendCreatePublicationStmt appends a string representing a
* CreatePublicationStmt to a buffer.
*/
static void
AppendCreatePublicationStmt(StringInfo buf, CreatePublicationStmt *stmt,
bool whereClauseNeedsTransform,
bool includeLocalTables)
{
appendStringInfo(buf, "CREATE PUBLICATION %s",
quote_identifier(stmt->pubname));
if (stmt->for_all_tables)
{
appendStringInfoString(buf, " FOR ALL TABLES");
}
#if (PG_VERSION_NUM >= PG_VERSION_15)
else if (stmt->pubobjects != NIL)
{
bool hasObjects = false;
PublicationObjSpec *publicationObject = NULL;
/*
* Check whether there are objects to propagate, mainly to know whether
* we should include "FOR".
*/
foreach_ptr(publicationObject, stmt->pubobjects)
{
if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
{
/* FOR TABLE ... */
PublicationTable *publicationTable = publicationObject->pubtable;
if (includeLocalTables ||
IsCitusTableRangeVar(publicationTable->relation, NoLock, false))
{
hasObjects = true;
break;
}
}
else
{
hasObjects = true;
break;
}
}
if (hasObjects)
{
appendStringInfoString(buf, " FOR");
AppendPublicationObjects(buf, stmt->pubobjects, whereClauseNeedsTransform,
includeLocalTables);
}
}
#else
else if (stmt->tables != NIL)
{
bool hasTables = false;
RangeVar *rangeVar = NULL;
/*
* Check whether there are tables to propagate, mainly to know whether
* we should include "FOR".
*/
foreach_ptr(rangeVar, stmt->tables)
{
if (includeLocalTables || IsCitusTableRangeVar(rangeVar, NoLock, false))
{
hasTables = true;
break;
}
}
if (hasTables)
{
appendStringInfoString(buf, " FOR");
AppendTables(buf, stmt->tables, includeLocalTables);
}
}
#endif
if (stmt->options != NIL)
{
appendStringInfoString(buf, " WITH (");
AppendPublicationOptions(buf, stmt->options);
appendStringInfoString(buf, ")");
}
}
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
* AppendPublicationObjects appends a string representing a list of publication
* objects to a buffer.
*
* For instance: TABLE users, departments, TABLES IN SCHEMA production
*/
static bool
AppendPublicationObjects(StringInfo buf, List *publicationObjects,
bool whereClauseNeedsTransform,
bool includeLocalTables)
{
PublicationObjSpec *publicationObject = NULL;
bool appendedObject = false;
foreach_ptr(publicationObject, publicationObjects)
{
if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
{
/* FOR TABLE ... */
PublicationTable *publicationTable = publicationObject->pubtable;
RangeVar *rangeVar = publicationTable->relation;
char *schemaName = rangeVar->schemaname;
char *tableName = rangeVar->relname;
if (!includeLocalTables && !IsCitusTableRangeVar(rangeVar, NoLock, false))
{
/* do not propagate local tables */
continue;
}
if (schemaName != NULL)
{
/* qualified table name */
appendStringInfo(buf, "%s TABLE %s",
appendedObject ? "," : "",
quote_qualified_identifier(schemaName, tableName));
}
else
{
/* unqualified table name */
appendStringInfo(buf, "%s TABLE %s",
appendedObject ? "," : "",
quote_identifier(tableName));
}
if (publicationTable->columns != NIL)
{
appendStringInfoString(buf, " (");
AppendIdentifierList(buf, publicationTable->columns);
appendStringInfoString(buf, ")");
}
if (publicationTable->whereClause != NULL)
{
appendStringInfoString(buf, " WHERE (");
AppendWhereClauseExpression(buf, rangeVar,
publicationTable->whereClause,
whereClauseNeedsTransform);
appendStringInfoString(buf, ")");
}
}
else
{
/* FOR TABLES IN SCHEMA */
char *schemaName = publicationObject->name;
if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLES_IN_CUR_SCHEMA)
{
List *searchPath = fetch_search_path(false);
if (searchPath == NIL)
{
ereport(ERROR, errcode(ERRCODE_UNDEFINED_SCHEMA),
errmsg("no schema has been selected for "
"CURRENT_SCHEMA"));
}
schemaName = get_namespace_name(linitial_oid(searchPath));
}
appendStringInfo(buf, "%s TABLES IN SCHEMA %s",
appendedObject ? "," : "",
quote_identifier(schemaName));
}
appendedObject = true;
}
return appendedObject;
}
/*
* AppendWhereClauseExpression appends a deparsed expression that can
* contain a filter on the given table. If whereClauseNeedsTransform is set
* the expression is first transformed.
*/
static void
AppendWhereClauseExpression(StringInfo buf, RangeVar *tableName,
Node *whereClause, bool whereClauseNeedsTransform)
{
Relation relation = relation_openrv(tableName, AccessShareLock);
if (whereClauseNeedsTransform)
{
ParseState *pstate = make_parsestate(NULL);
pstate->p_sourcetext = "";
ParseNamespaceItem *nsitem = addRangeTableEntryForRelation(pstate,
relation,
AccessShareLock, NULL,
false, false);
addNSItemToQuery(pstate, nsitem, false, true, true);
whereClause = transformWhereClause(pstate,
copyObject(whereClause),
EXPR_KIND_WHERE,
"PUBLICATION WHERE");
assign_expr_collations(pstate, whereClause);
}
List *relationContext = deparse_context_for(tableName->relname, relation->rd_id);
PushOverrideEmptySearchPath(CurrentMemoryContext);
char *whereClauseString = deparse_expression(whereClause,
relationContext,
true, true);
PopOverrideSearchPath();
appendStringInfoString(buf, whereClauseString);
relation_close(relation, AccessShareLock);
}
#else
/*
* AppendTables appends a string representing a list of tables to a buffer.
*
* For instance: TABLE users, departments
*/
static bool
AppendTables(StringInfo buf, List *tables, bool includeLocalTables)
{
RangeVar *rangeVar = NULL;
bool appendedObject = false;
foreach_ptr(rangeVar, tables)
{
if (!includeLocalTables &&
!IsCitusTableRangeVar(rangeVar, NoLock, false))
{
/* do not propagate local tables */
continue;
}
char *schemaName = rangeVar->schemaname;
char *tableName = rangeVar->relname;
if (schemaName != NULL)
{
/* qualified table name */
appendStringInfo(buf, "%s %s",
appendedObject ? "," : " TABLE",
quote_qualified_identifier(schemaName, tableName));
}
else
{
/* unqualified table name */
appendStringInfo(buf, "%s %s",
appendedObject ? "," : " TABLE",
quote_identifier(tableName));
}
appendedObject = true;
}
return appendedObject;
}
#endif
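/*
 * Sketch of how the deparse entry points above fit together (illustrative,
 * not part of this patch; the flag values mirror the callers in
 * publication.c): statements read back from the catalog carry
 * already-transformed WHERE clauses, and restoring publications during
 * shard operations includes local tables.
 */
#ifdef CITUS_USAGE_SKETCH
static char *
ExampleDeparseFromCatalog(Node *createPublicationParseTree)
{
	bool whereClauseNeedsTransform = false; /* catalog expressions */
	bool includeLocalTables = true;         /* keep local tables in the list */

	return DeparseCreatePublicationStmtExtended(createPublicationParseTree,
												whereClauseNeedsTransform,
												includeLocalTables);
}
#endif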
/*
* DeparseAlterPublicationStmt builds and returns a string representing
* an AlterPublicationStmt.
*/
char *
DeparseAlterPublicationStmt(Node *node)
{
/* regular deparsing function takes ALTER PUBLICATION from the parser */
bool whereClauseNeedsTransform = true;
/* for regular ALTER PUBLICATION we do not propagate local tables */
bool includeLocalTables = false;
return DeparseAlterPublicationStmtExtended(node, whereClauseNeedsTransform,
includeLocalTables);
}
/*
* DeparseAlterPublicationStmtExtended builds and returns a string representing a
* AlterPublicationStmt, which may have already-transformed expressions.
*/
char *
DeparseAlterPublicationStmtExtended(Node *node, bool whereClauseNeedsTransform,
bool includeLocalTables)
{
AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
StringInfoData str = { 0 };
initStringInfo(&str);
if (!AppendAlterPublicationStmt(&str, stmt, whereClauseNeedsTransform,
includeLocalTables))
{
Assert(!includeLocalTables);
/*
* When there are no objects to propagate, then there is no
* valid ALTER PUBLICATION to construct.
*/
return NULL;
}
return str.data;
}
/*
* AppendAlterPublicationStmt appends a string representing an AlterPublicationStmt
* of the form ALTER PUBLICATION .. ADD/SET/DROP
*/
static bool
AppendAlterPublicationStmt(StringInfo buf, AlterPublicationStmt *stmt,
bool whereClauseNeedsTransform,
bool includeLocalTables)
{
appendStringInfo(buf, "ALTER PUBLICATION %s",
quote_identifier(stmt->pubname));
if (stmt->options)
{
appendStringInfoString(buf, " SET (");
AppendPublicationOptions(buf, stmt->options);
appendStringInfoString(buf, ")");
/* changing options cannot be combined with other actions */
return true;
}
#if (PG_VERSION_NUM >= PG_VERSION_15)
AppendAlterPublicationAction(buf, stmt->action);
return AppendPublicationObjects(buf, stmt->pubobjects, whereClauseNeedsTransform,
includeLocalTables);
#else
AppendDefElemAction(buf, stmt->tableAction);
return AppendTables(buf, stmt->tables, includeLocalTables);
#endif
}
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
* AppendAlterPublicationAction appends a string representing an AlterPublicationAction
* to a buffer.
*/
static void
AppendAlterPublicationAction(StringInfo buf, AlterPublicationAction action)
{
switch (action)
{
case AP_AddObjects:
{
appendStringInfoString(buf, " ADD");
break;
}
case AP_DropObjects:
{
appendStringInfoString(buf, " DROP");
break;
}
case AP_SetObjects:
{
appendStringInfoString(buf, " SET");
break;
}
default:
{
ereport(ERROR, (errmsg("unrecognized publication action: %d", action)));
}
}
}
#else
/*
* AppendDefElemAction appends a string representing a DefElemAction
* to a buffer.
*/
static void
AppendDefElemAction(StringInfo buf, DefElemAction action)
{
switch (action)
{
case DEFELEM_ADD:
{
appendStringInfoString(buf, " ADD");
break;
}
case DEFELEM_DROP:
{
appendStringInfoString(buf, " DROP");
break;
}
case DEFELEM_SET:
{
appendStringInfoString(buf, " SET");
break;
}
default:
{
ereport(ERROR, (errmsg("unrecognized publication action: %d", action)));
}
}
}
#endif
/*
* DeparseDropPublicationStmt builds and returns a string representing the DropStmt
*/
char *
DeparseDropPublicationStmt(Node *node)
{
DropStmt *stmt = castNode(DropStmt, node);
StringInfoData str = { 0 };
initStringInfo(&str);
Assert(stmt->removeType == OBJECT_PUBLICATION);
AppendDropPublicationStmt(&str, stmt);
return str.data;
}
/*
* AppendDropPublicationStmt appends a string representing the DropStmt to a buffer
*/
static void
AppendDropPublicationStmt(StringInfo buf, DropStmt *stmt)
{
appendStringInfoString(buf, "DROP PUBLICATION ");
if (stmt->missing_ok)
{
appendStringInfoString(buf, "IF EXISTS ");
}
AppendIdentifierList(buf, stmt->objects);
if (stmt->behavior == DROP_CASCADE)
{
appendStringInfoString(buf, " CASCADE");
}
}
/*
* DeparseRenamePublicationStmt builds and returns a string representing the RenameStmt
*/
char *
DeparseRenamePublicationStmt(Node *node)
{
RenameStmt *stmt = castNode(RenameStmt, node);
StringInfoData str = { 0 };
initStringInfo(&str);
Assert(stmt->renameType == OBJECT_PUBLICATION);
AppendRenamePublicationStmt(&str, stmt);
return str.data;
}
/*
* AppendRenamePublicationStmt appends a string representing the RenameStmt to a buffer
*/
static void
AppendRenamePublicationStmt(StringInfo buf, RenameStmt *stmt)
{
appendStringInfo(buf, "ALTER PUBLICATION %s RENAME TO %s;",
quote_identifier(strVal(stmt->object)),
quote_identifier(stmt->newname));
}
/*
* DeparseAlterPublicationOwnerStmt builds and returns a string representing the AlterOwnerStmt
*/
char *
DeparseAlterPublicationOwnerStmt(Node *node)
{
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
StringInfoData str = { 0 };
initStringInfo(&str);
Assert(stmt->objectType == OBJECT_PUBLICATION);
AppendAlterPublicationOwnerStmt(&str, stmt);
return str.data;
}
/*
* AppendAlterPublicationOwnerStmt appends a string representing the AlterOwnerStmt to a buffer
*/
static void
AppendAlterPublicationOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt)
{
Assert(stmt->objectType == OBJECT_PUBLICATION);
appendStringInfo(buf, "ALTER PUBLICATION %s OWNER TO %s;",
quote_identifier(strVal(stmt->object)),
RoleSpecString(stmt->newowner, true));
}
/*
* AppendPublicationOptions appends a string representing a list of publication options.
*/
static void
AppendPublicationOptions(StringInfo stringBuffer, List *optionList)
{
ListCell *optionCell = NULL;
bool firstOptionPrinted = false;
foreach(optionCell, optionList)
{
DefElem *option = (DefElem *) lfirst(optionCell);
char *optionName = option->defname;
char *optionValue = defGetString(option);
NodeTag valueType = nodeTag(option->arg);
if (firstOptionPrinted)
{
appendStringInfo(stringBuffer, ", ");
}
firstOptionPrinted = true;
appendStringInfo(stringBuffer, "%s = ",
quote_identifier(optionName));
#if (PG_VERSION_NUM >= PG_VERSION_15)
if (valueType == T_Integer || valueType == T_Float || valueType == T_Boolean)
#else
if (valueType == T_Integer || valueType == T_Float)
#endif
{
/* string escaping is unnecessary for numeric types and can cause issues */
appendStringInfo(stringBuffer, "%s", optionValue);
}
else
{
appendStringInfo(stringBuffer, "%s", quote_literal_cstr(optionValue));
}
}
}
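/*
 * Example of the emitted option text (illustrative, not part of this patch;
 * assumes PG 15, where boolean DefElem values are T_Boolean nodes): the
 * options built below deparse to
 *   publish = 'insert', publish_via_partition_root = true
 */
#ifdef CITUS_USAGE_SKETCH
static char *
ExampleFormatPublicationOptions(void)
{
	List *options = list_make2(
		makeDefElem("publish", (Node *) makeString("insert"), -1),
		makeDefElem("publish_via_partition_root",
					(Node *) makeBoolean(true), -1));

	StringInfo buf = makeStringInfo();
	AppendPublicationOptions(buf, options);
	return buf->data;
}
#endif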
/*
* AppendIdentifierList appends a string representing a list of
* identifiers (of String type).
*/
static void
AppendIdentifierList(StringInfo buf, List *objects)
{
ListCell *objectCell = NULL;
foreach(objectCell, objects)
{
char *name = strVal(lfirst(objectCell));
if (objectCell != list_head(objects))
{
appendStringInfo(buf, ", ");
}
appendStringInfoString(buf, quote_identifier(name));
}
}

View File

@ -0,0 +1,119 @@
/*-------------------------------------------------------------------------
*
* qualify_publication_stmt.c
* Functions specialized in fully qualifying all publication statements. These
* functions are dispatched from qualify.c
*
* Copyright (c), Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/namespace.h"
#include "distributed/deparser.h"
#include "distributed/listutils.h"
#include "nodes/nodes.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#if (PG_VERSION_NUM >= PG_VERSION_15)
static void QualifyPublicationObjects(List *publicationObjects);
#else
static void QualifyTables(List *tables);
#endif
static void QualifyPublicationRangeVar(RangeVar *publication);
/*
* QualifyCreatePublicationStmt qualifies the table names of the
* CREATE PUBLICATION statement.
*/
void
QualifyCreatePublicationStmt(Node *node)
{
CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
#if (PG_VERSION_NUM >= PG_VERSION_15)
QualifyPublicationObjects(stmt->pubobjects);
#else
QualifyTables(stmt->tables);
#endif
}
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
* QualifyPublicationObjects ensures all table names in a list of
* publication objects are fully qualified.
*/
static void
QualifyPublicationObjects(List *publicationObjects)
{
PublicationObjSpec *publicationObject = NULL;
foreach_ptr(publicationObject, publicationObjects)
{
if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
{
/* FOR TABLE ... */
PublicationTable *publicationTable = publicationObject->pubtable;
QualifyPublicationRangeVar(publicationTable->relation);
}
}
}
#else
/*
* QualifyTables ensures all table names in a list are fully qualified.
*/
static void
QualifyTables(List *tables)
{
RangeVar *rangeVar = NULL;
foreach_ptr(rangeVar, tables)
{
QualifyPublicationRangeVar(rangeVar);
}
}
#endif
/*
* QualifyAlterPublicationStmt ensures all table names in the given
* ALTER PUBLICATION statement are fully qualified.
*/
void
QualifyAlterPublicationStmt(Node *node)
{
AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
#if (PG_VERSION_NUM >= PG_VERSION_15)
QualifyPublicationObjects(stmt->pubobjects);
#else
QualifyTables(stmt->tables);
#endif
}
/*
* QualifyPublicationRangeVar qualifies the given publication table RangeVar if it is not already qualified.
*/
static void
QualifyPublicationRangeVar(RangeVar *publication)
{
if (publication->schemaname == NULL)
{
Oid publicationOid = RelnameGetRelid(publication->relname);
Oid schemaOid = get_rel_namespace(publicationOid);
publication->schemaname = get_namespace_name(schemaOid);
}
}
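/*
 * Usage sketch (illustrative, not part of this patch): qualification runs
 * before deparse so that an unqualified "FOR TABLE users" becomes
 * "FOR TABLE public.users" (or whatever schema the search path resolves to)
 * and stays unambiguous on the workers.
 */
#ifdef CITUS_USAGE_SKETCH
static char *
ExampleQualifyThenDeparse(Node *createPublicationParseTree)
{
	QualifyCreatePublicationStmt(createPublicationParseTree);
	return DeparseCreatePublicationStmt(createPublicationParseTree);
}
#endif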

View File

@ -53,6 +53,7 @@
#include "common/keywords.h"
#include "distributed/citus_nodefuncs.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/multi_router_planner.h"
#include "executor/spi.h"
#include "foreign/foreign.h"
#include "funcapi.h"
@ -3723,7 +3724,6 @@ static void
get_merge_query_def(Query *query, deparse_context *context)
{
StringInfo buf = context->buf;
RangeTblEntry *targetRte;
/* Insert the WITH clause if given */
get_with_clause(query, context);
@ -3731,7 +3731,7 @@ get_merge_query_def(Query *query, deparse_context *context)
/*
* Start the query with MERGE INTO <target>
*/
targetRte = rt_fetch(query->resultRelation, query->rtable);
RangeTblEntry *targetRte = ExtractResultRelationRTE(query);
if (PRETTY_INDENT(context))
{
@ -3853,6 +3853,15 @@ get_merge_query_def(Query *query, deparse_context *context)
}
}
/*
* RETURNING is not supported in MERGE, so the list must be NIL. If PG adds
* support later, we might miss it; raise an error so it gets investigated.
*/
if (unlikely(query->returningList))
{
elog(ERROR, "Unexpected RETURNING clause in MERGE");
}
ereport(DEBUG1, (errmsg("<Deparsed MERGE query: %s>", buf->data)));
}

File diff suppressed because it is too large

View File

@ -9,6 +9,7 @@
*-------------------------------------------------------------------------
*/
#include "distributed/distributed_execution_locks.h"
#include "distributed/executor_util.h"
#include "distributed/listutils.h"
#include "distributed/coordinator_protocol.h"
#include "distributed/metadata_cache.h"
@ -19,6 +20,259 @@
#include "distributed/transaction_management.h"
/*
* AcquireExecutorShardLocksForExecution acquires advisory lock on shard IDs
* to prevent unsafe concurrent modifications of shards.
*
* We prevent concurrent modifications of shards in two cases:
* 1. Any non-commutative writes to a replicated table
* 2. Multi-shard writes that are executed in parallel
*
* The first case ensures we do not apply updates in different orders on
* different replicas (e.g. of a reference table), which could lead the
* replicas to diverge.
*
* The second case prevents deadlocks due to out-of-order execution.
*
* There are two GUCs that can override the default behaviors.
* 'citus.all_modifications_commutative' relaxes locking
* that's done for the purpose of keeping replicas consistent.
* 'citus.enable_deadlock_prevention' relaxes locking done for
* the purpose of avoiding deadlocks between concurrent
* multi-shard commands.
*
* We do not take executor shard locks for utility commands such as
* TRUNCATE because the table locks already prevent concurrent access.
*/
void
AcquireExecutorShardLocksForExecution(RowModifyLevel modLevel, List *taskList)
{
if (modLevel <= ROW_MODIFY_READONLY &&
!SelectForUpdateOnReferenceTable(taskList))
{
/*
* Executor locks only apply to DML commands and SELECT FOR UPDATE queries
* touching reference tables.
*/
return;
}
bool requiresParallelExecutionLocks =
!(list_length(taskList) == 1 || ShouldRunTasksSequentially(taskList));
bool modifiedTableReplicated = ModifiedTableReplicated(taskList);
if (!modifiedTableReplicated && !requiresParallelExecutionLocks)
{
/*
* When a distributed query on tables with replication
* factor == 1 and command hits only a single shard, we
* rely on Postgres to handle the serialization of the
* concurrent modifications on the workers.
*
* For reference tables, even if their placements are replicated
* ones (e.g., single node), we acquire the distributed execution
* locks to stay consistent when new node(s) are added. So, we do
* not return at this point for them.
*/
return;
}
/*
* We first assume that all the remaining modifications are going to
* be serialized. So, start with an ExclusiveLock and lower the lock level
* as much as possible.
*/
int lockMode = ExclusiveLock;
/*
* In addition to honouring commutativity rules, we currently only
* allow a single multi-shard command on a shard at a time. Otherwise,
* concurrent multi-shard commands may take row-level locks on the
* shard placements in a different order and create a distributed
* deadlock. This applies even when writes are commutative and/or
* there is no replication. This can be relaxed via
* EnableDeadlockPrevention.
*
* 1. If citus.all_modifications_commutative is set to true, then all locks
* are acquired as RowExclusiveLock.
*
* 2. If citus.all_modifications_commutative is false, then only the shards
* with more than one replica are locked with ExclusiveLock. Otherwise, the
* lock is acquired with ShareUpdateExclusiveLock.
*
* ShareUpdateExclusiveLock conflicts with itself such that only one
* multi-shard modification at a time is allowed on a shard. It also conflicts
* with ExclusiveLock, which ensures that updates/deletes/upserts are applied
* in the same order on all placements. It does not conflict with
* RowExclusiveLock, which is normally obtained by single-shard, commutative
* writes.
*/
if (!modifiedTableReplicated && requiresParallelExecutionLocks)
{
/*
* When there is no replication then we only need to prevent
* concurrent multi-shard commands on the same shards. This is
* because concurrent, parallel commands may modify the same
* set of shards, but in different orders. The order of the
* accesses might trigger distributed deadlocks that are not
* possible to happen on non-distributed systems such as
* regular Postgres.
*
* As an example, assume that we have two queries: query-1 and query-2.
* Both queries access shard-1 and shard-2. If query-1 first accesses to
* shard-1 then shard-2, and query-2 accesses shard-2 then shard-1, these
* two commands might block each other in case they modify the same rows
* (e.g., cause distributed deadlocks).
*
* In either case, ShareUpdateExclusive has the desired effect, since
* it conflicts with itself and ExclusiveLock (taken by non-commutative
* writes).
*
* However, some users find this too restrictive, so we allow them to
* reduce to a RowExclusiveLock when citus.enable_deadlock_prevention
* is disabled, which lets multi-shard modifications run in parallel.
*/
lockMode =
EnableDeadlockPrevention ? ShareUpdateExclusiveLock : RowExclusiveLock;
if (!IsCoordinator())
{
/*
* We also skip taking a heavy-weight lock when running a multi-shard
* commands from workers, since we currently do not prevent concurrency
* across workers anyway.
*/
lockMode = RowExclusiveLock;
}
}
else if (modifiedTableReplicated)
{
/*
* When we are executing distributed queries on replicated tables, our
* default behaviour is to prevent any concurrency. This is valid
* for when parallel execution is happening or not.
*
* The reason is that we cannot control the order of the placement accesses
* of two distributed queries to the same shards. The order of the accesses
* might cause the replicas of the same shard placements to diverge. This
* cannot happen on non-distributed systems such as regular Postgres.
*
* As an example, assume that we have two queries: query-1 and query-2.
* Both queries only access the placements of shard-1, say p-1 and p-2.
*
* And, assume that these queries are non-commutative, such as:
* query-1: UPDATE table SET b = 1 WHERE key = 1;
* query-2: UPDATE table SET b = 2 WHERE key = 1;
*
* If query-1 accesses p-1 then p-2, and query-2 accesses
* p-2 then p-1, these two commands would leave p-1 and p-2
* diverged (e.g., the values for the column "b" would differ).
*
* The only exception to this rule is the single shard commutative
* modifications, such as INSERTs. In that case, we can allow
* concurrency among such backends, hence lowering the lock level
* to RowExclusiveLock.
*/
if (!requiresParallelExecutionLocks && modLevel < ROW_MODIFY_NONCOMMUTATIVE)
{
lockMode = RowExclusiveLock;
}
}
if (AllModificationsCommutative)
{
/*
* The mapping is overridden when all_modifications_commutative is set to true.
* In that case, all modifications are treated as commutative, which can be used
* to communicate that the application is only generating commutative
* UPDATE/DELETE/UPSERT commands and exclusive locks are unnecessary. This
* is irrespective of single-shard/multi-shard or replicated tables.
*/
lockMode = RowExclusiveLock;
}
/* now, iterate on the tasks and acquire the executor locks on the shards */
List *anchorShardIntervalList = NIL;
List *relationRowLockList = NIL;
List *requiresConsistentSnapshotRelationShardList = NIL;
Task *task = NULL;
foreach_ptr(task, taskList)
{
ShardInterval *anchorShardInterval = LoadShardInterval(task->anchorShardId);
anchorShardIntervalList = lappend(anchorShardIntervalList, anchorShardInterval);
/* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */
AcquireExecutorShardLocksForRelationRowLockList(task->relationRowLockList);
relationRowLockList =
list_concat(relationRowLockList,
task->relationRowLockList);
/*
* If the task has a subselect, then we may need to lock the shards from which
* the query selects as well to prevent the subselects from seeing different
* results on different replicas.
*/
if (RequiresConsistentSnapshot(task))
{
/*
* ExclusiveLock conflicts with all lock types used by modifications
* and therefore prevents other modifications from running
* concurrently.
*/
requiresConsistentSnapshotRelationShardList =
list_concat(requiresConsistentSnapshotRelationShardList,
task->relationShardList);
}
}
/*
* Acquire the locks in a sorted way to avoid deadlocks due to lock
* ordering across concurrent sessions.
*/
anchorShardIntervalList =
SortList(anchorShardIntervalList, CompareShardIntervalsById);
/*
* If we are dealing with a partition we are also taking locks on parent table
* to prevent deadlocks on concurrent operations on a partition and its parent.
*
* Note that this function currently does not acquire any remote locks as that
* is necessary to control the concurrency across multiple nodes for replicated
* tables. That is because Citus currently does not allow modifications to
* partitions from any node other than the coordinator.
*/
LockParentShardResourceIfPartition(anchorShardIntervalList, lockMode);
/* Acquire distribution execution locks on the affected shards */
SerializeNonCommutativeWrites(anchorShardIntervalList, lockMode);
if (relationRowLockList != NIL)
{
/* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */
AcquireExecutorShardLocksForRelationRowLockList(relationRowLockList);
}
if (requiresConsistentSnapshotRelationShardList != NIL)
{
/*
* If the task has a subselect, then we may need to lock the shards from which
* the query selects as well to prevent the subselects from seeing different
* results on different replicas.
*
* ExclusiveLock conflicts with all lock types used by modifications
* and therefore prevents other modifications from running
* concurrently.
*/
LockRelationShardResources(requiresConsistentSnapshotRelationShardList,
ExclusiveLock);
}
}
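/*
 * Simplified summary sketch of the lock-mode ladder above (illustrative
 * only, not part of this patch; the real decision is inline above, also
 * consults IsCoordinator(), and further relaxes single-shard commutative
 * writes on replicated tables).
 */
#ifdef CITUS_USAGE_SKETCH
static LOCKMODE
ExampleExecutorLockMode(bool modifiedTableReplicated,
						bool requiresParallelExecutionLocks,
						bool allModificationsCommutative,
						bool deadlockPreventionEnabled)
{
	if (allModificationsCommutative)
	{
		/* citus.all_modifications_commutative overrides everything */
		return RowExclusiveLock;
	}
	if (!modifiedTableReplicated && requiresParallelExecutionLocks)
	{
		return deadlockPreventionEnabled ?
			   ShareUpdateExclusiveLock : RowExclusiveLock;
	}

	/* replicated tables default to full serialization */
	return ExclusiveLock;
}
#endif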
/*
* RequiresConsistentSnapshot returns true if the given task need to take
* the necessary locks to ensure that a subquery in the modify query
@ -188,3 +442,27 @@ LockPartitionRelations(Oid relationId, LOCKMODE lockMode)
LockRelationOid(partitionRelationId, lockMode);
}
}
/*
* LockPartitionsForDistributedPlan ensures commands take locks on all partitions
* of a distributed table that appears in the query. We do this primarily out of
* consistency with PostgreSQL locking.
*/
void
LockPartitionsForDistributedPlan(DistributedPlan *plan)
{
if (TaskListModifiesDatabase(plan->modLevel, plan->workerJob->taskList))
{
Oid targetRelationId = plan->targetRelationId;
LockPartitionsInRelationList(list_make1_oid(targetRelationId), RowExclusiveLock);
}
/*
* Lock partitions of tables that appear in a SELECT or subquery. In the
* DML case this also includes the target relation, but since we already
* have a stronger lock this doesn't do any harm.
*/
LockPartitionsInRelationList(plan->relationIdList, AccessShareLock);
}

View File

@ -0,0 +1,101 @@
/*-------------------------------------------------------------------------
*
* executor_util_params.c
*
* Utility functions for dealing with parameters in the executor.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "distributed/executor_util.h"
#include "utils/lsyscache.h"
/*
* ExtractParametersForRemoteExecution extracts parameter types and values from
* the given ParamListInfo structure, and fills parameter type and value arrays.
* It changes the OIDs of custom types to InvalidOid so that the workers,
* whose custom-type OIDs may differ from the coordinator's, can infer them.
*/
void
ExtractParametersForRemoteExecution(ParamListInfo paramListInfo, Oid **parameterTypes,
const char ***parameterValues)
{
ExtractParametersFromParamList(paramListInfo, parameterTypes,
parameterValues, false);
}
/*
* ExtractParametersFromParamList extracts parameter types and values from
* the given ParamListInfo structure, and fills parameter type and value arrays.
* If useOriginalCustomTypeOids is true, it uses the original oids for custom types.
*/
void
ExtractParametersFromParamList(ParamListInfo paramListInfo,
Oid **parameterTypes,
const char ***parameterValues, bool
useOriginalCustomTypeOids)
{
int parameterCount = paramListInfo->numParams;
*parameterTypes = (Oid *) palloc0(parameterCount * sizeof(Oid));
*parameterValues = (const char **) palloc0(parameterCount * sizeof(char *));
/* get parameter types and values */
for (int parameterIndex = 0; parameterIndex < parameterCount; parameterIndex++)
{
ParamExternData *parameterData = &paramListInfo->params[parameterIndex];
Oid typeOutputFunctionId = InvalidOid;
bool variableLengthType = false;
/*
* Use 0 for data types where the oid values can be different on
* the coordinator and worker nodes. Therefore, the worker nodes can
* infer the correct oid.
*/
if (parameterData->ptype >= FirstNormalObjectId && !useOriginalCustomTypeOids)
{
(*parameterTypes)[parameterIndex] = 0;
}
else
{
(*parameterTypes)[parameterIndex] = parameterData->ptype;
}
/*
* If the parameter is not referenced / used (ptype == 0) and
* would otherwise have errored out inside standard_planner(),
* don't pass a value to the remote side, and pass text oid to prevent
* undetermined data type errors on workers.
*/
if (parameterData->ptype == 0)
{
(*parameterValues)[parameterIndex] = NULL;
(*parameterTypes)[parameterIndex] = TEXTOID;
continue;
}
/*
* If the parameter is NULL then we preserve its type, but
* don't need to evaluate its value.
*/
if (parameterData->isnull)
{
(*parameterValues)[parameterIndex] = NULL;
continue;
}
getTypeOutputInfo(parameterData->ptype, &typeOutputFunctionId,
&variableLengthType);
(*parameterValues)[parameterIndex] = OidOutputFunctionCall(typeOutputFunctionId,
parameterData->value);
}
}
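/*
 * Behavior sketch (illustrative, not part of this patch): log what would be
 * sent for each parameter. An unused parameter (ptype == 0) is sent as NULL
 * with a text type, and a custom-type parameter is sent with type OID 0 so
 * the worker infers it.
 */
#ifdef CITUS_USAGE_SKETCH
static void
ExampleLogRemoteParameters(ParamListInfo params)
{
	Oid *types = NULL;
	const char **values = NULL;

	ExtractParametersForRemoteExecution(params, &types, &values);

	for (int i = 0; i < params->numParams; i++)
	{
		elog(DEBUG1, "$%d -> type %u, value %s", i + 1, types[i],
			 values[i] != NULL ? values[i] : "NULL");
	}
}
#endif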

View File

@ -0,0 +1,297 @@
/*-------------------------------------------------------------------------
*
* executor_util_tasks.c
*
* Utility functions for dealing with task lists in the executor.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "distributed/executor_util.h"
#include "distributed/listutils.h"
#include "distributed/shardinterval_utils.h"
/*
* TaskListModifiesDatabase is a helper function for DistributedExecutionModifiesDatabase and
* DistributedPlanModifiesDatabase.
*/
bool
TaskListModifiesDatabase(RowModifyLevel modLevel, List *taskList)
{
if (modLevel > ROW_MODIFY_READONLY)
{
return true;
}
/*
* If we cannot decide by only checking the row modify level,
* we should look more closely at the tasks.
*/
if (list_length(taskList) < 1)
{
/* is this ever possible? */
return false;
}
Task *firstTask = (Task *) linitial(taskList);
return !ReadOnlyTask(firstTask->taskType);
}
/*
* TaskListRequiresRollback returns true if the distributed
* execution should start a CoordinatedTransaction. In other words, if the
* function returns true, the execution sends BEGIN; to every connection
* involved in the distributed execution.
*/
bool
TaskListRequiresRollback(List *taskList)
{
int taskCount = list_length(taskList);
if (taskCount == 0)
{
return false;
}
Task *task = (Task *) linitial(taskList);
if (task->cannotBeExecutedInTransction)
{
/* vacuum, create index concurrently etc. */
return false;
}
bool selectForUpdate = task->relationRowLockList != NIL;
if (selectForUpdate)
{
/*
* Do not check SelectOpensTransactionBlock, always open transaction block
* if SELECT FOR UPDATE is executed inside a distributed transaction.
*/
return IsMultiStatementTransaction();
}
if (ReadOnlyTask(task->taskType))
{
return SelectOpensTransactionBlock &&
IsTransactionBlock();
}
if (IsMultiStatementTransaction())
{
return true;
}
if (list_length(taskList) > 1)
{
return true;
}
if (list_length(task->taskPlacementList) > 1)
{
/*
* Single DML/DDL tasks with replicated tables (including
* reference and non-reference tables) should require
* BEGIN/COMMIT/ROLLBACK.
*/
return true;
}
if (task->queryCount > 1)
{
/*
* When there are multiple sequential queries in a task
* we need to run those as a transaction.
*/
return true;
}
return false;
}
/*
* TaskListRequires2PC determines whether the given task list requires 2PC.
*/
bool
TaskListRequires2PC(List *taskList)
{
if (taskList == NIL)
{
return false;
}
Task *task = (Task *) linitial(taskList);
if (ReadOnlyTask(task->taskType))
{
/* we do not trigger 2PC for ReadOnly queries */
return false;
}
bool singleTask = list_length(taskList) == 1;
if (singleTask && list_length(task->taskPlacementList) == 1)
{
/* we do not trigger 2PC for modifications that are:
* - single task
* - single placement
*/
return false;
}
/*
* Otherwise, all modifications are done via 2PC. This includes:
* - Multi-shard commands irrespective of the replication factor
* - Single-shard commands that are targeting more than one replica
*/
return true;
}
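/*
 * Illustrative examples for the rule above (assumption: typical DML tasks):
 * a single-task, single-placement INSERT commits with a plain COMMIT, while
 * a multi-shard UPDATE, or a write to a shard with two placements, goes
 * through 2PC.
 */
#ifdef CITUS_USAGE_SKETCH
static void
ExampleCommitProtocolChoice(List *taskList)
{
	if (TaskListRequires2PC(taskList))
	{
		elog(DEBUG1, "using two-phase commit for %d task(s)",
			 list_length(taskList));
	}
}
#endif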
/*
* TaskListCannotBeExecutedInTransaction returns true if any of the
* tasks in the input cannot be executed in a transaction. These are
* tasks like VACUUM or CREATE INDEX CONCURRENTLY etc.
*/
bool
TaskListCannotBeExecutedInTransaction(List *taskList)
{
Task *task = NULL;
foreach_ptr(task, taskList)
{
if (task->cannotBeExecutedInTransction)
{
return true;
}
}
return false;
}
/*
* SelectForUpdateOnReferenceTable returns true if the input task
* contains a FOR UPDATE clause that locks any reference tables.
*/
bool
SelectForUpdateOnReferenceTable(List *taskList)
{
if (list_length(taskList) != 1)
{
/* we currently do not support SELECT FOR UPDATE on multi-task queries */
return false;
}
Task *task = (Task *) linitial(taskList);
RelationRowLock *relationRowLock = NULL;
foreach_ptr(relationRowLock, task->relationRowLockList)
{
Oid relationId = relationRowLock->relationId;
if (IsCitusTableType(relationId, REFERENCE_TABLE))
{
return true;
}
}
return false;
}
/*
* ReadOnlyTask returns true if the input task does a read-only operation
* on the database.
*/
bool
ReadOnlyTask(TaskType taskType)
{
switch (taskType)
{
case READ_TASK:
case MAP_OUTPUT_FETCH_TASK:
case MAP_TASK:
case MERGE_TASK:
{
return true;
}
default:
{
return false;
}
}
}
/*
* ModifiedTableReplicated iterates over the task list and returns true
* if any task's anchor shard belongs to a replicated table. We qualify
* as replicated any reference table or any distributed table with
* replication factor > 1.
*/
bool
ModifiedTableReplicated(List *taskList)
{
Task *task = NULL;
foreach_ptr(task, taskList)
{
int64 shardId = task->anchorShardId;
if (shardId == INVALID_SHARD_ID)
{
continue;
}
if (ReferenceTableShardId(shardId))
{
return true;
}
Oid relationId = RelationIdForShard(shardId);
if (!SingleReplicatedTable(relationId))
{
return true;
}
}
return false;
}
/*
* ShouldRunTasksSequentially returns true if each of the individual tasks
* should be executed one by one. Note that this is different than
* MultiShardConnectionType == SEQUENTIAL_CONNECTION case. In that case,
* running the tasks across the nodes in parallel is acceptable and implemented
* in that way.
*
* However, the executions that are qualified here would perform poorly if the
* tasks across the workers are executed in parallel. We currently qualify only
* one class of distributed queries here, multi-row INSERTs. If we do not enforce
* true sequential execution, concurrent multi-row upserts could easily form
* a distributed deadlock when the upserts touch the same rows.
*/
bool
ShouldRunTasksSequentially(List *taskList)
{
if (list_length(taskList) < 2)
{
/* single task plans are already qualified as sequential by definition */
return false;
}
/* all the tasks are of the same kind, so we only look at one */
Task *initialTask = (Task *) linitial(taskList);
if (initialTask->rowValuesLists != NIL)
{
/* found a multi-row INSERT */
return true;
}
return false;
}
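/*
 * Illustrative example (assumption: a multi-row INSERT such as
 * "INSERT INTO dist VALUES (1), (2)" is split into one task per shard, each
 * carrying rowValuesLists): the check above then forces shard-by-shard
 * execution to avoid distributed deadlocks between concurrent upserts.
 */
#ifdef CITUS_USAGE_SKETCH
static void
ExampleSequentialExecutionCheck(List *taskList)
{
	if (ShouldRunTasksSequentially(taskList))
	{
		elog(DEBUG1, "executing %d tasks one by one",
			 list_length(taskList));
	}
}
#endif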

View File

@ -0,0 +1,129 @@
/*-------------------------------------------------------------------------
*
* executor_util_tuples.c
*
* Utility functions for handling tuples during remote execution.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "distributed/executor_util.h"
#include "utils/lsyscache.h"
/*
* TupleDescGetAttBinaryInMetadata - Build an AttInMetadata structure based on
* the supplied TupleDesc. AttInMetadata can be used in conjunction with
* fmStringInfos containing binary encoded types to produce a properly formed
* tuple.
*
* NOTE: This function is a copy of the PG function TupleDescGetAttInMetadata,
* except that it uses getTypeBinaryInputInfo instead of getTypeInputInfo.
*/
AttInMetadata *
TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc)
{
int natts = tupdesc->natts;
int i;
Oid atttypeid;
Oid attinfuncid;
AttInMetadata *attinmeta = (AttInMetadata *) palloc(sizeof(AttInMetadata));
/* "Bless" the tupledesc so that we can make rowtype datums with it */
attinmeta->tupdesc = BlessTupleDesc(tupdesc);
/*
* Gather info needed later to call the "in" function for each attribute
*/
FmgrInfo *attinfuncinfo = (FmgrInfo *) palloc0(natts * sizeof(FmgrInfo));
Oid *attioparams = (Oid *) palloc0(natts * sizeof(Oid));
int32 *atttypmods = (int32 *) palloc0(natts * sizeof(int32));
for (i = 0; i < natts; i++)
{
Form_pg_attribute att = TupleDescAttr(tupdesc, i);
/* Ignore dropped attributes */
if (!att->attisdropped)
{
atttypeid = att->atttypid;
getTypeBinaryInputInfo(atttypeid, &attinfuncid, &attioparams[i]);
fmgr_info(attinfuncid, &attinfuncinfo[i]);
atttypmods[i] = att->atttypmod;
}
}
attinmeta->attinfuncs = attinfuncinfo;
attinmeta->attioparams = attioparams;
attinmeta->atttypmods = atttypmods;
return attinmeta;
}
/*
* BuildTupleFromBytes - build a HeapTuple given user data in binary form.
* values is an array of StringInfos, one for each attribute of the return
* tuple. A NULL StringInfo pointer indicates we want to create a NULL field.
*
* NOTE: This function is a copy of the PG function BuildTupleFromCStrings,
* except that it uses ReceiveFunctionCall instead of InputFunctionCall.
*/
HeapTuple
BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values)
{
TupleDesc tupdesc = attinmeta->tupdesc;
int natts = tupdesc->natts;
int i;
Datum *dvalues = (Datum *) palloc(natts * sizeof(Datum));
bool *nulls = (bool *) palloc(natts * sizeof(bool));
/*
* Call the "in" function for each non-dropped attribute, even for nulls,
* to support domains.
*/
for (i = 0; i < natts; i++)
{
if (!TupleDescAttr(tupdesc, i)->attisdropped)
{
/* Non-dropped attributes */
dvalues[i] = ReceiveFunctionCall(&attinmeta->attinfuncs[i],
values[i],
attinmeta->attioparams[i],
attinmeta->atttypmods[i]);
if (values[i] != NULL)
{
nulls[i] = false;
}
else
{
nulls[i] = true;
}
}
else
{
/* Handle dropped attributes by setting to NULL */
dvalues[i] = (Datum) 0;
nulls[i] = true;
}
}
/*
* Form a tuple
*/
HeapTuple tuple = heap_form_tuple(tupdesc, dvalues, nulls);
/*
* Release locally palloc'd space. XXX would probably be good to pfree
* values of pass-by-reference datums, as well.
*/
pfree(dvalues);
pfree(nulls);
return tuple;
}
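/*
 * A minimal usage sketch of the two helpers above, assuming the caller
 * already holds a TupleDesc and one binary-encoded StringInfo per
 * attribute (a NULL pointer meaning SQL NULL).
 */
static HeapTuple
TupleFromBinaryValuesSketch(TupleDesc tupleDescriptor, fmStringInfo *binaryValues)
{
	/* resolve each attribute's binary receive function once */
	AttInMetadata *attInMetadata = TupleDescGetAttBinaryInMetadata(tupleDescriptor);

	/* run the receive functions and form the heap tuple */
	return BuildTupleFromBytes(attInMetadata, binaryValues);
}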

View File

@ -409,11 +409,13 @@ ExecutePlanIntoColocatedIntermediateResults(Oid targetRelationId,
columnNameList);
/* set up a DestReceiver that copies into the intermediate table */
const bool publishableData = true;
CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId,
columnNameList,
partitionColumnIndex,
executorState,
intermediateResultIdPrefix);
intermediateResultIdPrefix,
publishableData);
ExecutePlanIntoDestReceiver(selectPlan, paramListInfo, (DestReceiver *) copyDest);
@ -443,10 +445,12 @@ ExecutePlanIntoRelation(Oid targetRelationId, List *insertTargetList,
columnNameList);
/* set up a DestReceiver that copies into the distributed table */
const bool publishableData = true;
CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId,
columnNameList,
partitionColumnIndex,
executorState, NULL);
executorState, NULL,
publishableData);
ExecutePlanIntoDestReceiver(selectPlan, paramListInfo, (DestReceiver *) copyDest);

View File

@ -90,6 +90,7 @@
#include "distributed/local_executor.h"
#include "distributed/local_plan_cache.h"
#include "distributed/coordinator_protocol.h"
#include "distributed/executor_util.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_server_executor.h"

View File

@ -802,6 +802,11 @@ GetObjectTypeString(ObjectType objType)
return "function";
}
case OBJECT_PUBLICATION:
{
return "publication";
}
case OBJECT_SCHEMA:
{
return "schema";

View File

@ -132,6 +132,7 @@ typedef struct ViewDependencyNode
static List * GetRelationSequenceDependencyList(Oid relationId);
static List * GetRelationFunctionDependencyList(Oid relationId);
static List * GetRelationTriggerFunctionDependencyList(Oid relationId);
static List * GetPublicationRelationsDependencyList(Oid relationId);
static List * GetRelationStatsSchemaDependencyList(Oid relationId);
static List * GetRelationIndicesDependencyList(Oid relationId);
static DependencyDefinition * CreateObjectAddressDependencyDef(Oid classId, Oid objectId);
@ -722,6 +723,11 @@ SupportedDependencyByCitus(const ObjectAddress *address)
return true;
}
case OCLASS_PUBLICATION:
{
return true;
}
case OCLASS_TSCONFIG:
{
return true;
@ -1656,6 +1662,36 @@ ExpandCitusSupportedTypes(ObjectAddressCollector *collector, ObjectAddress targe
List *ruleRefDepList = GetViewRuleReferenceDependencyList(relationId);
result = list_concat(result, ruleRefDepList);
}
break;
}
case PublicationRelationId:
{
Oid publicationId = target.objectId;
/*
* Publications do not depend directly on relations, because dropping
* the relation will only remove it from the publications. However,
* we add a dependency to ensure the relation is created first when
* adding a node.
*/
List *relationDependencyList =
GetPublicationRelationsDependencyList(publicationId);
result = list_concat(result, relationDependencyList);
/*
* As of PostgreSQL 15, the same applies to schemas.
*/
#if PG_VERSION_NUM >= PG_VERSION_15
List *schemaIdList =
GetPublicationSchemas(publicationId);
List *schemaDependencyList =
CreateObjectAddressDependencyDefList(NamespaceRelationId, schemaIdList);
result = list_concat(result, schemaDependencyList);
#endif
break;
}
default:
@ -1834,7 +1870,7 @@ static List *
GetRelationSequenceDependencyList(Oid relationId)
{
List *seqInfoList = NIL;
GetDependentSequencesWithRelation(relationId, &seqInfoList, 0);
GetDependentSequencesWithRelation(relationId, &seqInfoList, 0, DEPENDENCY_AUTO);
List *seqIdList = NIL;
SequenceInfo *seqInfo = NULL;
@ -1923,6 +1959,33 @@ GetRelationTriggerFunctionDependencyList(Oid relationId)
}
/*
* GetPublicationRelationsDependencyList creates a list of ObjectAddressDependencies for
* a publication on the Citus relations it contains. This helps make sure the Citus
* tables in a publication are created on a node before the publication itself.
*/
static List *
GetPublicationRelationsDependencyList(Oid publicationId)
{
List *allRelationIds = GetPublicationRelations(publicationId, PUBLICATION_PART_ROOT);
List *citusRelationIds = NIL;
Oid relationId = InvalidOid;
foreach_oid(relationId, allRelationIds)
{
if (!IsCitusTable(relationId))
{
continue;
}
citusRelationIds = lappend_oid(citusRelationIds, relationId);
}
return CreateObjectAddressDependencyDefList(RelationRelationId, citusRelationIds);
}
/*
* GetTypeConstraintDependencyDefinition creates a list of constraint dependency
* definitions for a given type

View File

@ -311,7 +311,7 @@ static void InvalidateDistTableCache(void);
static void InvalidateDistObjectCache(void);
static bool InitializeTableCacheEntry(int64 shardId, bool missingOk);
static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
CitusTableType tableType);
uint32 colocationId, CitusTableType tableType);
static bool RefreshTableCacheEntryIfInvalid(ShardIdCacheEntry *shardEntry, bool
missingOk);
@ -450,7 +450,36 @@ bool
IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tableType)
{
return IsCitusTableTypeInternal(tableEntry->partitionMethod,
tableEntry->replicationModel, tableType);
tableEntry->replicationModel,
tableEntry->colocationId, tableType);
}
/*
* HasDistributionKey returns true if the given Citus table has a
* distribution key.
*/
bool
HasDistributionKey(Oid relationId)
{
CitusTableCacheEntry *tableEntry = LookupCitusTableCacheEntry(relationId);
if (tableEntry == NULL)
{
ereport(ERROR, (errmsg("relation with oid %u is not a Citus table", relationId)));
}
return HasDistributionKeyCacheEntry(tableEntry);
}
/*
* HasDistributionKeyCacheEntry returns true if the given cache entry
* identifies a Citus table that has a distribution key.
*/
bool
HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry)
{
return tableEntry->partitionMethod != DISTRIBUTE_BY_NONE;
}
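/*
 * A minimal sketch of how callers might branch on the helper above;
 * the branch bodies are placeholders, not part of this change.
 */
static void
HandleTableByDistributionKeySketch(Oid relationId)
{
	if (HasDistributionKey(relationId))
	{
		/* hash- or range-distributed table: shard pruning applies */
	}
	else
	{
		/* reference table or Citus local table: a single shard group */
	}
}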
@ -460,7 +489,7 @@ IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tabl
*/
static bool
IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
CitusTableType tableType)
uint32 colocationId, CitusTableType tableType)
{
switch (tableType)
{
@ -501,12 +530,8 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
case CITUS_LOCAL_TABLE:
{
return partitionMethod == DISTRIBUTE_BY_NONE &&
replicationModel != REPLICATION_MODEL_2PC;
}
case CITUS_TABLE_WITH_NO_DIST_KEY:
{
return partitionMethod == DISTRIBUTE_BY_NONE;
replicationModel != REPLICATION_MODEL_2PC &&
colocationId == INVALID_COLOCATION_ID;
}
case ANY_CITUS_TABLE_TYPE:
@ -529,33 +554,21 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
char *
GetTableTypeName(Oid tableId)
{
bool regularTable = false;
char partitionMethod = ' ';
char replicationModel = ' ';
if (IsCitusTable(tableId))
{
CitusTableCacheEntry *referencingCacheEntry = GetCitusTableCacheEntry(tableId);
partitionMethod = referencingCacheEntry->partitionMethod;
replicationModel = referencingCacheEntry->replicationModel;
}
else
{
regularTable = true;
}
if (regularTable)
if (!IsCitusTable(tableId))
{
return "regular table";
}
else if (partitionMethod == 'h')
CitusTableCacheEntry *tableCacheEntry = GetCitusTableCacheEntry(tableId);
if (IsCitusTableTypeCacheEntry(tableCacheEntry, HASH_DISTRIBUTED))
{
return "distributed table";
}
else if (partitionMethod == 'n' && replicationModel == 't')
else if (IsCitusTableTypeCacheEntry(tableCacheEntry, REFERENCE_TABLE))
{
return "reference table";
}
else if (partitionMethod == 'n' && replicationModel != 't')
else if (IsCitusTableTypeCacheEntry(tableCacheEntry, CITUS_LOCAL_TABLE))
{
return "citus local table";
}
@ -577,6 +590,18 @@ IsCitusTable(Oid relationId)
}
/*
* IsCitusTableRangeVar returns whether the table named in the given
* rangeVar is a Citus table.
*/
bool
IsCitusTableRangeVar(RangeVar *rangeVar, LOCKMODE lockMode, bool missingOK)
{
Oid relationId = RangeVarGetRelid(rangeVar, lockMode, missingOK);
return IsCitusTable(relationId);
}
/*
* IsCitusTableViaCatalog returns whether the given relation is a
* distributed table or not.
@ -765,14 +790,28 @@ PgDistPartitionTupleViaCatalog(Oid relationId)
/*
* IsCitusLocalTableByDistParams returns true if given partitionMethod and
* replicationModel would identify a citus local table.
* IsReferenceTableByDistParams returns true if given partitionMethod and
* replicationModel would identify a reference table.
*/
bool
IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel)
IsReferenceTableByDistParams(char partitionMethod, char replicationModel)
{
return partitionMethod == DISTRIBUTE_BY_NONE &&
replicationModel != REPLICATION_MODEL_2PC;
replicationModel == REPLICATION_MODEL_2PC;
}
/*
* IsCitusLocalTableByDistParams returns true if given partitionMethod,
* replicationModel and colocationId would identify a citus local table.
*/
bool
IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel,
uint32 colocationId)
{
return partitionMethod == DISTRIBUTE_BY_NONE &&
replicationModel != REPLICATION_MODEL_2PC &&
colocationId == INVALID_COLOCATION_ID;
}
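/*
 * A simplified classification sketch combining the two predicates above.
 * The fall-through label is a simplification: with the new colocationId
 * check, DISTRIBUTE_BY_NONE plus a valid colocation id now falls outside
 * both predicates as well.
 */
static const char *
ClassifyDistParamsSketch(char partitionMethod, char replicationModel,
						 uint32 colocationId)
{
	if (IsReferenceTableByDistParams(partitionMethod, replicationModel))
	{
		return "reference table";
	}
	else if (IsCitusLocalTableByDistParams(partitionMethod, replicationModel,
										   colocationId))
	{
		return "citus local table";
	}
	return "distributed table";
}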
@ -4837,11 +4876,14 @@ CitusTableTypeIdList(CitusTableType citusTableType)
Datum partMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1];
Datum replicationModelDatum = datumArray[Anum_pg_dist_partition_repmodel - 1];
Datum colocationIdDatum = datumArray[Anum_pg_dist_partition_colocationid - 1];
Oid partitionMethod = DatumGetChar(partMethodDatum);
Oid replicationModel = DatumGetChar(replicationModelDatum);
uint32 colocationId = DatumGetUInt32(colocationIdDatum);
if (IsCitusTableTypeInternal(partitionMethod, replicationModel, citusTableType))
if (IsCitusTableTypeInternal(partitionMethod, replicationModel, colocationId,
citusTableType))
{
Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1];

File diff suppressed because it is too large

View File

@ -985,7 +985,7 @@ AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval)
appendStringInfo(selectQuery, "SELECT " UINT64_FORMAT " AS shard_id, ", shardId);
appendStringInfo(selectQuery, "%s AS shard_name, ", quotedShardName);
appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName);
appendStringInfo(selectQuery, PG_TOTAL_RELATION_SIZE_FUNCTION, quotedShardName);
}
@ -1670,6 +1670,48 @@ TupleToGroupShardPlacement(TupleDesc tupleDescriptor, HeapTuple heapTuple)
}
/*
* LookupTaskPlacementHostAndPort sets the nodename and nodeport for the given task placement
* with a lookup.
*/
void
LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName,
int *nodePort)
{
if (IsDummyPlacement(taskPlacement))
{
/*
* If we create a dummy placement for the local node, it is possible
* that the entry doesn't exist in pg_dist_node, hence a lookup will fail.
* In that case we want to use the dummy placement's values.
*/
*nodeName = taskPlacement->nodeName;
*nodePort = taskPlacement->nodePort;
}
else
{
/*
* We want to lookup the node information again since it is possible that
* there were changes in pg_dist_node and we will get those invalidations
* in LookupNodeForGroup.
*/
WorkerNode *workerNode = LookupNodeForGroup(taskPlacement->groupId);
*nodeName = workerNode->workerName;
*nodePort = workerNode->workerPort;
}
}
/*
* IsDummyPlacement returns true if the given placement is a dummy placement.
*/
bool
IsDummyPlacement(ShardPlacement *taskPlacement)
{
return taskPlacement->nodeId == LOCAL_NODE_ID;
}
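/*
 * A minimal usage sketch of the lookup helper above; taskPlacement is an
 * assumed input and OpenConnectionTo is a hypothetical helper.
 */
static void
ConnectToTaskPlacementSketch(ShardPlacement *taskPlacement)
{
	char *nodeName = NULL;
	int nodePort = 0;

	/* dummy placements keep their own values; others are re-resolved */
	LookupTaskPlacementHostAndPort(taskPlacement, &nodeName, &nodePort);

	OpenConnectionTo(nodeName, nodePort); /* hypothetical helper */
}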
/*
* InsertShardRow opens the shard system catalog, and inserts a new row with the
* given values into that system catalog. Note that we allow the user to pass in

File diff suppressed because it is too large

View File

@ -425,6 +425,7 @@ ErrorIfCurrentUserCanNotDistributeObject(char *textType, ObjectType type,
case OBJECT_COLLATION:
case OBJECT_VIEW:
case OBJECT_ROLE:
case OBJECT_PUBLICATION:
{
check_object_ownership(userId, type, *addr, node, *relation);
break;

View File

@ -215,6 +215,7 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
{
bool colocatedShard = true;
List *insertedShardPlacements = NIL;
List *insertedShardIds = NIL;
/* make sure that tables are hash partitioned */
CheckHashPartitionedTable(targetRelationId);
@ -254,7 +255,9 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
foreach_ptr(sourceShardInterval, sourceShardIntervalList)
{
uint64 sourceShardId = sourceShardInterval->shardId;
uint64 newShardId = GetNextShardId();
uint64 *newShardIdPtr = (uint64 *) palloc0(sizeof(uint64));
*newShardIdPtr = GetNextShardId();
insertedShardIds = lappend(insertedShardIds, newShardIdPtr);
int32 shardMinValue = DatumGetInt32(sourceShardInterval->minValue);
int32 shardMaxValue = DatumGetInt32(sourceShardInterval->maxValue);
@ -263,7 +266,7 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
List *sourceShardPlacementList = ShardPlacementListSortedByWorker(
sourceShardId);
InsertShardRow(targetRelationId, newShardId, targetShardStorageType,
InsertShardRow(targetRelationId, *newShardIdPtr, targetShardStorageType,
shardMinValueText, shardMaxValueText);
ShardPlacement *sourcePlacement = NULL;
@ -272,21 +275,26 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
int32 groupId = sourcePlacement->groupId;
const uint64 shardSize = 0;
/*
* Optimistically add the shard placement row to pg_dist_shard_placement;
* in case of any error it will be rolled back.
*/
uint64 shardPlacementId = InsertShardPlacementRow(newShardId,
InsertShardPlacementRow(*newShardIdPtr,
INVALID_PLACEMENT_ID,
shardSize,
groupId);
ShardPlacement *shardPlacement = LoadShardPlacement(newShardId,
shardPlacementId);
insertedShardPlacements = lappend(insertedShardPlacements, shardPlacement);
}
}
/*
* Load shard placements for the shards at once after all placement insertions
* have finished. That prevents the MetadataCache from rebuilding unnecessarily after
* each placement insertion.
*/
uint64 *shardIdPtr;
foreach_ptr(shardIdPtr, insertedShardIds)
{
List *placementsForShard = ShardPlacementList(*shardIdPtr);
insertedShardPlacements = list_concat(insertedShardPlacements,
placementsForShard);
}
CreateShardsOnWorkers(targetRelationId, insertedShardPlacements,
useExclusiveConnections, colocatedShard);
}

View File

@ -461,10 +461,7 @@ ResolveRelationId(text *relationName, bool missingOk)
* definition, optional column storage and statistics definitions, and index
* constraint and trigger definitions.
* When IncludeIdentities is NO_IDENTITY, the function does not include identity column
* specifications. When it's INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, the function
* uses sequences and set them as default values for identity columns by using exactly
* the same approach with worker_nextval('sequence') & nextval('sequence') logic
* described above. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTITY clauses.
* specifications. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTITY clauses.
*/
List *
GetFullTableCreationCommands(Oid relationId,
@ -500,6 +497,15 @@ GetFullTableCreationCommands(Oid relationId,
tableDDLEventList = lappend(tableDDLEventList,
truncateTriggerCommand);
}
/*
* For identity column sequences, we only need to modify
* their min/max values to produce unique values on the worker nodes.
*/
List *identitySequenceDependencyCommandList =
IdentitySequenceDependencyCommandList(relationId);
tableDDLEventList = list_concat(tableDDLEventList,
identitySequenceDependencyCommandList);
}
tableDDLEventList = list_concat(tableDDLEventList, postLoadCreationCommandList);

View File

@ -190,6 +190,19 @@ typedef struct WorkerShardStatistics
HTAB *statistics;
} WorkerShardStatistics;
/* ShardMoveDependencyInfo stores the taskId that any new shard move task within the corresponding colocation group must take a dependency on */
typedef struct ShardMoveDependencyInfo
{
int64 key;
int64 taskId;
} ShardMoveDependencyInfo;
typedef struct ShardMoveDependencies
{
HTAB *colocationDependencies;
HTAB *nodeDependencies;
} ShardMoveDependencies;
char *VariablesToBePassedToNewConnections = NULL;
/* static declarations for main logic */
@ -475,6 +488,7 @@ GetRebalanceSteps(RebalanceOptions *options)
/* sort the lists to make the function more deterministic */
List *activeWorkerList = SortedActiveWorkers();
List *activeShardPlacementListList = NIL;
List *unbalancedShards = NIL;
Oid relationId = InvalidOid;
foreach_oid(relationId, options->relationIdList)
@ -490,8 +504,29 @@ GetRebalanceSteps(RebalanceOptions *options)
shardPlacementList, options->workerNode);
}
activeShardPlacementListList =
lappend(activeShardPlacementListList, activeShardPlacementListForRelation);
if (list_length(activeShardPlacementListForRelation) >= list_length(
activeWorkerList))
{
activeShardPlacementListList = lappend(activeShardPlacementListList,
activeShardPlacementListForRelation);
}
else
{
/*
* If the number of shard groups are less than the number of worker nodes,
* at least one of the worker nodes will remain empty. For such cases,
* we consider those shard groups as a colocation group and try to
* distribute them across the cluster.
*/
unbalancedShards = list_concat(unbalancedShards,
activeShardPlacementListForRelation);
}
}
if (list_length(unbalancedShards) > 0)
{
activeShardPlacementListList = lappend(activeShardPlacementListList,
unbalancedShards);
}
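/*
 * Worked example (illustrative): with 4 active workers, a relation that
 * has only 2 shard placements lands in unbalancedShards instead of its
 * own list; all such small shard groups are then appended above as one
 * combined "colocation group", so the rebalancer can still spread them
 * instead of leaving some workers empty.
 */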
if (options->threshold < options->rebalanceStrategy->minimumThreshold)
@ -1796,10 +1831,10 @@ static void
RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid)
{
char transferMode = LookupShardTransferMode(shardReplicationModeOid);
EnsureReferenceTablesExistOnAllNodesExtended(transferMode);
if (list_length(options->relationIdList) == 0)
{
EnsureReferenceTablesExistOnAllNodesExtended(transferMode);
return;
}
@ -1814,6 +1849,25 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid)
List *placementUpdateList = GetRebalanceSteps(options);
if (transferMode == TRANSFER_MODE_AUTOMATIC)
{
/*
* If the shard transfer mode is set to auto, we should check beforehand
* if we are able to use logical replication to transfer shards or not.
* We throw an error if any of the tables do not have a replica identity, which
* is required for logical replication to replicate UPDATE and DELETE commands.
*/
PlacementUpdateEvent *placementUpdate = NULL;
foreach_ptr(placementUpdate, placementUpdateList)
{
Oid relationId = RelationIdForShard(placementUpdate->shardId);
List *colocatedTableList = ColocatedTableList(relationId);
VerifyTablesHaveReplicaIdentity(colocatedTableList);
}
}
EnsureReferenceTablesExistOnAllNodesExtended(transferMode);
if (list_length(placementUpdateList) == 0)
{
return;
@ -1857,6 +1911,137 @@ ErrorOnConcurrentRebalance(RebalanceOptions *options)
}
/*
* GetColocationId function returns the colocationId of the shard in a PlacementUpdateEvent.
*/
static int64
GetColocationId(PlacementUpdateEvent *move)
{
ShardInterval *shardInterval = LoadShardInterval(move->shardId);
CitusTableCacheEntry *citusTableCacheEntry = GetCitusTableCacheEntry(
shardInterval->relationId);
return citusTableCacheEntry->colocationId;
}
/*
* InitializeShardMoveDependencies function creates the hash maps that we use to track
* the latest moves so that subsequent moves with the same properties must take a dependency
* on them. There are two hash maps. One is for tracking the latest move scheduled in a
* given colocation group and the other one is for tracking the latest move which involves
* a given node either as its source node or its target node.
*/
static ShardMoveDependencies
InitializeShardMoveDependencies()
{
ShardMoveDependencies shardMoveDependencies;
shardMoveDependencies.colocationDependencies = CreateSimpleHashWithNameAndSize(int64,
ShardMoveDependencyInfo,
"colocationDependencyHashMap",
6);
shardMoveDependencies.nodeDependencies = CreateSimpleHashWithNameAndSize(int64,
ShardMoveDependencyInfo,
"nodeDependencyHashMap",
6);
return shardMoveDependencies;
}
/*
* GenerateTaskMoveDependencyList creates and returns an array of taskIds that
* the move must take a dependency on; the array size is returned in *nDepends.
*/
static int64 *
GenerateTaskMoveDependencyList(PlacementUpdateEvent *move, int64 colocationId,
ShardMoveDependencies shardMoveDependencies, int *nDepends)
{
HTAB *dependsList = CreateSimpleHashSetWithNameAndSize(int64,
"shardMoveDependencyList", 0);
bool found;
/* Check if there exists a move in the same colocation group scheduled earlier. */
ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search(
shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER, &found);
if (found)
{
hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
}
/* Check if there exists a move scheduled earlier whose source or target node
* overlaps with the current move's source node. */
shardMoveDependencyInfo = hash_search(
shardMoveDependencies.nodeDependencies, &move->sourceNode->nodeId, HASH_ENTER,
&found);
if (found)
{
hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
}
/* Check if there exists a move scheduled earlier whose source or target node
* overlaps with the current move's target node. */
shardMoveDependencyInfo = hash_search(
shardMoveDependencies.nodeDependencies, &move->targetNode->nodeId, HASH_ENTER,
&found);
if (found)
{
hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
}
*nDepends = hash_get_num_entries(dependsList);
int64 *dependsArray = NULL;
if (*nDepends > 0)
{
HASH_SEQ_STATUS seq;
dependsArray = palloc((*nDepends) * sizeof(int64));
hash_seq_init(&seq, dependsList);
int i = 0;
int64 *dependsTaskId;
while ((dependsTaskId = (int64 *) hash_seq_search(&seq)) != NULL)
{
dependsArray[i++] = *dependsTaskId;
}
}
return dependsArray;
}
/*
* UpdateShardMoveDependencies function updates the dependency maps with the latest move's taskId.
*/
static void
UpdateShardMoveDependencies(PlacementUpdateEvent *move, uint64 colocationId, int64 taskId,
ShardMoveDependencies shardMoveDependencies)
{
ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search(
shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER, NULL);
shardMoveDependencyInfo->taskId = taskId;
shardMoveDependencyInfo = hash_search(shardMoveDependencies.nodeDependencies,
&move->sourceNode->nodeId, HASH_ENTER, NULL);
shardMoveDependencyInfo->taskId = taskId;
shardMoveDependencyInfo = hash_search(shardMoveDependencies.nodeDependencies,
&move->targetNode->nodeId, HASH_ENTER, NULL);
shardMoveDependencyInfo->taskId = taskId;
}
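/*
 * Worked example (illustrative) of how the helpers above cooperate:
 * suppose move A (colocation group 7, node 1 -> node 2) is scheduled as
 * task 101. UpdateShardMoveDependencies records 101 under colocation key 7
 * and under node keys 1 and 2. A later move in group 7, or any move that
 * touches node 1 or node 2, then finds task 101 via
 * GenerateTaskMoveDependencyList and is scheduled to wait for it.
 */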
/*
* RebalanceTableShardsBackground rebalances the shards for the relations
* inside the relationIdList across the different workers. It does so using our
@ -1894,12 +2079,6 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
EnsureTableOwner(colocatedTableId);
}
if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
{
/* make sure that all tables included in the rebalance have a replica identity*/
VerifyTablesHaveReplicaIdentity(colocatedTableList);
}
List *placementUpdateList = GetRebalanceSteps(options);
if (list_length(placementUpdateList) == 0)
@ -1908,6 +2087,23 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
return 0;
}
if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
{
/*
* If the shard transfer mode is set to auto, we should check beforehand
* if we are able to use logical replication to transfer shards or not.
* We throw an error if any of the tables do not have a replica identity, which
* is required for logical replication to replicate UPDATE and DELETE commands.
*/
PlacementUpdateEvent *placementUpdate = NULL;
foreach_ptr(placementUpdate, placementUpdateList)
{
relationId = RelationIdForShard(placementUpdate->shardId);
List *colocatedTables = ColocatedTableList(relationId);
VerifyTablesHaveReplicaIdentity(colocatedTables);
}
}
DropOrphanedResourcesInSeparateTransaction();
/* find the name of the shard transfer mode to interpolate in the scheduled command */
@ -1922,18 +2118,8 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
StringInfoData buf = { 0 };
initStringInfo(&buf);
/*
* Currently we only have two tasks that any move can depend on:
* - replicating reference tables
* - the previous move
*
* prevJobIdx tells what slot to write the id of the task into. We only use both slots
* if we are actually replicating reference tables.
*/
int64 prevJobId[2] = { 0 };
int prevJobIdx = 0;
List *referenceTableIdList = NIL;
int64 replicateRefTablesTaskId = 0;
if (HasNodesWithMissingReferenceTables(&referenceTableIdList))
{
@ -1949,15 +2135,15 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
appendStringInfo(&buf,
"SELECT pg_catalog.replicate_reference_tables(%s)",
quote_literal_cstr(shardTranferModeLabel));
BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
prevJobIdx, prevJobId);
prevJobId[prevJobIdx] = task->taskid;
prevJobIdx++;
BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, 0,
NULL);
replicateRefTablesTaskId = task->taskid;
}
PlacementUpdateEvent *move = NULL;
bool first = true;
int prevMoveIndex = prevJobIdx;
ShardMoveDependencies shardMoveDependencies = InitializeShardMoveDependencies();
foreach_ptr(move, placementUpdateList)
{
resetStringInfo(&buf);
@ -1969,14 +2155,27 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
move->targetNode->nodeId,
quote_literal_cstr(shardTranferModeLabel));
BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
prevJobIdx, prevJobId);
prevJobId[prevMoveIndex] = task->taskid;
if (first)
int64 colocationId = GetColocationId(move);
int nDepends = 0;
int64 *dependsArray = GenerateTaskMoveDependencyList(move, colocationId,
shardMoveDependencies,
&nDepends);
if (nDepends == 0 && replicateRefTablesTaskId > 0)
{
first = false;
prevJobIdx++;
nDepends = 1;
dependsArray = palloc(nDepends * sizeof(int64));
dependsArray[0] = replicateRefTablesTaskId;
}
BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
nDepends,
dependsArray);
UpdateShardMoveDependencies(move, colocationId, task->taskid,
shardMoveDependencies);
}
ereport(NOTICE,

View File

@ -70,22 +70,43 @@ typedef struct ShardCommandList
List *ddlCommandList;
} ShardCommandList;
static const char *ShardTransferTypeNames[] = {
[SHARD_TRANSFER_INVALID_FIRST] = "unknown",
[SHARD_TRANSFER_MOVE] = "move",
[SHARD_TRANSFER_COPY] = "copy",
};
static const char *ShardTransferTypeNamesCapitalized[] = {
[SHARD_TRANSFER_INVALID_FIRST] = "unknown",
[SHARD_TRANSFER_MOVE] = "Move",
[SHARD_TRANSFER_COPY] = "Copy",
};
static const char *ShardTransferTypeNamesContinuous[] = {
[SHARD_TRANSFER_INVALID_FIRST] = "unknown",
[SHARD_TRANSFER_MOVE] = "Moving",
[SHARD_TRANSFER_COPY] = "Copying",
};
static const char *ShardTransferTypeFunctionNames[] = {
[SHARD_TRANSFER_INVALID_FIRST] = "unknown",
[SHARD_TRANSFER_MOVE] = "citus_move_shard_placement",
[SHARD_TRANSFER_COPY] = "citus_copy_shard_placement",
};
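/*
 * Illustrative lookups: the transfer type indexes the arrays above, e.g.
 * ShardTransferTypeNames[SHARD_TRANSFER_MOVE] is "move" and
 * ShardTransferTypeNamesContinuous[SHARD_TRANSFER_COPY] is "Copying".
 */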
/* local function forward declarations */
static bool CanUseLogicalReplication(Oid relationId, char shardReplicationMode);
static void ErrorIfTableCannotBeReplicated(Oid relationId);
static void ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName,
int targetNodePort);
static void ErrorIfTargetNodeIsNotSafeForTransfer(const char *targetNodeName,
int targetNodePort,
ShardTransferType transferType);
static void ErrorIfSameNode(char *sourceNodeName, int sourceNodePort,
char *targetNodeName, int targetNodePort,
const char *operationName);
static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
int32 sourceNodePort, char *targetNodeName,
int32 targetNodePort,
char shardReplicationMode);
static void CopyShardTables(List *shardIntervalList, char *sourceNodeName,
int32 sourceNodePort, char *targetNodeName,
int32 targetNodePort, bool useLogicalReplication,
char *operationName);
const char *operationName);
static void CopyShardTablesViaLogicalReplication(List *shardIntervalList,
char *sourceNodeName,
int32 sourceNodePort,
@ -100,7 +121,7 @@ static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName,
int32 targetNodePort);
static List * RecreateTableDDLCommandList(Oid relationId);
static void EnsureTableListOwner(List *tableIdList);
static void EnsureTableListSuitableForReplication(List *tableIdList);
static void ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList);
static void DropShardPlacementsFromMetadata(List *shardList,
char *nodeName,
@ -112,12 +133,28 @@ static void UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId,
int32 targetNodePort);
static bool IsShardListOnNode(List *colocatedShardList, char *targetNodeName,
uint32 targetPort);
static void SetupRebalanceMonitorForShardTransfer(uint64 shardId, Oid distributedTableId,
char *sourceNodeName,
uint32 sourceNodePort,
char *targetNodeName,
uint32 targetNodePort,
ShardTransferType transferType);
static void CheckSpaceConstraints(MultiConnection *connection,
uint64 colocationSizeInBytes);
static void EnsureAllShardsCanBeCopied(List *colocatedShardList,
char *sourceNodeName, uint32 sourceNodePort,
char *targetNodeName, uint32 targetNodePort);
static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
char *sourceNodeName, uint32 sourceNodePort,
char *targetNodeName, uint32
targetNodePort);
char *targetNodeName, uint32 targetNodePort,
ShardTransferType transferType);
static bool TransferAlreadyCompleted(List *colocatedShardList,
char *sourceNodeName, uint32 sourceNodePort,
char *targetNodeName, uint32 targetNodePort,
ShardTransferType transferType);
static void LockColocatedRelationsForMove(List *colocatedTableList);
static void ErrorIfForeignTableForShardTransfer(List *colocatedTableList,
ShardTransferType transferType);
static List * RecreateShardDDLCommandList(ShardInterval *shardInterval,
const char *sourceNodeName,
int32 sourceNodePort);
@ -163,9 +200,9 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS)
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
TransferShards(shardId, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
shardReplicationMode);
shardReplicationMode, SHARD_TRANSFER_COPY);
PG_RETURN_VOID();
}
@ -192,10 +229,9 @@ citus_copy_shard_placement_with_nodeid(PG_FUNCTION_ARGS)
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
ReplicateColocatedShardPlacement(shardId,
sourceNode->workerName, sourceNode->workerPort,
TransferShards(shardId, sourceNode->workerName, sourceNode->workerPort,
targetNode->workerName, targetNode->workerPort,
shardReplicationMode);
shardReplicationMode, SHARD_TRANSFER_COPY);
PG_RETURN_VOID();
}
@ -228,9 +264,9 @@ master_copy_shard_placement(PG_FUNCTION_ARGS)
ereport(WARNING, (errmsg("do_repair argument is deprecated")));
}
ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
TransferShards(shardId, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
shardReplicationMode);
shardReplicationMode, SHARD_TRANSFER_COPY);
PG_RETURN_VOID();
@ -264,9 +300,10 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
int32 targetNodePort = PG_GETARG_INT32(4);
Oid shardReplicationModeOid = PG_GETARG_OID(5);
citus_move_shard_placement_internal(shardId, sourceNodeName, sourceNodePort,
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
TransferShards(shardId, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
shardReplicationModeOid);
shardReplicationMode, SHARD_TRANSFER_MOVE);
PG_RETURN_VOID();
}
@ -291,126 +328,111 @@ citus_move_shard_placement_with_nodeid(PG_FUNCTION_ARGS)
WorkerNode *sourceNode = FindNodeWithNodeId(sourceNodeId, missingOk);
WorkerNode *targetNode = FindNodeWithNodeId(targetNodeId, missingOk);
citus_move_shard_placement_internal(shardId, sourceNode->workerName,
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
TransferShards(shardId, sourceNode->workerName,
sourceNode->workerPort, targetNode->workerName,
targetNode->workerPort,
shardReplicationModeOid);
targetNode->workerPort, shardReplicationMode, SHARD_TRANSFER_MOVE);
PG_RETURN_VOID();
}
/*
* citus_move_shard_placement_internal is the internal function for shard moves.
* TransferShards is the common entry point for shard transfers, i.e. both shard moves and shard copies.
*/
void
citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
TransferShards(int64 shardId, char *sourceNodeName,
int32 sourceNodePort, char *targetNodeName,
int32 targetNodePort, Oid shardReplicationModeOid)
int32 targetNodePort, char shardReplicationMode,
ShardTransferType transferType)
{
ListCell *colocatedTableCell = NULL;
ListCell *colocatedShardCell = NULL;
/* strings to be used in log messages */
const char *operationName = ShardTransferTypeNames[transferType];
const char *operationNameCapitalized =
ShardTransferTypeNamesCapitalized[transferType];
const char *operationFunctionName = ShardTransferTypeFunctionNames[transferType];
/* cannot transfer shard to the same node */
ErrorIfSameNode(sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
"move");
Oid relationId = RelationIdForShard(shardId);
ErrorIfMoveUnsupportedTableType(relationId);
ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort);
AcquirePlacementColocationLock(relationId, ExclusiveLock, "move");
operationName);
ShardInterval *shardInterval = LoadShardInterval(shardId);
Oid distributedTableId = shardInterval->relationId;
/* error if unsupported shard transfer */
if (transferType == SHARD_TRANSFER_MOVE)
{
ErrorIfMoveUnsupportedTableType(distributedTableId);
}
else if (transferType == SHARD_TRANSFER_COPY)
{
ErrorIfTableCannotBeReplicated(distributedTableId);
EnsureNoModificationsHaveBeenDone();
}
ErrorIfTargetNodeIsNotSafeForTransfer(targetNodeName, targetNodePort, transferType);
AcquirePlacementColocationLock(distributedTableId, ExclusiveLock, operationName);
List *colocatedTableList = ColocatedTableList(distributedTableId);
List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
foreach(colocatedTableCell, colocatedTableList)
EnsureTableListOwner(colocatedTableList);
if (transferType == SHARD_TRANSFER_MOVE)
{
Oid colocatedTableId = lfirst_oid(colocatedTableCell);
/* check that user has owner rights in all co-located tables */
EnsureTableOwner(colocatedTableId);
/*
* Block concurrent DDL / TRUNCATE commands on the relation. Similarly,
* block concurrent citus_move_shard_placement() on any shard of
* the same relation. This is OK for now since we're executing shard
* moves sequentially anyway.
*/
LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock);
LockColocatedRelationsForMove(colocatedTableList);
}
if (IsForeignTable(relationId))
ErrorIfForeignTableForShardTransfer(colocatedTableList, transferType);
if (transferType == SHARD_TRANSFER_COPY)
{
char *relationName = get_rel_name(colocatedTableId);
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot move shard"),
errdetail("Table %s is a foreign table. Moving "
"shards backed by foreign tables is "
"not supported.", relationName)));
ErrorIfReplicatingDistributedTableWithFKeys(colocatedTableList);
}
}
/* we sort colocatedShardList so that lock operations will not cause any deadlocks */
colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
/*
* If there are no active placements on the source and only active placements on
* the target node, we assume the copy to already be done.
* We sort shardIntervalList so that lock operations will not cause any
* deadlocks.
*/
if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
!IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
if (TransferAlreadyCompleted(colocatedShardList,
sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
transferType))
{
/* if the transfer is already completed, we can return right away */
ereport(WARNING, (errmsg("shard is already present on node %s:%d",
targetNodeName, targetNodePort),
errdetail("Move may have already completed.")));
errdetail("%s may have already completed.",
operationNameCapitalized)));
return;
}
foreach(colocatedShardCell, colocatedShardList)
{
ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell);
uint64 colocatedShardId = colocatedShard->shardId;
EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
EnsureAllShardsCanBeCopied(colocatedShardList, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort);
}
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
if (shardReplicationMode == TRANSFER_MODE_AUTOMATIC)
{
VerifyTablesHaveReplicaIdentity(colocatedTableList);
}
EnsureEnoughDiskSpaceForShardMove(colocatedShardList, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort);
EnsureEnoughDiskSpaceForShardMove(colocatedShardList,
sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort, transferType);
/*
* We want to be able to track progress of shard moves using
* get_rebalancer_progress. If this move is initiated by the rebalancer,
* then the rebalancer call has already set up the shared memory that is
* used to do that. But if citus_move_shard_placement is called directly by
* the user (or through any other mechanism), then the shared memory is not
* set up yet. In that case we do it here.
*/
if (!IsRebalancerInternalBackend())
{
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
PlacementUpdateEvent *placementUpdateEvent = palloc0(
sizeof(PlacementUpdateEvent));
placementUpdateEvent->updateType = PLACEMENT_UPDATE_MOVE;
placementUpdateEvent->shardId = shardId;
placementUpdateEvent->sourceNode = sourceNode;
placementUpdateEvent->targetNode = targetNode;
SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId,
REBALANCE_PROGRESS_MOVING,
PLACEMENT_UPDATE_STATUS_SETTING_UP);
}
SetupRebalanceMonitorForShardTransfer(shardId, distributedTableId,
sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
transferType);
UpdatePlacementUpdateStatusForShardIntervalList(
colocatedShardList,
@ -428,7 +450,7 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
{
BlockWritesToShardList(colocatedShardList);
}
else
else if (transferType == SHARD_TRANSFER_MOVE)
{
/*
* We prevent multiple shard moves in a transaction that use logical
@ -452,6 +474,20 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
PlacementMovedUsingLogicalReplicationInTX = true;
}
if (transferType == SHARD_TRANSFER_COPY &&
!IsCitusTableType(distributedTableId, REFERENCE_TABLE))
{
/*
* When copying a shard to a new node, we should first ensure that reference
* tables are present such that joins work immediately after copying the shard.
* When copying a reference table, we are probably trying to achieve just that.
*
* Since this is a long-running operation, we do this after the error checks, but
* before taking metadata locks.
*/
EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode);
}
DropOrphanedResourcesInSeparateTransaction();
ShardInterval *colocatedShard = NULL;
@ -466,18 +502,21 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
ErrorIfCleanupRecordForShardExists(qualifiedShardName);
}
/*
* CopyColocatedShardPlacement function copies given shard with its co-located
* shards.
*/
CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName,
targetNodePort, useLogicalReplication, "citus_move_shard_placement");
targetNodePort, useLogicalReplication, operationFunctionName);
if (transferType == SHARD_TRANSFER_MOVE)
{
/* delete old shards metadata and mark the shards as to be deferred drop */
int32 sourceGroupId = GroupForNode(sourceNodeName, sourceNodePort);
InsertCleanupRecordsForShardPlacementsOnNode(colocatedShardList,
sourceGroupId);
}
/*
* Finally insert the placements to pg_dist_placement and sync it to the
* metadata workers.
*/
colocatedShard = NULL;
foreach_ptr(colocatedShard, colocatedShardList)
{
@ -488,17 +527,30 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
InsertShardPlacementRow(colocatedShardId, placementId,
ShardLength(colocatedShardId),
groupId);
if (transferType == SHARD_TRANSFER_COPY &&
ShouldSyncTableMetadata(colocatedShard->relationId))
{
char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId,
0, groupId);
SendCommandToWorkersWithMetadata(placementCommand);
}
}
if (transferType == SHARD_TRANSFER_MOVE)
{
/*
* Since this is move operation, we remove the placements from the metadata
* for the source node after copy.
*/
DropShardPlacementsFromMetadata(colocatedShardList, sourceNodeName, sourceNodePort);
DropShardPlacementsFromMetadata(colocatedShardList,
sourceNodeName, sourceNodePort);
UpdateColocatedShardPlacementMetadataOnWorkers(shardId, sourceNodeName,
sourceNodePort, targetNodeName,
targetNodePort);
}
UpdatePlacementUpdateStatusForShardIntervalList(
colocatedShardList,
@ -611,6 +663,70 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetN
}
/*
* LockColocatedRelationsForMove takes a list of relations and locks all of
* them using ShareUpdateExclusiveLock.
*/
static void
LockColocatedRelationsForMove(List *colocatedTableList)
{
Oid colocatedTableId = InvalidOid;
foreach_oid(colocatedTableId, colocatedTableList)
{
LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock);
}
}
/*
* ErrorIfForeignTableForShardTransfer takes a list of relations and errors
* out if there's a foreign table in the list.
*/
static void
ErrorIfForeignTableForShardTransfer(List *colocatedTableList,
ShardTransferType transferType)
{
Oid colocatedTableId = InvalidOid;
foreach_oid(colocatedTableId, colocatedTableList)
{
if (IsForeignTable(colocatedTableId))
{
char *relationName = get_rel_name(colocatedTableId);
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot %s shard",
ShardTransferTypeNames[transferType]),
errdetail("Table %s is a foreign table. "
"%s shards backed by foreign tables is "
"not supported.", relationName,
ShardTransferTypeNamesContinuous[transferType])));
}
}
}
/*
* EnsureAllShardsCanBeCopied is a wrapper around EnsureShardCanBeCopied.
*/
static void
EnsureAllShardsCanBeCopied(List *colocatedShardList,
char *sourceNodeName, uint32 sourceNodePort,
char *targetNodeName, uint32 targetNodePort)
{
ShardInterval *colocatedShard = NULL;
foreach_ptr(colocatedShard, colocatedShardList)
{
uint64 colocatedShardId = colocatedShard->shardId;
/*
* To transfer a shard, there should be a healthy placement on the source
* node and no placement on the target node.
*/
EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort);
}
}
/*
* EnsureEnoughDiskSpaceForShardMove checks that there is enough space for
* shard moves of the given colocated shard list from source node to target node.
@ -619,9 +735,10 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetN
static void
EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
char *sourceNodeName, uint32 sourceNodePort,
char *targetNodeName, uint32 targetNodePort)
char *targetNodeName, uint32 targetNodePort,
ShardTransferType transferType)
{
if (!CheckAvailableSpaceBeforeMove)
if (!CheckAvailableSpaceBeforeMove || transferType != SHARD_TRANSFER_MOVE)
{
return;
}
@ -636,6 +753,34 @@ EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
}
/*
* TransferAlreadyCompleted returns true if the given shard transfer is already done.
* Returns false otherwise.
*/
static bool
TransferAlreadyCompleted(List *colocatedShardList,
char *sourceNodeName, uint32 sourceNodePort,
char *targetNodeName, uint32 targetNodePort,
ShardTransferType transferType)
{
if (transferType == SHARD_TRANSFER_MOVE &&
IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
!IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
{
return true;
}
if (transferType == SHARD_TRANSFER_COPY &&
IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
{
return true;
}
return false;
}
/*
* ShardListSizeInBytes returns the size in bytes of a set of shard tables.
*/
@ -682,6 +827,49 @@ ShardListSizeInBytes(List *shardList, char *workerNodeName, uint32
}
/*
* SetupRebalanceMonitorForShardTransfer prepares the parameters and
* calls SetupRebalanceMonitor, unless the current transfer is a move
* initiated by the rebalancer.
* See the comments on SetupRebalanceMonitor.
*/
static void
SetupRebalanceMonitorForShardTransfer(uint64 shardId, Oid distributedTableId,
char *sourceNodeName, uint32 sourceNodePort,
char *targetNodeName, uint32 targetNodePort,
ShardTransferType transferType)
{
if (transferType == SHARD_TRANSFER_MOVE && IsRebalancerInternalBackend())
{
/*
* We want to be able to track progress of shard moves using
* get_rebalancer_progress. If this move is initiated by the rebalancer,
* then the rebalancer call has already set up the shared memory that is
* used to do that, so we should return here.
* But if citus_move_shard_placement is called directly by the user
* (or through any other mechanism), then the shared memory is not
* set up yet. In that case we do it here.
*/
return;
}
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
PlacementUpdateEvent *placementUpdateEvent = palloc0(
sizeof(PlacementUpdateEvent));
placementUpdateEvent->updateType =
transferType == SHARD_TRANSFER_COPY ? PLACEMENT_UPDATE_COPY :
PLACEMENT_UPDATE_MOVE;
placementUpdateEvent->shardId = shardId;
placementUpdateEvent->sourceNode = sourceNode;
placementUpdateEvent->targetNode = targetNode;
SetupRebalanceMonitor(list_make1(placementUpdateEvent), distributedTableId,
REBALANCE_PROGRESS_MOVING,
PLACEMENT_UPDATE_STATUS_SETTING_UP);
}
/*
* CheckSpaceConstraints checks there is enough space to place the colocation
* on the node that the connection is connected to.
@ -729,17 +917,19 @@ CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes)
/*
* ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not
* eligible for moving shards.
* ErrorIfTargetNodeIsNotSafeForTransfer throws error if the target node is not
* eligible for shard transfers.
*/
void
ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
static void
ErrorIfTargetNodeIsNotSafeForTransfer(const char *targetNodeName, int targetNodePort,
ShardTransferType transferType)
{
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
if (workerNode == NULL)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Moving shards to a non-existing node is not supported"),
errmsg("%s shards to a non-existing node is not supported",
ShardTransferTypeNamesContinuous[transferType]),
errhint(
"Add the target node via SELECT citus_add_node('%s', %d);",
targetNodeName, targetNodePort)));
@ -748,13 +938,14 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
if (!workerNode->isActive)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Moving shards to a non-active node is not supported"),
errmsg("%s shards to a non-active node is not supported",
ShardTransferTypeNamesContinuous[transferType]),
errhint(
"Activate the target node via SELECT citus_activate_node('%s', %d);",
targetNodeName, targetNodePort)));
}
if (!workerNode->shouldHaveShards)
if (transferType == SHARD_TRANSFER_MOVE && !workerNode->shouldHaveShards)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Moving shards to a node that shouldn't have a shard is "
@ -767,8 +958,9 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
if (!NodeIsPrimary(workerNode))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Moving shards to a secondary (e.g., replica) node is "
"not supported")));
errmsg("%s shards to a secondary (e.g., replica) node is "
"not supported",
ShardTransferTypeNamesContinuous[transferType])));
}
}
@ -1046,41 +1238,6 @@ ErrorIfTableCannotBeReplicated(Oid relationId)
}
/*
* ErrorIfTargetNodeIsNotSafeToCopyTo throws an error if the target node is not
* eligible for copying shards.
*/
static void
ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName, int targetNodePort)
{
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
if (workerNode == NULL)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Copying shards to a non-existing node is not supported"),
errhint(
"Add the target node via SELECT citus_add_node('%s', %d);",
targetNodeName, targetNodePort)));
}
if (!workerNode->isActive)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Copying shards to a non-active node is not supported"),
errhint(
"Activate the target node via SELECT citus_activate_node('%s', %d);",
targetNodeName, targetNodePort)));
}
if (!NodeIsPrimary(workerNode))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Copying shards to a secondary (e.g., replica) node is "
"not supported")));
}
}
/*
* LookupShardTransferMode maps the oids of citus.shard_transfer_mode enum
* values to a char.
@ -1114,154 +1271,6 @@ LookupShardTransferMode(Oid shardReplicationModeOid)
}
/*
* ReplicateColocatedShardPlacement replicates the given shard and its
* colocated shards from a source node to target node.
*/
static void
ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
int32 sourceNodePort, char *targetNodeName,
int32 targetNodePort, char shardReplicationMode)
{
ShardInterval *shardInterval = LoadShardInterval(shardId);
Oid distributedTableId = shardInterval->relationId;
ErrorIfSameNode(sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
"copy");
ErrorIfTableCannotBeReplicated(shardInterval->relationId);
ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort);
EnsureNoModificationsHaveBeenDone();
AcquirePlacementColocationLock(shardInterval->relationId, ExclusiveLock, "copy");
List *colocatedTableList = ColocatedTableList(distributedTableId);
List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
EnsureTableListOwner(colocatedTableList);
EnsureTableListSuitableForReplication(colocatedTableList);
/*
* We sort shardIntervalList so that lock operations will not cause any
* deadlocks.
*/
colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
/*
* If there are active placements on both nodes, we assume the copy to already
* be done.
*/
if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
{
ereport(WARNING, (errmsg("shard is already present on node %s:%d",
targetNodeName, targetNodePort),
errdetail("Copy may have already completed.")));
return;
}
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
Oid relationId = RelationIdForShard(shardId);
PlacementUpdateEvent *placementUpdateEvent = palloc0(
sizeof(PlacementUpdateEvent));
placementUpdateEvent->updateType = PLACEMENT_UPDATE_COPY;
placementUpdateEvent->shardId = shardId;
placementUpdateEvent->sourceNode = sourceNode;
placementUpdateEvent->targetNode = targetNode;
SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId,
REBALANCE_PROGRESS_MOVING,
PLACEMENT_UPDATE_STATUS_SETTING_UP);
UpdatePlacementUpdateStatusForShardIntervalList(
colocatedShardList,
sourceNodeName,
sourceNodePort,
PLACEMENT_UPDATE_STATUS_SETTING_UP);
/*
* At this point of the shard replication, we don't need to block the writes to
* shards when logical replication is used.
*/
bool useLogicalReplication = CanUseLogicalReplication(distributedTableId,
shardReplicationMode);
if (!useLogicalReplication)
{
BlockWritesToShardList(colocatedShardList);
}
ShardInterval *colocatedShard = NULL;
foreach_ptr(colocatedShard, colocatedShardList)
{
uint64 colocatedShardId = colocatedShard->shardId;
/*
* For a shard copy, there should be a healthy placement on the source
* node and no placement on the target node.
*/
EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort);
}
if (shardReplicationMode == TRANSFER_MODE_AUTOMATIC)
{
VerifyTablesHaveReplicaIdentity(colocatedTableList);
}
if (!IsCitusTableType(distributedTableId, REFERENCE_TABLE))
{
/*
* When copying a shard to a new node, we should first ensure that reference
* tables are present such that joins work immediately after copying the shard.
* When copying a reference table, we are probably trying to achieve just that.
*
* Since this is a long-running operation, we do this after the error checks, but
* before taking metadata locks.
*/
EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode);
}
DropOrphanedResourcesInSeparateTransaction();
CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort, useLogicalReplication,
"citus_copy_shard_placement");
/*
* Finally insert the placements to pg_dist_placement and sync it to the
* metadata workers.
*/
foreach_ptr(colocatedShard, colocatedShardList)
{
uint64 colocatedShardId = colocatedShard->shardId;
uint32 groupId = GroupForNode(targetNodeName, targetNodePort);
uint64 placementId = GetNextPlacementId();
InsertShardPlacementRow(colocatedShardId, placementId,
ShardLength(colocatedShardId),
groupId);
if (ShouldSyncTableMetadata(colocatedShard->relationId))
{
char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId,
0, groupId);
SendCommandToWorkersWithMetadata(placementCommand);
}
}
UpdatePlacementUpdateStatusForShardIntervalList(
colocatedShardList,
sourceNodeName,
sourceNodePort,
PLACEMENT_UPDATE_STATUS_COMPLETED);
FinalizeCurrentProgressMonitor();
}
/*
* EnsureTableListOwner ensures current user owns given tables. Superusers
* are regarded as owners.
@ -1278,25 +1287,15 @@ EnsureTableListOwner(List *tableIdList)
/*
* EnsureTableListSuitableForReplication errors out if given tables are not
* ErrorIfReplicatingDistributedTableWithFKeys errors out if given tables are not
* suitable for replication.
*/
static void
EnsureTableListSuitableForReplication(List *tableIdList)
ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList)
{
Oid tableId = InvalidOid;
foreach_oid(tableId, tableIdList)
{
if (IsForeignTable(tableId))
{
char *relationName = get_rel_name(tableId);
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot replicate shard"),
errdetail("Table %s is a foreign table. Replicating "
"shards backed by foreign tables is "
"not supported.", relationName)));
}
List *foreignConstraintCommandList =
GetReferencingForeignConstaintCommands(tableId);
@ -1318,7 +1317,7 @@ EnsureTableListSuitableForReplication(List *tableIdList)
static void
CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort,
char *targetNodeName, int32 targetNodePort, bool useLogicalReplication,
char *operationName)
const char *operationName)
{
if (list_length(shardIntervalList) < 1)
{

View File

@ -53,8 +53,14 @@ worker_copy_table_to_node(PG_FUNCTION_ARGS)
targetNodeId);
StringInfo selectShardQueryForCopy = makeStringInfo();
/*
* Even though we COPY(SELECT ...) all the columns, we can't just do SELECT *, because we must not COPY generated columns.
*/
const char *columnList = CopyableColumnNamesFromRelationName(relationSchemaName,
relationName);
appendStringInfo(selectShardQueryForCopy,
"SELECT * FROM %s;", relationQualifiedName);
"SELECT %s FROM %s;", columnList, relationQualifiedName);
ParamListInfo params = NULL;
ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params,

View File

@ -24,6 +24,7 @@
#include "distributed/relation_utils.h"
#include "distributed/version_compat.h"
#include "distributed/local_executor.h"
#include "distributed/replication_origin_session_utils.h"
/*
* LocalCopyBuffer is used in copy callback to return the copied rows.
@ -73,13 +74,14 @@ static void ShardCopyDestReceiverDestroy(DestReceiver *destReceiver);
static bool CanUseLocalCopy(uint32_t destinationNodeId);
static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName,
bool
useBinaryFormat);
useBinaryFormat, TupleDesc tupleDesc);
static void WriteLocalTuple(TupleTableSlot *slot, ShardCopyDestReceiver *copyDest);
static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead);
static void LocalCopyToShard(ShardCopyDestReceiver *copyDest, CopyOutState
localCopyOutState);
static void ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest);
static bool
CanUseLocalCopy(uint32_t destinationNodeId)
{
@ -103,9 +105,16 @@ ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest)
NULL /* database (current) */);
ClaimConnectionExclusively(copyDest->connection);
RemoteTransactionBeginIfNecessary(copyDest->connection);
SetupReplicationOriginRemoteSession(copyDest->connection);
StringInfo copyStatement = ConstructShardCopyStatement(
copyDest->destinationShardFullyQualifiedName,
copyDest->copyOutState->binary);
copyDest->copyOutState->binary,
copyDest->tupleDescriptor);
if (!SendRemoteCommand(copyDest->connection, copyStatement->data))
{
@ -184,6 +193,8 @@ ShardCopyDestReceiverReceive(TupleTableSlot *slot, DestReceiver *dest)
CopyOutState copyOutState = copyDest->copyOutState;
if (copyDest->useLocalCopy)
{
/* Setup replication origin session for local copy */
WriteLocalTuple(slot, copyDest);
if (copyOutState->fe_msgbuf->len > LocalCopyFlushThresholdByte)
{
@ -259,6 +270,11 @@ ShardCopyDestReceiverStartup(DestReceiver *dest, int operation, TupleDesc
copyDest->columnOutputFunctions = ColumnOutputFunctions(inputTupleDescriptor,
copyOutState->binary);
copyDest->copyOutState = copyOutState;
if (copyDest->useLocalCopy)
{
/* Setup replication origin session for local copy */
SetupReplicationOriginLocalSession();
}
}
@ -317,6 +333,9 @@ ShardCopyDestReceiverShutdown(DestReceiver *dest)
PQclear(result);
ForgetResults(copyDest->connection);
ResetReplicationOriginRemoteSession(copyDest->connection);
CloseConnection(copyDest->connection);
}
}
@ -329,6 +348,10 @@ static void
ShardCopyDestReceiverDestroy(DestReceiver *dest)
{
ShardCopyDestReceiver *copyDest = (ShardCopyDestReceiver *) dest;
if (copyDest->useLocalCopy)
{
ResetReplicationOriginLocalSession();
}
if (copyDest->copyOutState)
{
@ -344,21 +367,80 @@ ShardCopyDestReceiverDestroy(DestReceiver *dest)
}
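As a reading aid, the replication-origin session calls added across the hunks above pair up as follows; this is a summary of the patch, not code from it:
/*
* Replication origin session lifecycle in ShardCopyDestReceiver:
*
*   local copy:  ShardCopyDestReceiverStartup()  -> SetupReplicationOriginLocalSession()
*                ShardCopyDestReceiverDestroy()  -> ResetReplicationOriginLocalSession()
*
*   remote copy: ConnectToRemoteAndStartCopy()   -> SetupReplicationOriginRemoteSession(connection)
*                ShardCopyDestReceiverShutdown() -> ResetReplicationOriginRemoteSession(connection),
*                                                   followed by CloseConnection(connection)
*/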
/*
* CopyableColumnNamesFromTupleDesc creates and returns a comma-separated string of column names to be used
* in the COPY and SELECT statements when copying a table. Those statements must filter out GENERATED columns,
* since COPY fails to handle them. While iterating over the table's attributes, we also skip dropped columns.
*/
const char *
CopyableColumnNamesFromTupleDesc(TupleDesc tupDesc)
{
StringInfo columnList = makeStringInfo();
bool firstInList = true;
for (int i = 0; i < tupDesc->natts; i++)
{
Form_pg_attribute att = TupleDescAttr(tupDesc, i);
if (att->attgenerated || att->attisdropped)
{
continue;
}
if (!firstInList)
{
appendStringInfo(columnList, ",");
}
firstInList = false;
appendStringInfo(columnList, "%s", quote_identifier(NameStr(att->attname)));
}
return columnList->data;
}
/*
* CopyableColumnNamesFromRelationName is a wrapper around CopyableColumnNamesFromTupleDesc that looks up the relation by schema and table name.
*/
const char *
CopyableColumnNamesFromRelationName(const char *schemaName, const char *relationName)
{
Oid namespaceOid = get_namespace_oid(schemaName, true);
Oid relationId = get_relname_relid(relationName, namespaceOid);
Relation relation = relation_open(relationId, AccessShareLock);
TupleDesc tupleDesc = RelationGetDescr(relation);
const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc);
relation_close(relation, NoLock);
return columnList;
}
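The column-list construction boils down to a skip-and-join loop. Below is a minimal standalone sketch of that pattern in plain C, with mocked attributes instead of Postgres' TupleDesc and with quote_identifier() left out; it assumes nothing beyond the C standard library:
#include <stdio.h>
#include <string.h>

typedef struct { const char *name; int isGenerated; int isDropped; } MockAttr;

static void
BuildCopyableColumnList(char *out, size_t outSize, const MockAttr *attrs, int natts)
{
	int firstInList = 1;

	out[0] = '\0';
	for (int i = 0; i < natts; i++)
	{
		if (attrs[i].isGenerated || attrs[i].isDropped)
		{
			continue; /* COPY cannot accept values for these columns */
		}
		if (!firstInList)
		{
			strncat(out, ",", outSize - strlen(out) - 1);
		}
		firstInList = 0;
		strncat(out, attrs[i].name, outSize - strlen(out) - 1);
	}
}

int
main(void)
{
	MockAttr attrs[] = { { "a", 0, 0 }, { "b", 1, 0 }, { "c", 0, 0 } };
	char columnList[64];

	BuildCopyableColumnList(columnList, sizeof(columnList), attrs, 3);
	printf("SELECT %s FROM shard;\n", columnList); /* prints: SELECT a,c FROM shard; */
	return 0;
}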
/*
* ConstructShardCopyStatement constructs the text of a COPY statement
* for copying into a result table
*/
static StringInfo
ConstructShardCopyStatement(List *destinationShardFullyQualifiedName, bool
useBinaryFormat)
useBinaryFormat,
TupleDesc tupleDesc)
{
char *destinationShardSchemaName = linitial(destinationShardFullyQualifiedName);
char *destinationShardRelationName = lsecond(destinationShardFullyQualifiedName);
StringInfo command = makeStringInfo();
appendStringInfo(command, "COPY %s.%s FROM STDIN",
const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc);
appendStringInfo(command, "COPY %s.%s (%s) FROM STDIN",
quote_identifier(destinationShardSchemaName), quote_identifier(
destinationShardRelationName));
destinationShardRelationName), columnList);
if (useBinaryFormat)
{

View File

@ -110,8 +110,13 @@ worker_split_copy(PG_FUNCTION_ARGS)
splitCopyInfoList))));
StringInfo selectShardQueryForCopy = makeStringInfo();
const char *columnList = CopyableColumnNamesFromRelationName(
sourceShardToCopySchemaName,
sourceShardToCopyName);
appendStringInfo(selectShardQueryForCopy,
"SELECT * FROM %s;", sourceShardToCopyQualifiedName);
"SELECT %s FROM %s;", columnList,
sourceShardToCopyQualifiedName);
ParamListInfo params = NULL;
ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params,

View File

@ -34,6 +34,7 @@
#include "distributed/intermediate_results.h"
#include "distributed/listutils.h"
#include "distributed/coordinator_protocol.h"
#include "distributed/merge_planner.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_executor.h"
#include "distributed/distributed_planner.h"
@ -68,6 +69,17 @@
#include "utils/syscache.h"
/* RouterPlanType is used to determine the router plan to invoke */
typedef enum RouterPlanType
{
INSERT_SELECT_INTO_CITUS_TABLE,
INSERT_SELECT_INTO_LOCAL_TABLE,
DML_QUERY,
SELECT_QUERY,
MERGE_QUERY,
REPLAN_WITH_BOUND_PARAMETERS
} RouterPlanType;
static List *plannerRestrictionContextList = NIL;
int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */
static uint64 NextPlanId = 1;
@ -75,12 +87,8 @@ static uint64 NextPlanId = 1;
/* keep track of planner call stack levels */
int PlannerLevel = 0;
static void ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree,
List *rangeTableList);
static bool ContainsMergeCommandWalker(Node *node);
static bool ListContainsDistributedTableRTE(List *rangeTableList,
bool *maybeHasForeignDistributedTable);
static bool IsUpdateOrDelete(Query *query);
static PlannedStmt * CreateDistributedPlannedStmt(
DistributedPlanningContext *planContext);
static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId,
@ -132,7 +140,10 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext
static RTEListProperties * GetRTEListProperties(List *rangeTableList);
static List * TranslatedVars(PlannerInfo *root, int relationIndex);
static void WarnIfListHasForeignDistributedTable(List *rangeTableList);
static void ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList);
static RouterPlanType GetRouterPlanType(Query *query,
Query *originalQuery,
bool hasUnresolvedParams);
/* Distributed planner hook */
PlannedStmt *
@ -156,7 +167,7 @@ distributed_planner(Query *parse,
* We cannot have a MERGE command on this path either, because
* there cannot be a recursively planned MERGE command.
*/
Assert(!ContainsMergeCommandWalker((Node *) parse));
Assert(!IsMergeQuery(parse));
needsDistributedPlanning = true;
}
@ -200,12 +211,6 @@ distributed_planner(Query *parse,
if (!fastPathRouterQuery)
{
/*
* Fast path queries cannot have a MERGE command, and we
* prevent the remaining cases here.
*/
ErrorIfQueryHasUnsupportedMergeCommand(parse, rangeTableList);
/*
* When there are partitioned tables (not applicable to fast path),
* pretend that they are regular tables to avoid unnecessary work
@ -304,72 +309,6 @@ distributed_planner(Query *parse,
}
/*
* ErrorIfQueryHasUnsupportedMergeCommand walks over the query tree and returns
* early if there is no MERGE command (i.e., CMD_MERGE) in it. For MERGE, it
* checks all supported combinations and throws an exception if any violation
* is seen.
*/
static void
ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, List *rangeTableList)
{
/*
* Postgres currently doesn't support Merge queries inside subqueries and
* CTEs, but let's be defensive and do the query tree walk anyway.
*
* We do not call this path for fast-path queries to avoid this additional
* overhead.
*/
if (!ContainsMergeCommandWalker((Node *) queryTree))
{
/* No MERGE found */
return;
}
/*
* In Citus we have limited support for MERGE; it's allowed
* only if all the tables (target, source, or any CTE) are
* local, i.e., a combination of Citus local and non-Citus
* tables (regular Postgres tables).
*/
ErrorIfMergeHasUnsupportedTables(queryTree, rangeTableList);
}
/*
* ContainsMergeCommandWalker walks over the node and determines whether it
* contains any MERGE command (i.e., CMD_MERGE).
*/
static bool
ContainsMergeCommandWalker(Node *node)
{
#if PG_VERSION_NUM < PG_VERSION_15
return false;
#endif
if (node == NULL)
{
return false;
}
if (IsA(node, Query))
{
Query *query = (Query *) node;
if (IsMergeQuery(query))
{
return true;
}
return query_tree_walker((Query *) node, ContainsMergeCommandWalker, NULL, 0);
}
return expression_tree_walker(node, ContainsMergeCommandWalker, NULL);
return false;
}
/*
* ExtractRangeTableEntryList is a wrapper around ExtractRangeTableEntryWalker.
* The function traverses the input query and returns all the range table
@ -669,17 +608,6 @@ IsMultiTaskPlan(DistributedPlan *distributedPlan)
}
/*
* IsUpdateOrDelete returns true if the query performs an update or delete.
*/
bool
IsUpdateOrDelete(Query *query)
{
return query->commandType == CMD_UPDATE ||
query->commandType == CMD_DELETE;
}
/*
* PlanFastPathDistributedStmt creates a distributed planned statement using
* the FastPathPlanner.
@ -850,7 +778,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
* if it is planned as a multi shard modify query.
*/
if ((distributedPlan->planningError ||
(IsUpdateOrDelete(planContext->originalQuery) && IsMultiTaskPlan(
(UpdateOrDeleteOrMergeQuery(planContext->originalQuery) && IsMultiTaskPlan(
distributedPlan))) &&
hasUnresolvedParams)
{
@ -955,6 +883,51 @@ TryCreateDistributedPlannedStmt(PlannedStmt *localPlan,
}
/*
* GetRouterPlanType checks the parse tree to return appropriate plan type.
*/
static RouterPlanType
GetRouterPlanType(Query *query, Query *originalQuery, bool hasUnresolvedParams)
{
if (!IsModifyCommand(originalQuery))
{
return SELECT_QUERY;
}
Oid targetRelationId = ModifyQueryResultRelationId(query);
EnsureModificationsCanRunOnRelation(targetRelationId);
EnsurePartitionTableNotReplicated(targetRelationId);
/* Check the type of modification being done */
if (InsertSelectIntoCitusTable(originalQuery))
{
if (hasUnresolvedParams)
{
return REPLAN_WITH_BOUND_PARAMETERS;
}
return INSERT_SELECT_INTO_CITUS_TABLE;
}
else if (InsertSelectIntoLocalTable(originalQuery))
{
if (hasUnresolvedParams)
{
return REPLAN_WITH_BOUND_PARAMETERS;
}
return INSERT_SELECT_INTO_LOCAL_TABLE;
}
else if (IsMergeQuery(originalQuery))
{
return MERGE_QUERY;
}
else
{
return DML_QUERY;
}
}
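As a rough reading aid, the classification maps query shapes to plan types as follows; the table names dist, dist2, and local are assumptions for the example:
/*
* Illustrative mapping, with dist/dist2 distributed tables and local a plain
* Postgres table:
*
*   SELECT ... FROM dist;                    -> SELECT_QUERY
*   UPDATE dist ...; DELETE FROM dist ...;   -> DML_QUERY
*   INSERT INTO dist SELECT ... FROM dist2;  -> INSERT_SELECT_INTO_CITUS_TABLE
*   INSERT INTO local SELECT ... FROM dist;  -> INSERT_SELECT_INTO_LOCAL_TABLE
*   MERGE INTO ... USING ... ON ...;         -> MERGE_QUERY
*
* with both INSERT ... SELECT cases degrading to REPLAN_WITH_BOUND_PARAMETERS
* when the statement still carries unresolved parameters.
*/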
/*
* CreateDistributedPlan generates a distributed plan for a query.
* It goes through 3 steps:
@ -972,51 +945,71 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
DistributedPlan *distributedPlan = NULL;
bool hasCtes = originalQuery->cteList != NIL;
if (IsModifyCommand(originalQuery))
/* Step 1: Try router planner */
RouterPlanType routerPlan = GetRouterPlanType(query, originalQuery,
hasUnresolvedParams);
switch (routerPlan)
{
Oid targetRelationId = ModifyQueryResultRelationId(query);
EnsureModificationsCanRunOnRelation(targetRelationId);
EnsurePartitionTableNotReplicated(targetRelationId);
if (InsertSelectIntoCitusTable(originalQuery))
case INSERT_SELECT_INTO_CITUS_TABLE:
{
if (hasUnresolvedParams)
{
/*
* Unresolved parameters can cause performance regressions in
* INSERT...SELECT when the partition column is a parameter
* because we don't perform any additional pruning in the executor.
*/
return NULL;
}
distributedPlan =
CreateInsertSelectPlan(planId, originalQuery, plannerRestrictionContext,
CreateInsertSelectPlan(planId,
originalQuery,
plannerRestrictionContext,
boundParams);
break;
}
else if (InsertSelectIntoLocalTable(originalQuery))
case INSERT_SELECT_INTO_LOCAL_TABLE:
{
if (hasUnresolvedParams)
{
/*
* Unresolved parameters can cause performance regressions in
* INSERT...SELECT when the partition column is a parameter
* because we don't perform any additional pruning in the executor.
*/
return NULL;
}
distributedPlan =
CreateInsertSelectIntoLocalTablePlan(planId, originalQuery, boundParams,
CreateInsertSelectIntoLocalTablePlan(planId,
originalQuery,
boundParams,
hasUnresolvedParams,
plannerRestrictionContext);
break;
}
else
case DML_QUERY:
{
/* modifications are always routed through the same planner/executor */
distributedPlan =
CreateModifyPlan(originalQuery, query, plannerRestrictionContext);
break;
}
case MERGE_QUERY:
{
distributedPlan =
CreateMergePlan(originalQuery, query, plannerRestrictionContext);
break;
}
case REPLAN_WITH_BOUND_PARAMETERS:
{
/*
* Unresolved parameters can cause performance regressions in
* INSERT...SELECT when the partition column is a parameter
* because we don't perform any additional pruning in the executor.
*/
return NULL;
}
case SELECT_QUERY:
{
/*
* For SELECT queries, if the router executor is enabled, we first try
* to plan the query as a router query. If that is not supported, we
* fall back to the full-blown plan/optimize/physical planning process
* needed to produce distributed query plans.
*/
distributedPlan =
CreateRouterPlan(originalQuery, query, plannerRestrictionContext);
break;
}
}
/* the functions above always return a plan, possibly with an error */
@ -1030,31 +1023,6 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
{
RaiseDeferredError(distributedPlan->planningError, DEBUG2);
}
}
else
{
/*
* For SELECT queries, if the router executor is enabled, we first try
* to plan the query as a router query. If that is not supported, we
* fall back to the full-blown plan/optimize/physical planning process
* needed to produce distributed query plans.
*/
distributedPlan = CreateRouterPlan(originalQuery, query,
plannerRestrictionContext);
if (distributedPlan->planningError == NULL)
{
return distributedPlan;
}
else
{
/*
* For debugging, it's useful to display why the query was not
* router plannable.
*/
RaiseDeferredError(distributedPlan->planningError, DEBUG2);
}
}
if (hasUnresolvedParams)
{
@ -1082,6 +1050,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
boundParams);
Assert(originalQuery != NULL);
/* Step 2: Generate subplans for CTEs and complex subqueries */
/*
* Plan subqueries and CTEs that cannot be pushed down by recursively
* calling the planner and return the resulting plans to subPlanList.
@ -1182,6 +1152,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
query->cteList = NIL;
Assert(originalQuery->cteList == NIL);
/* Step 3: Try Logical planner */
MultiTreeRoot *logicalPlan = MultiLogicalPlanCreate(originalQuery, query,
plannerRestrictionContext);
MultiLogicalPlanOptimize(logicalPlan);
@ -2611,148 +2583,3 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
}
}
}
/*
* IsMergeAllowedOnRelation takes a relation entry and checks whether the MERGE
* command is permitted on special relations such as materialized views; it
* returns true only if the relation is a "source" relation.
*/
bool
IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte)
{
if (!IsMergeQuery(parse))
{
return false;
}
RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable);
/* Is it a target relation? */
if (targetRte->relid == rte->relid)
{
return false;
}
return true;
}
/*
* ErrorIfMergeHasUnsupportedTables checks that all the tables (target, source, or any CTE
* present) in the MERGE command are local, i.e., a combination of Citus local and non-Citus
* tables (regular Postgres tables); it raises an exception for all other combinations.
*/
static void
ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList)
{
ListCell *tableCell = NULL;
foreach(tableCell, rangeTableList)
{
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(tableCell);
Oid relationId = rangeTableEntry->relid;
switch (rangeTableEntry->rtekind)
{
case RTE_RELATION:
{
/* Check the relation type */
break;
}
case RTE_SUBQUERY:
case RTE_FUNCTION:
case RTE_TABLEFUNC:
case RTE_VALUES:
case RTE_JOIN:
case RTE_CTE:
{
/* Skip them as base table(s) will be checked */
continue;
}
/*
* RTE_NAMEDTUPLESTORE is typically used in ephemeral named relations,
* such as trigger data; until we find a genuine use case, raise an
* exception.
* RTE_RESULT is a node added by the planner and we shouldn't
* encounter it in the parse tree.
*/
case RTE_NAMEDTUPLESTORE:
case RTE_RESULT:
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("MERGE command is not supported with "
"Tuplestores and results")));
break;
}
default:
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("MERGE command: Unrecognized range table entry.")));
}
}
/* RTE Relation can be of various types, check them now */
/* skip the regular views as they are replaced with subqueries */
if (rangeTableEntry->relkind == RELKIND_VIEW)
{
continue;
}
if (rangeTableEntry->relkind == RELKIND_MATVIEW ||
rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE)
{
/* Materialized view or Foreign table as target is not allowed */
if (IsMergeAllowedOnRelation(parse, rangeTableEntry))
{
/* Non target relation is ok */
continue;
}
else
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("MERGE command is not allowed "
"on materialized view")));
}
}
if (rangeTableEntry->relkind != RELKIND_RELATION &&
rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Unexpected relation type(relkind:%c) in MERGE command",
rangeTableEntry->relkind)));
}
Assert(rangeTableEntry->relid != 0);
/* Distributed tables and Reference tables are not supported yet */
if (IsCitusTableType(relationId, REFERENCE_TABLE) ||
IsCitusTableType(relationId, DISTRIBUTED_TABLE))
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("MERGE command is not supported on "
"distributed/reference tables yet")));
}
/* Regular Postgres tables and Citus local tables are allowed */
if (!IsCitusTable(relationId) ||
IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
{
continue;
}
/* Any other Citus table type missing ? */
}
/* All the tables are local, supported */
}

View File

@ -54,10 +54,11 @@
bool EnableFastPathRouterPlanner = true;
static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey);
static bool ConjunctionContainsColumnFilter(Node *node, Var *column,
Node **distributionKeyValue);
static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn,
Node **distributionKeyValue);
static bool ConjunctionContainsColumnFilter(Node *node,
Var *column,
Node **distributionKeyValue);
/*

View File

@ -875,7 +875,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery,
&prunedShardIntervalListList,
replacePrunedQueryWithDummy,
&multiShardModifyQuery, NULL,
false);
NULL);
Assert(!multiShardModifyQuery);
@ -938,6 +938,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery,
modifyTask->taskPlacementList = insertShardPlacementList;
modifyTask->relationShardList = relationShardList;
modifyTask->replicationModel = targetTableCacheEntry->replicationModel;
modifyTask->isLocalTableModification = false;
return modifyTask;
}

View File

@ -0,0 +1,738 @@
/*-------------------------------------------------------------------------
*
* merge_planner.c
*
* This file contains functions to help plan MERGE queries.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include <stddef.h>
#include "postgres.h"
#include "nodes/makefuncs.h"
#include "optimizer/optimizer.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "distributed/citus_clauses.h"
#include "distributed/listutils.h"
#include "distributed/merge_planner.h"
#include "distributed/multi_logical_optimizer.h"
#include "distributed/multi_router_planner.h"
#include "distributed/pg_version_constants.h"
#include "distributed/query_pushdown_planning.h"
#if PG_VERSION_NUM >= PG_VERSION_15
static DeferredErrorMessage * CheckIfRTETypeIsUnsupported(Query *parse,
RangeTblEntry *rangeTableEntry);
static DeferredErrorMessage * ErrorIfDistTablesNotColocated(Query *parse,
List *
distTablesList,
PlannerRestrictionContext
*
plannerRestrictionContext);
static DeferredErrorMessage * ErrorIfMergeHasUnsupportedTables(Query *parse,
List *rangeTableList,
PlannerRestrictionContext *
restrictionContext);
static bool IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool
skipOuterVars);
static DeferredErrorMessage * InsertDistributionColumnMatchesSource(Query *query,
RangeTblEntry *
resultRte);
static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid
resultRelationId,
FromExpr *joinTree,
Node *quals,
List *targetList,
CmdType commandType);
#endif
/*
* CreateMergePlan attempts to create a plan for the given MERGE SQL
* statement. If planning fails, ->planningError is set to a description
* of the failure.
*/
DistributedPlan *
CreateMergePlan(Query *originalQuery, Query *query,
PlannerRestrictionContext *plannerRestrictionContext)
{
DistributedPlan *distributedPlan = CitusMakeNode(DistributedPlan);
bool multiShardQuery = false;
Assert(originalQuery->commandType == CMD_MERGE);
distributedPlan->modLevel = RowModifyLevelForQuery(query);
distributedPlan->planningError = MergeQuerySupported(originalQuery,
multiShardQuery,
plannerRestrictionContext);
if (distributedPlan->planningError != NULL)
{
return distributedPlan;
}
Job *job = RouterJob(originalQuery, plannerRestrictionContext,
&distributedPlan->planningError);
if (distributedPlan->planningError != NULL)
{
return distributedPlan;
}
ereport(DEBUG1, (errmsg("Creating MERGE router plan")));
distributedPlan->workerJob = job;
distributedPlan->combineQuery = NULL;
/* MERGE doesn't support RETURNING clause */
distributedPlan->expectResults = false;
distributedPlan->targetRelationId = ResultRelationOidForQuery(query);
distributedPlan->fastPathRouterPlan =
plannerRestrictionContext->fastPathRestrictionContext->fastPathRouterQuery;
return distributedPlan;
}
/*
* MergeQuerySupported checks for a MERGE command in the query; if it finds
* one, it verifies the criteria below:
* - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables
* - Distributed table requirements in ErrorIfDistTablesNotColocated
* - Target lists and functions-in-quals in TargetlistAndFunctionsSupported
*/
DeferredErrorMessage *
MergeQuerySupported(Query *originalQuery, bool multiShardQuery,
PlannerRestrictionContext *plannerRestrictionContext)
{
/* this function is a no-op (returns NULL) for pre-15 versions of Postgres */
#if PG_VERSION_NUM < PG_VERSION_15
return NULL;
#else
/*
* TODO: For now, we are adding an exception where volatile or stable
* functions are not allowed in the MERGE query, but this will become too
* restrictive, as it prevents many useful and simple cases, such as
* INSERT VALUES(ts::timestamp), bigserial column inserts, etc. Without
* this restriction, however, there is a danger of some function(s)
* getting executed on the worker, which would result in incorrect behavior.
*/
if (contain_mutable_functions((Node *) originalQuery))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"non-IMMUTABLE functions are not yet supported "
"in MERGE sql with distributed tables ",
NULL, NULL);
}
List *rangeTableList = ExtractRangeTableEntryList(originalQuery);
RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery);
/*
* Fast path queries cannot have a MERGE command; we prevent the remaining cases here.
* In Citus we have limited support for MERGE: it's allowed only if all
* the tables (target, source, or any CTE) are local, i.e., a
* combination of Citus local and non-Citus tables (regular Postgres tables),
* or distributed tables with some restrictions; see the header of routine
* ErrorIfDistTablesNotColocated for details.
*/
DeferredErrorMessage *deferredError =
ErrorIfMergeHasUnsupportedTables(originalQuery,
rangeTableList,
plannerRestrictionContext);
if (deferredError)
{
/* MERGE's unsupported combination, raise the exception */
RaiseDeferredError(deferredError, ERROR);
}
Oid resultRelationId = resultRte->relid;
deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId,
originalQuery->jointree,
originalQuery->jointree->
quals,
originalQuery->targetList,
originalQuery->commandType);
if (deferredError)
{
return deferredError;
}
/*
* MERGE is a special case where we have multiple modify statements
* within itself. Check each INSERT/UPDATE/DELETE individually.
*/
MergeAction *action = NULL;
foreach_ptr(action, originalQuery->mergeActionList)
{
Assert(originalQuery->returningList == NULL);
deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId,
originalQuery->jointree,
action->qual,
action->targetList,
action->commandType);
if (deferredError)
{
/* MERGE's unsupported scenario, raise the exception */
RaiseDeferredError(deferredError, ERROR);
}
}
deferredError =
InsertDistributionColumnMatchesSource(originalQuery, resultRte);
if (deferredError)
{
/* MERGE's unsupported scenario, raise the exception */
RaiseDeferredError(deferredError, ERROR);
}
if (multiShardQuery)
{
deferredError =
DeferErrorIfUnsupportedSubqueryPushdown(originalQuery,
plannerRestrictionContext);
if (deferredError)
{
return deferredError;
}
}
if (HasDangerousJoinUsing(originalQuery->rtable, (Node *) originalQuery->jointree))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"a join with USING causes an internal naming "
"conflict, use ON instead", NULL, NULL);
}
return NULL;
#endif
}
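A few hedged examples of what these checks accept and reject; the table names are assumptions, with t and s hash-distributed and colocated, and l a regular Postgres table:
/*
*   MERGE INTO t USING s ON (t.id = s.id) ...     -- allowed
*   MERGE INTO t USING l ON (t.id = l.id) ...     -- rejected: mixing
*       distributed and local tables is not supported yet
*   MERGE INTO t USING s ON (t.id = s.id)
*     WHEN MATCHED THEN UPDATE SET v = random();  -- rejected:
*       non-IMMUTABLE functions are not yet supported
*/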
/*
* IsMergeAllowedOnRelation takes a relation entry and checks whether the MERGE
* command is permitted on special relations such as materialized views; it
* returns true only if the relation is a "source" relation.
*/
bool
IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte)
{
if (!IsMergeQuery(parse))
{
return false;
}
/* Fetch the MERGE target relation */
RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable);
/* Is it a target relation? */
if (targetRte->relid == rte->relid)
{
return false;
}
return true;
}
#if PG_VERSION_NUM >= PG_VERSION_15
/*
* ErrorIfDistTablesNotColocated checks that:
*
* - There are a minimum of two distributed tables (a source and a target).
* - All the distributed tables are indeed colocated.
*
* If any of the conditions is not met, it returns a deferred error.
*/
static DeferredErrorMessage *
ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList,
PlannerRestrictionContext *
plannerRestrictionContext)
{
/* All MERGE tables must be distributed */
if (list_length(distTablesList) < 2)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"For MERGE command, both the source and target "
"must be distributed", NULL, NULL);
}
/* All distributed tables must be colocated */
if (!AllDistributedRelationsInRTEListColocated(distTablesList))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"For MERGE command, all the distributed tables "
"must be colocated", NULL, NULL);
}
return NULL;
}
/*
* CheckIfRTETypeIsUnsupported checks for table types that are not supported, such
* as reference tables, append-distributed tables, and materialized views as the target relation.
* The routine returns NULL for supported types and an error message for everything else.
*/
static DeferredErrorMessage *
CheckIfRTETypeIsUnsupported(Query *parse, RangeTblEntry *rangeTableEntry)
{
if (rangeTableEntry->relkind == RELKIND_MATVIEW ||
rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE)
{
/* Materialized view or Foreign table as target is not allowed */
if (IsMergeAllowedOnRelation(parse, rangeTableEntry))
{
/* Non target relation is ok */
return NULL;
}
else
{
/* Usually we don't reach this exception as the Postgres parser catches it */
StringInfo errorMessage = makeStringInfo();
appendStringInfo(errorMessage, "MERGE command is not allowed on "
"relation type(relkind:%c)",
rangeTableEntry->relkind);
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
errorMessage->data, NULL, NULL);
}
}
if (rangeTableEntry->relkind != RELKIND_RELATION &&
rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE)
{
StringInfo errorMessage = makeStringInfo();
appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) "
"in MERGE command", rangeTableEntry->relkind);
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
errorMessage->data, NULL, NULL);
}
Assert(rangeTableEntry->relid != 0);
/* Reference tables are not supported yet */
if (IsCitusTableType(rangeTableEntry->relid, REFERENCE_TABLE))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"MERGE command is not supported on reference "
"tables yet", NULL, NULL);
}
/* Append/Range tables are not supported */
if (IsCitusTableType(rangeTableEntry->relid, APPEND_DISTRIBUTED) ||
IsCitusTableType(rangeTableEntry->relid, RANGE_DISTRIBUTED))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"For MERGE command, all the distributed tables "
"must be colocated, for append/range distribution, "
"colocation is not supported", NULL,
"Consider using hash distribution instead");
}
return NULL;
}
/*
* ErrorIfMergeHasUnsupportedTables checks that all the tables (target, source, or any CTE
* present) in the MERGE command are local, i.e., a combination of Citus local and non-Citus
* tables (regular Postgres tables), or distributed tables with some restrictions (see the
* header of routine ErrorIfDistTablesNotColocated for details); it returns an error
* for all other combinations.
*/
static DeferredErrorMessage *
ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList,
PlannerRestrictionContext *restrictionContext)
{
List *distTablesList = NIL;
bool foundLocalTables = false;
RangeTblEntry *rangeTableEntry = NULL;
foreach_ptr(rangeTableEntry, rangeTableList)
{
Oid relationId = rangeTableEntry->relid;
switch (rangeTableEntry->rtekind)
{
case RTE_RELATION:
{
/* Check the relation type */
break;
}
case RTE_SUBQUERY:
case RTE_FUNCTION:
case RTE_TABLEFUNC:
case RTE_VALUES:
case RTE_JOIN:
case RTE_CTE:
{
/* Skip them as base table(s) will be checked */
continue;
}
/*
* RTE_NAMEDTUPLESTORE is typically used in ephemeral named relations,
* such as trigger data; until we find a genuine use case, raise an
* exception.
* RTE_RESULT is a node added by the planner and we shouldn't
* encounter it in the parse tree.
*/
case RTE_NAMEDTUPLESTORE:
case RTE_RESULT:
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"MERGE command is not supported with "
"Tuplestores and results",
NULL, NULL);
}
default:
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"MERGE command: Unrecognized range table entry.",
NULL, NULL);
}
}
/* RTE Relation can be of various types, check them now */
/* skip the regular views as they are replaced with subqueries */
if (rangeTableEntry->relkind == RELKIND_VIEW)
{
continue;
}
DeferredErrorMessage *errorMessage =
CheckIfRTETypeIsUnsupported(parse, rangeTableEntry);
if (errorMessage)
{
return errorMessage;
}
/*
* For now, save all distributed tables, later (below) we will
* check for supported combination(s).
*/
if (IsCitusTableType(relationId, DISTRIBUTED_TABLE))
{
distTablesList = lappend(distTablesList, rangeTableEntry);
continue;
}
/* Regular Postgres tables and Citus local tables are allowed */
if (!IsCitusTable(relationId) ||
IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
{
foundLocalTables = true;
continue;
}
/* Any other Citus table type missing ? */
}
/* Ensure all tables are indeed local */
if (foundLocalTables && list_length(distTablesList) == 0)
{
/* All the tables are local, supported */
return NULL;
}
else if (foundLocalTables && list_length(distTablesList) > 0)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"MERGE command is not supported with "
"combination of distributed/local tables yet",
NULL, NULL);
}
/* Ensure all distributed tables are indeed co-located */
return ErrorIfDistTablesNotColocated(parse,
distTablesList,
restrictionContext);
}
/*
* IsDistributionColumnInMergeSource returns true if the given column is a distribution column.
* The function uses FindReferencedTableColumn to find the original relation
* id and column that the column expression refers to. It then checks whether
* that column is a distribution column of the relation.
*
* Also, the function always returns false for reference tables, given that
* reference tables do not have a distribution column.
*
* If skipOuterVars is true, it doesn't process the outer vars.
*/
bool
IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool
skipOuterVars)
{
bool isDistributionColumn = false;
Var *column = NULL;
RangeTblEntry *relationRTE = NULL;
/* ParentQueryList is the same as the original query for MERGE */
FindReferencedTableColumn(columnExpression, list_make1(query), query, &column,
&relationRTE,
skipOuterVars);
Oid relationId = relationRTE ? relationRTE->relid : InvalidOid;
if (relationId != InvalidOid && column != NULL)
{
Var *distributionColumn = DistPartitionKey(relationId);
/* not all distributed tables have partition column */
if (distributionColumn != NULL && column->varattno ==
distributionColumn->varattno)
{
isDistributionColumn = true;
}
}
return isDistributionColumn;
}
/*
* InsertDistributionColumnMatchesSource checks whether MERGE inserts into the
* target's distribution column a value that does not come from the source
* table; if so, it returns a deferred error.
* Note: inserting arbitrary values other than the joined column values would
* result in rows ending up in incorrect shards; to prevent such mishaps, we
* disallow such inserts here.
*/
static DeferredErrorMessage *
InsertDistributionColumnMatchesSource(Query *query, RangeTblEntry *resultRte)
{
Assert(IsMergeQuery(query));
if (!IsCitusTableType(resultRte->relid, DISTRIBUTED_TABLE))
{
return NULL;
}
bool foundDistributionColumn = false;
MergeAction *action = NULL;
foreach_ptr(action, query->mergeActionList)
{
/* Skip the MATCHED clause as INSERTs are not allowed in it */
if (action->matched)
{
continue;
}
/* NOT MATCHED can have either INSERT or DO NOTHING */
if (action->commandType == CMD_NOTHING)
{
return NULL;
}
if (action->targetList == NIL)
{
/* INSERT DEFAULT VALUES is not allowed */
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot perform MERGE INSERT with DEFAULTS",
NULL, NULL);
}
Assert(action->commandType == CMD_INSERT);
Var *targetKey = PartitionColumn(resultRte->relid, 1);
TargetEntry *targetEntry = NULL;
foreach_ptr(targetEntry, action->targetList)
{
AttrNumber originalAttrNo = targetEntry->resno;
/* skip processing of target table non-partition columns */
if (originalAttrNo != targetKey->varattno)
{
continue;
}
foundDistributionColumn = true;
if (IsA(targetEntry->expr, Var))
{
if (IsDistributionColumnInMergeSource(targetEntry->expr, query, true))
{
return NULL;
}
else
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"MERGE INSERT must use the source table "
"distribution column value",
NULL, NULL);
}
}
else
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"MERGE INSERT must refer a source column "
"for distribution column ",
NULL, NULL);
}
}
if (!foundDistributionColumn)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"MERGE INSERT must have distribution column as value",
NULL, NULL);
}
}
return NULL;
}
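Hedged examples of the WHEN NOT MATCHED cases checked above; the names are assumptions, with target t distributed by id and source s colocated with it and also distributed by id:
/*
*   ... WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
*       -- allowed: id comes from the source's distribution column
*   ... WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.v, s.v)
*       -- rejected: must use the source table distribution column value
*   ... WHEN NOT MATCHED THEN INSERT (id, v) VALUES (42, s.v)
*       -- rejected: must refer a source column for the distribution column
*   ... WHEN NOT MATCHED THEN INSERT (v) VALUES (s.v)
*       -- rejected: must have the distribution column as a value
*/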
/*
* MergeQualAndTargetListFunctionsSupported checks the WHEN/ON clause actions to see which
* functions are allowed, whether we are updating the distribution column, etc.
*/
static DeferredErrorMessage *
MergeQualAndTargetListFunctionsSupported(Oid resultRelationId, FromExpr *joinTree,
Node *quals,
List *targetList, CmdType commandType)
{
uint32 rangeTableId = 1;
Var *distributionColumn = NULL;
if (IsCitusTable(resultRelationId) && HasDistributionKey(resultRelationId))
{
distributionColumn = PartitionColumn(resultRelationId, rangeTableId);
}
ListCell *targetEntryCell = NULL;
bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
foreach(targetEntryCell, targetList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
bool targetEntryDistributionColumn = false;
AttrNumber targetColumnAttrNumber = InvalidAttrNumber;
if (distributionColumn)
{
if (commandType == CMD_UPDATE)
{
/*
* Note that it is not possible to give an alias to
* UPDATE table SET ...
*/
if (targetEntry->resname)
{
targetColumnAttrNumber = get_attnum(resultRelationId,
targetEntry->resname);
if (targetColumnAttrNumber == distributionColumn->varattno)
{
targetEntryDistributionColumn = true;
}
}
}
}
if (targetEntryDistributionColumn &&
TargetEntryChangesValue(targetEntry, distributionColumn, joinTree))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"updating the distribution column is not "
"allowed in MERGE actions",
NULL, NULL);
}
if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
CitusIsVolatileFunction))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"functions used in MERGE actions on distributed "
"tables must not be VOLATILE",
NULL, NULL);
}
if (MasterIrreducibleExpression((Node *) targetEntry->expr,
&hasVarArgument, &hasBadCoalesce))
{
Assert(hasVarArgument || hasBadCoalesce);
}
if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
NodeIsFieldStore))
{
/* DELETE cannot do field indirection already */
Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"inserting or modifying composite type fields is not "
"supported", NULL,
"Use the column name to insert or update the composite "
"type as a single value");
}
}
/*
* Check the condition; convert a list of expressions into an expression tree for further processing
*/
if (quals)
{
if (IsA(quals, List))
{
quals = (Node *) make_ands_explicit((List *) quals);
}
if (FindNodeMatchingCheckFunction((Node *) quals, CitusIsVolatileFunction))
{
StringInfo errorMessage = makeStringInfo();
appendStringInfo(errorMessage, "functions used in the %s clause of MERGE "
"queries on distributed tables must not be VOLATILE",
(commandType == CMD_MERGE) ? "ON" : "WHEN");
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
errorMessage->data, NULL, NULL);
}
else if (MasterIrreducibleExpression(quals, &hasVarArgument, &hasBadCoalesce))
{
Assert(hasVarArgument || hasBadCoalesce);
}
}
if (hasVarArgument)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"STABLE functions used in MERGE queries "
"cannot be called with column references",
NULL, NULL);
}
if (hasBadCoalesce)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"non-IMMUTABLE functions are not allowed in CASE or "
"COALESCE statements",
NULL, NULL);
}
if (quals != NULL && nodeTag(quals) == T_CurrentOfExpr)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot run MERGE actions with cursors",
NULL, NULL);
}
return NULL;
}
#endif

View File

@ -29,6 +29,7 @@
#include "distributed/citus_nodefuncs.h"
#include "distributed/connection_management.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/executor_util.h"
#include "distributed/insert_select_planner.h"
#include "distributed/insert_select_executor.h"
#include "distributed/listutils.h"
@ -199,20 +200,6 @@ CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es
return;
}
/*
* ALTER TABLE statements are not explained by Postgres. However, ALTER TABLE statements
* may trigger SELECT statements that cause the explain hook to run, which crashes a worker.
* Therefore we detect whether we are explaining a triggered query while processing
* an ALTER TABLE statement, and stop the explain in that situation.
*/
if (AlterTableInProgress())
{
ExplainPropertyText("Citus Explain Scan",
"Explain for triggered constraint validation queries during ALTER TABLE commands are not supported by Citus",
es);
return;
}
ExplainOpenGroup("Distributed Query", "Distributed Query", true, es);
/*

View File

@ -1383,7 +1383,7 @@ DistPartitionKey(Oid relationId)
CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId);
/* non-distributed tables do not have partition column */
if (IsCitusTableTypeCacheEntry(partitionEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKeyCacheEntry(partitionEntry))
{
return NULL;
}

View File

@ -3385,6 +3385,13 @@ GetAggregateType(Aggref *aggregateExpression)
{
Oid aggFunctionId = aggregateExpression->aggfnoid;
/* custom aggregates with combine func take precedence over name-based logic */
if (aggFunctionId >= FirstNormalObjectId &&
AggregateEnabledCustom(aggregateExpression))
{
return AGGREGATE_CUSTOM_COMBINE;
}
/* look up the function name */
char *aggregateProcName = get_func_name(aggFunctionId);
if (aggregateProcName == NULL)
@ -3395,8 +3402,6 @@ GetAggregateType(Aggref *aggregateExpression)
uint32 aggregateCount = lengthof(AggregateNames);
Assert(AGGREGATE_INVALID_FIRST == 0);
for (uint32 aggregateIndex = 1; aggregateIndex < aggregateCount; aggregateIndex++)
{
const char *aggregateName = AggregateNames[aggregateIndex];
@ -3465,7 +3470,7 @@ GetAggregateType(Aggref *aggregateExpression)
}
}
/* handle any remaining built-in aggregates with a suitable combinefn */
if (AggregateEnabledCustom(aggregateExpression))
{
return AGGREGATE_CUSTOM_COMBINE;

View File

@ -228,7 +228,7 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
* If the expression belongs to a non-distributed table continue searching for
* other partition keys.
*/
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
{
continue;
}

View File

@ -2199,7 +2199,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
Oid relationId = relationRestriction->relationId;
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKeyCacheEntry(cacheEntry))
{
continue;
}
@ -2377,7 +2377,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
nonReferenceRelations = lappend_oid(nonReferenceRelations,
relationId);
}
else if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
else if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
{
/* do not need to handle non-distributed tables */
continue;
@ -2482,7 +2482,7 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
ShardInterval *shardInterval = NULL;
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKeyCacheEntry(cacheEntry))
{
/* non-distributed tables have only one shard */
shardInterval = cacheEntry->sortedShardIntervalArray[0];
@ -3697,7 +3697,7 @@ PartitionedOnColumn(Var *column, List *rangeTableList, List *dependentJobList)
Var *partitionColumn = PartitionColumn(relationId, rangeTableId);
/* non-distributed tables do not have partition columns */
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
{
return false;
}
@ -4573,7 +4573,8 @@ RowModifyLevelForQuery(Query *query)
}
if (commandType == CMD_UPDATE ||
commandType == CMD_DELETE)
commandType == CMD_DELETE ||
commandType == CMD_MERGE)
{
return ROW_MODIFY_NONCOMMUTATIVE;
}
@ -5343,8 +5344,7 @@ ActiveShardPlacementLists(List *taskList)
/*
* CompareShardPlacements compares two shard placements by their tuple oid; this
* oid reflects the tuple's insertion order into pg_dist_placement.
* CompareShardPlacements compares two shard placements by placement id.
*/
int
CompareShardPlacements(const void *leftElement, const void *rightElement)
@ -5370,6 +5370,35 @@ CompareShardPlacements(const void *leftElement, const void *rightElement)
}
/*
* CompareGroupShardPlacements compares two group shard placements by placement id.
*/
int
CompareGroupShardPlacements(const void *leftElement, const void *rightElement)
{
const GroupShardPlacement *leftPlacement =
*((const GroupShardPlacement **) leftElement);
const GroupShardPlacement *rightPlacement =
*((const GroupShardPlacement **) rightElement);
uint64 leftPlacementId = leftPlacement->placementId;
uint64 rightPlacementId = rightPlacement->placementId;
if (leftPlacementId < rightPlacementId)
{
return -1;
}
else if (leftPlacementId > rightPlacementId)
{
return 1;
}
else
{
return 0;
}
}
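A minimal usage sketch for the new comparator, with hypothetical variable names:
/*
* Given an array of GroupShardPlacement pointers, sort it deterministically
* by ascending placementId:
*
*   qsort(placementArray, placementCount, sizeof(GroupShardPlacement *),
*         CompareGroupShardPlacements);
*/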
/*
* LeftRotateList returns a copy of the given list that has been cyclically
* shifted to the left by the given rotation count. For this, the function

View File

@ -28,11 +28,13 @@
#include "distributed/deparse_shard_query.h"
#include "distributed/distribution_column.h"
#include "distributed/errormessage.h"
#include "distributed/executor_util.h"
#include "distributed/log_utils.h"
#include "distributed/insert_select_planner.h"
#include "distributed/intermediate_result_pruning.h"
#include "distributed/metadata_utility.h"
#include "distributed/coordinator_protocol.h"
#include "distributed/merge_planner.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_join_order.h"
@ -113,6 +115,7 @@ typedef struct WalkerState
} WalkerState;
bool EnableRouterExecution = true;
bool EnableNonColocatedRouterQueryPushdown = false;
/* planner functions forward declarations */
@ -121,34 +124,24 @@ static void CreateSingleTaskRouterSelectPlan(DistributedPlan *distributedPlan,
Query *query,
PlannerRestrictionContext *
plannerRestrictionContext);
static Oid ResultRelationOidForQuery(Query *query);
static bool IsTidColumn(Node *node);
static DeferredErrorMessage * ModifyPartialQuerySupported(Query *queryTree, bool
multiShardQuery,
Oid *distributedTableId);
static bool NodeIsFieldStore(Node *node);
static DeferredErrorMessage * MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
static DeferredErrorMessage * MultiShardUpdateDeleteSupported(Query *originalQuery,
PlannerRestrictionContext
*
plannerRestrictionContext);
static DeferredErrorMessage * SingleShardUpdateDeleteSupported(Query *originalQuery,
PlannerRestrictionContext *
plannerRestrictionContext);
static bool HasDangerousJoinUsing(List *rtableList, Node *jtnode);
static bool MasterIrreducibleExpression(Node *expression, bool *varArgument,
bool *badCoalesce);
static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state);
static bool MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context);
static bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column,
FromExpr *joinTree);
static Job * RouterInsertJob(Query *originalQuery);
static void ErrorIfNoShardsExist(CitusTableCacheEntry *cacheEntry);
static DeferredErrorMessage * DeferErrorIfModifyView(Query *queryTree);
static Job * CreateJob(Query *query);
static Task * CreateTask(TaskType taskType);
static Job * RouterJob(Query *originalQuery,
PlannerRestrictionContext *plannerRestrictionContext,
DeferredErrorMessage **planningError);
static bool RelationPrunesToMultipleShards(List *relationShardList);
static void NormalizeMultiRowInsertTargetList(Query *query);
static void AppendNextDummyColReference(Alias *expendedReferenceNames);
@ -445,7 +438,7 @@ ModifyQueryResultRelationId(Query *query)
* ResultRelationOidForQuery returns the OID of the relation that is modified
* by the given query.
*/
static Oid
Oid
ResultRelationOidForQuery(Query *query)
{
RangeTblEntry *resultRTE = rt_fetch(query->resultRelation, query->rtable);
@ -512,6 +505,161 @@ IsTidColumn(Node *node)
}
/*
* TargetlistAndFunctionsSupported implements a subset of what ModifyPartialQuerySupported
* checks: which functions are allowed, whether we are updating the
* distribution column, etc.
* Note: this subset of checks is repeated for each MERGE modify action.
*/
DeferredErrorMessage *
TargetlistAndFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, Node *quals,
List *targetList,
CmdType commandType, List *returningList)
{
uint32 rangeTableId = 1;
Var *partitionColumn = NULL;
if (IsCitusTable(resultRelationId))
{
partitionColumn = PartitionColumn(resultRelationId, rangeTableId);
}
bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
ListCell *targetEntryCell = NULL;
foreach(targetEntryCell, targetList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
/* skip resjunk entries: UPDATE adds some for ctid, etc. */
if (targetEntry->resjunk)
{
continue;
}
bool targetEntryPartitionColumn = false;
AttrNumber targetColumnAttrNumber = InvalidAttrNumber;
/* reference tables do not have partition column */
if (partitionColumn == NULL)
{
targetEntryPartitionColumn = false;
}
else
{
if (commandType == CMD_UPDATE)
{
/*
* Note that it is not possible to give an alias to
* UPDATE table SET ...
*/
if (targetEntry->resname)
{
targetColumnAttrNumber = get_attnum(resultRelationId,
targetEntry->resname);
if (targetColumnAttrNumber == partitionColumn->varattno)
{
targetEntryPartitionColumn = true;
}
}
}
}
if (commandType == CMD_UPDATE &&
FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
CitusIsVolatileFunction))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"functions used in UPDATE queries on distributed "
"tables must not be VOLATILE",
NULL, NULL);
}
if (commandType == CMD_UPDATE && targetEntryPartitionColumn &&
TargetEntryChangesValue(targetEntry, partitionColumn,
joinTree))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"modifying the partition value of rows is not "
"allowed",
NULL, NULL);
}
if (commandType == CMD_UPDATE &&
MasterIrreducibleExpression((Node *) targetEntry->expr,
&hasVarArgument, &hasBadCoalesce))
{
Assert(hasVarArgument || hasBadCoalesce);
}
if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
NodeIsFieldStore))
{
/* DELETE cannot do field indirection already */
Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"inserting or modifying composite type fields is not "
"supported", NULL,
"Use the column name to insert or update the composite "
"type as a single value");
}
}
if (joinTree != NULL)
{
if (FindNodeMatchingCheckFunction((Node *) quals,
CitusIsVolatileFunction))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"functions used in the WHERE/ON/WHEN clause of modification "
"queries on distributed tables must not be VOLATILE",
NULL, NULL);
}
else if (MasterIrreducibleExpression(quals, &hasVarArgument,
&hasBadCoalesce))
{
Assert(hasVarArgument || hasBadCoalesce);
}
}
if (hasVarArgument)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"STABLE functions used in UPDATE queries "
"cannot be called with column references",
NULL, NULL);
}
if (hasBadCoalesce)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"non-IMMUTABLE functions are not allowed in CASE or "
"COALESCE statements",
NULL, NULL);
}
if (contain_mutable_functions((Node *) returningList))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"non-IMMUTABLE functions are not allowed in the "
"RETURNING clause",
NULL, NULL);
}
if (quals != NULL &&
nodeTag(quals) == T_CurrentOfExpr)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot run DML queries with cursors", NULL,
NULL);
}
return NULL;
}
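Hedged examples of statements the checks above defer on, for a hypothetical table dist distributed by dist_col:
/*
*   UPDATE dist SET dist_col = 5;                -- modifying the partition
*                                                   value of rows
*   UPDATE dist SET v = random();                -- VOLATILE function in an
*                                                   UPDATE target list
*   UPDATE dist SET v = 0 RETURNING now();       -- non-IMMUTABLE function in
*                                                   the RETURNING clause
*   UPDATE dist SET v = 0 WHERE CURRENT OF cur;  -- DML with cursors
*/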
/*
* ModifyPartialQuerySupported implements a subset of what ModifyQuerySupported checks,
* that subset being what is necessary for checking modifying CTEs.
@ -620,148 +768,21 @@ ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery,
Oid resultRelationId = ModifyQueryResultRelationId(queryTree);
*distributedTableIdOutput = resultRelationId;
uint32 rangeTableId = 1;
Var *partitionColumn = NULL;
if (IsCitusTable(resultRelationId))
{
partitionColumn = PartitionColumn(resultRelationId, rangeTableId);
}
commandType = queryTree->commandType;
if (commandType == CMD_INSERT || commandType == CMD_UPDATE ||
commandType == CMD_DELETE)
{
bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
FromExpr *joinTree = queryTree->jointree;
ListCell *targetEntryCell = NULL;
foreach(targetEntryCell, queryTree->targetList)
deferredError =
TargetlistAndFunctionsSupported(resultRelationId,
queryTree->jointree,
queryTree->jointree->quals,
queryTree->targetList,
commandType,
queryTree->returningList);
if (deferredError)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
/* skip resjunk entries: UPDATE adds some for ctid, etc. */
if (targetEntry->resjunk)
{
continue;
}
bool targetEntryPartitionColumn = false;
AttrNumber targetColumnAttrNumber = InvalidAttrNumber;
/* reference tables do not have partition column */
if (partitionColumn == NULL)
{
targetEntryPartitionColumn = false;
}
else
{
if (commandType == CMD_UPDATE)
{
/*
* Note that it is not possible to give an alias to
* UPDATE table SET ...
*/
if (targetEntry->resname)
{
targetColumnAttrNumber = get_attnum(resultRelationId,
targetEntry->resname);
if (targetColumnAttrNumber == partitionColumn->varattno)
{
targetEntryPartitionColumn = true;
}
}
}
}
if (commandType == CMD_UPDATE &&
FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
CitusIsVolatileFunction))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"functions used in UPDATE queries on distributed "
"tables must not be VOLATILE",
NULL, NULL);
}
if (commandType == CMD_UPDATE && targetEntryPartitionColumn &&
TargetEntryChangesValue(targetEntry, partitionColumn,
queryTree->jointree))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"modifying the partition value of rows is not "
"allowed",
NULL, NULL);
}
if (commandType == CMD_UPDATE &&
MasterIrreducibleExpression((Node *) targetEntry->expr,
&hasVarArgument, &hasBadCoalesce))
{
Assert(hasVarArgument || hasBadCoalesce);
}
if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
NodeIsFieldStore))
{
/* DELETE cannot do field indirection already */
Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"inserting or modifying composite type fields is not "
"supported", NULL,
"Use the column name to insert or update the composite "
"type as a single value");
}
}
if (joinTree != NULL)
{
if (FindNodeMatchingCheckFunction((Node *) joinTree->quals,
CitusIsVolatileFunction))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"functions used in the WHERE clause of modification "
"queries on distributed tables must not be VOLATILE",
NULL, NULL);
}
else if (MasterIrreducibleExpression(joinTree->quals, &hasVarArgument,
&hasBadCoalesce))
{
Assert(hasVarArgument || hasBadCoalesce);
}
}
if (hasVarArgument)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"STABLE functions used in UPDATE queries "
"cannot be called with column references",
NULL, NULL);
}
if (hasBadCoalesce)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"non-IMMUTABLE functions are not allowed in CASE or "
"COALESCE statements",
NULL, NULL);
}
if (contain_mutable_functions((Node *) queryTree->returningList))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"non-IMMUTABLE functions are not allowed in the "
"RETURNING clause",
NULL, NULL);
}
if (queryTree->jointree->quals != NULL &&
nodeTag(queryTree->jointree->quals) == T_CurrentOfExpr)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot run DML queries with cursors", NULL,
NULL);
return deferredError;
}
}
@ -866,7 +887,7 @@ IsLocallyAccessibleCitusLocalTable(Oid relationId)
/*
* NodeIsFieldStore returns true if given Node is a FieldStore object.
*/
static bool
bool
NodeIsFieldStore(Node *node)
{
return node && IsA(node, FieldStore);
@ -888,7 +909,9 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
PlannerRestrictionContext *plannerRestrictionContext)
{
Oid distributedTableId = InvalidOid;
DeferredErrorMessage *error = ModifyPartialQuerySupported(queryTree, multiShardQuery,
DeferredErrorMessage *error =
ModifyPartialQuerySupported(queryTree, multiShardQuery,
&distributedTableId);
if (error)
{
@ -953,19 +976,12 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
*/
}
else if (rangeTableEntry->relkind == RELKIND_MATVIEW)
{
if (IsMergeAllowedOnRelation(originalQuery, rangeTableEntry))
{
continue;
}
else
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"materialized views in "
"modify queries are not supported",
NULL, NULL);
}
}
/* for other kinds of relations, check if it's distributed */
else
{
@ -1065,7 +1081,7 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
if (multiShardQuery)
{
errorMessage = MultiShardUpdateDeleteMergeSupported(
errorMessage = MultiShardUpdateDeleteSupported(
originalQuery,
plannerRestrictionContext);
}
@ -1246,11 +1262,11 @@ ErrorIfOnConflictNotSupported(Query *queryTree)
/*
* MultiShardUpdateDeleteMergeSupported returns the error message if the update/delete is
* MultiShardUpdateDeleteSupported returns the error message if the update/delete is
* not pushdownable, otherwise it returns NULL.
*/
static DeferredErrorMessage *
MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
MultiShardUpdateDeleteSupported(Query *originalQuery,
PlannerRestrictionContext *plannerRestrictionContext)
{
DeferredErrorMessage *errorMessage = NULL;
@ -1282,7 +1298,8 @@ MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
}
else
{
errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(originalQuery,
errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(
originalQuery,
plannerRestrictionContext);
}
@ -1323,7 +1340,7 @@ SingleShardUpdateDeleteSupported(Query *originalQuery,
* HasDangerousJoinUsing search jointree for unnamed JOIN USING. Check the
* implementation of has_dangerous_join_using in ruleutils.
*/
static bool
bool
HasDangerousJoinUsing(List *rtableList, Node *joinTreeNode)
{
if (IsA(joinTreeNode, RangeTblRef))
@ -1427,7 +1444,7 @@ IsMergeQuery(Query *query)
* which do, but for now we just error out. That makes both the code and user-education
* easier.
*/
static bool
bool
MasterIrreducibleExpression(Node *expression, bool *varArgument, bool *badCoalesce)
{
WalkerState data;
@ -1575,7 +1592,7 @@ MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context)
* expression is a value that is implied by the qualifiers of the join
* tree, or the target entry sets a different column.
*/
static bool
bool
TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, FromExpr *joinTree)
{
bool isColumnValueChanged = true;
@ -1796,7 +1813,7 @@ ExtractFirstCitusTableId(Query *query)
* RouterJob builds a Job to represent a single shard select/update/delete and
* multiple shard update/delete queries.
*/
static Job *
Job *
RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionContext,
DeferredErrorMessage **planningError)
{
@ -1846,8 +1863,8 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon
if (*planningError)
{
/*
* For MERGE, we do _not_ plan anything other than Router job, let's
* not continue further down the lane in distributed planning, simply
* For MERGE, we do _not_ plan any other router job than the MERGE job itself,
* let's not continue further down the lane in distributed planning, simply
* bail out.
*/
if (IsMergeQuery(originalQuery))
@ -2320,9 +2337,20 @@ PlanRouterQuery(Query *originalQuery,
}
Assert(UpdateOrDeleteOrMergeQuery(originalQuery));
if (IsMergeQuery(originalQuery))
{
planningError = MergeQuerySupported(originalQuery,
isMultiShardQuery,
plannerRestrictionContext);
}
else
{
planningError = ModifyQuerySupported(originalQuery, originalQuery,
isMultiShardQuery,
plannerRestrictionContext);
}
if (planningError != NULL)
{
return planningError;
@ -2643,7 +2671,7 @@ TargetShardIntervalForFastPathQuery(Query *query, bool *isMultiShardQuery,
{
Oid relationId = ExtractFirstCitusTableId(query);
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKey(relationId))
{
/* we don't need to do shard pruning for non-distributed tables */
return list_make1(LoadShardIntervalList(relationId));
@ -2936,7 +2964,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
Assert(query->commandType == CMD_INSERT);
/* reference tables and citus local tables can only have one shard */
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKeyCacheEntry(cacheEntry))
{
List *shardIntervalList = LoadShardIntervalList(distributedTableId);
@ -3477,7 +3505,7 @@ ExtractInsertPartitionKeyValue(Query *query)
uint32 rangeTableId = 1;
Const *singlePartitionValueConst = NULL;
if (IsCitusTableType(distributedTableId, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKey(distributedTableId))
{
return NULL;
}
@ -3589,6 +3617,8 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
bool hasDistributedTable = false;
bool hasReferenceTable = false;
List *distributedRelationList = NIL;
ExtractRangeTableRelationWalker((Node *) query, &rangeTableRelationList);
foreach(rangeTableRelationCell, rangeTableRelationList)
{
@ -3626,6 +3656,8 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
if (IsCitusTableType(distributedTableId, DISTRIBUTED_TABLE))
{
hasDistributedTable = true;
distributedRelationList = lappend_oid(distributedRelationList,
distributedTableId);
}
/*
@ -3680,6 +3712,15 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
NULL, NULL);
}
if (!EnableNonColocatedRouterQueryPushdown &&
!AllDistributedRelationsInListColocated(distributedRelationList))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"router planner does not support queries that "
"reference non-colocated distributed tables",
NULL, NULL);
}
#if PG_VERSION_NUM >= PG_VERSION_14
DeferredErrorMessage *CTEWithSearchClauseError =
ErrorIfQueryHasCTEWithSearchClause(query);
@ -3797,8 +3838,7 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree)
CitusTableCacheEntry *modificationTableCacheEntry =
GetCitusTableCacheEntry(distributedTableId);
if (IsCitusTableTypeCacheEntry(modificationTableCacheEntry,
CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKeyCacheEntry(modificationTableCacheEntry))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot router plan modification of a non-distributed table",
@ -168,7 +168,7 @@ AnchorRte(Query *subquery)
{
Oid relationId = currentRte->relid;
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
{
/*
* Non-distributed tables should not be the anchor rte since they
@ -591,10 +591,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
}
else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"complex joins are only supported when all distributed tables are "
StringInfo errorMessage = makeStringInfo();
bool isMergeCmd = IsMergeQuery(originalQuery);
appendStringInfo(errorMessage,
"%s"
"only supported when all distributed tables are "
"co-located and joined on their distribution columns",
NULL, NULL);
isMergeCmd ? "MERGE command is " : "complex joins are ");
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
errorMessage->data, NULL, NULL);
}
/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
@ -151,7 +151,8 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass
secondClass);
static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex,
Index *partitionKeyIndex);
static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext *
static bool AllDistributedRelationsInRestrictionContextColocated(
RelationRestrictionContext *
restrictionContext);
static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node);
static JoinRestrictionContext * FilterJoinRestrictionContext(
@ -383,7 +384,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery,
return false;
}
if (!AllRelationsInRestrictionContextColocated(restrictionContext))
if (!AllDistributedRelationsInRestrictionContextColocated(restrictionContext))
{
/* distribution columns are equal, but tables are not co-located */
return false;
@ -703,8 +704,8 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
/* we shouldn't check for the equality of non-distributed tables */
if (IsCitusTableType(relationRestriction->relationId,
CITUS_TABLE_WITH_NO_DIST_KEY))
if (IsCitusTable(relationRestriction->relationId) &&
!HasDistributionKey(relationRestriction->relationId))
{
continue;
}
@ -1919,22 +1920,66 @@ FindQueryContainingRTEIdentityInternal(Node *node,
/*
* AllRelationsInRestrictionContextColocated determines whether all of the relations in the
* given relation restrictions list are co-located.
* AllDistributedRelationsInRestrictionContextColocated determines whether all of the
* distributed relations in the given relation restrictions list are co-located.
*/
static bool
AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext)
AllDistributedRelationsInRestrictionContextColocated(
RelationRestrictionContext *restrictionContext)
{
RelationRestriction *relationRestriction = NULL;
int initialColocationId = INVALID_COLOCATION_ID;
List *relationIdList = NIL;
/* check whether all relations exists in the main restriction list */
foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList)
{
Oid relationId = relationRestriction->relationId;
relationIdList = lappend_oid(relationIdList, relationRestriction->relationId);
}
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
return AllDistributedRelationsInListColocated(relationIdList);
}
/*
* AllDistributedRelationsInRTEListColocated determines whether all of the
* distributed relations in the given RangeTableEntry list are co-located.
*/
bool
AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList)
{
RangeTblEntry *rangeTableEntry = NULL;
List *relationIdList = NIL;
foreach_ptr(rangeTableEntry, rangeTableEntryList)
{
relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid);
}
return AllDistributedRelationsInListColocated(relationIdList);
}
/*
* AllDistributedRelationsInListColocated determines whether all of the
* distributed relations in the given list are co-located.
*/
bool
AllDistributedRelationsInListColocated(List *relationList)
{
int initialColocationId = INVALID_COLOCATION_ID;
Oid relationId = InvalidOid;
foreach_oid(relationId, relationList)
{
if (!IsCitusTable(relationId))
{
/* not interested in Postgres tables */
continue;
}
if (!IsCitusTableType(relationId, DISTRIBUTED_TABLE))
{
/* not interested in non-distributed tables */
continue;
}
@ -333,7 +333,7 @@ PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList,
}
/* short circuit for non-distributed tables such as reference table */
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKeyCacheEntry(cacheEntry))
{
prunedList = ShardArrayToList(cacheEntry->sortedShardIntervalArray,
cacheEntry->shardIntervalArrayLength);
@ -88,6 +88,8 @@ static const char *replicationSlotPrefix[] = {
* IMPORTANT: All the subscription names should start with "citus_". Otherwise
* our utility hook does not defend against non-superusers altering or dropping
* them, which is important for security purposes.
*
* We should also keep these in sync with IsCitusShardTransferBackend().
*/
static const char *subscriptionPrefix[] = {
[SHARD_MOVE] = "citus_shard_move_subscription_",
@ -1338,7 +1340,9 @@ CreatePublications(MultiConnection *connection,
worker->groupId,
CLEANUP_ALWAYS);
ExecuteCriticalRemoteCommand(connection, DISABLE_DDL_PROPAGATION);
ExecuteCriticalRemoteCommand(connection, createPublicationCommand->data);
ExecuteCriticalRemoteCommand(connection, ENABLE_DDL_PROPAGATION);
pfree(createPublicationCommand->data);
pfree(createPublicationCommand);
}
@ -10,18 +10,27 @@
#include "postgres.h"
#include "distributed/shardinterval_utils.h"
#include "distributed/shardsplit_shared_memory.h"
#include "distributed/worker_shard_visibility.h"
#include "distributed/worker_protocol.h"
#include "distributed/listutils.h"
#include "distributed/metadata/distobject.h"
#include "replication/logical.h"
#include "utils/typcache.h"
#include "utils/lsyscache.h"
#include "catalog/pg_namespace.h"
extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
static LogicalDecodeChangeCB pgoutputChangeCB;
static LogicalDecodeChangeCB pgOutputPluginChangeCB;
#define InvalidRepOriginId 0
static HTAB *SourceToDestinationShardMap = NULL;
static bool replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId
origin_id);
/* Plugin callback */
static void split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
static void shard_split_change_cb(LogicalDecodingContext *ctx,
ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change);
/* Helper methods */
@ -47,7 +56,8 @@ void
_PG_output_plugin_init(OutputPluginCallbacks *cb)
{
LogicalOutputPluginInit plugin_init =
(LogicalOutputPluginInit) (void *) load_external_function("pgoutput",
(LogicalOutputPluginInit) (void *)
load_external_function("pgoutput",
"_PG_output_plugin_init",
false, NULL);
@ -60,25 +70,56 @@ _PG_output_plugin_init(OutputPluginCallbacks *cb)
plugin_init(cb);
/* actual pgoutput callback will be called with the appropriate destination shard */
pgoutputChangeCB = cb->change_cb;
cb->change_cb = split_change_cb;
pgOutputPluginChangeCB = cb->change_cb;
cb->change_cb = shard_split_change_cb;
cb->filter_by_origin_cb = replication_origin_filter_cb;
}
/*
* split_change function emits the incoming tuple change
* replication_origin_filter_cb is a callback function that filters out publication of
* changes that originated from any node other than the current node. This is
* identified by the "origin_id" of the changes. The origin_id is set to
* a non-zero value in the origin node as part of WAL replication for internal
* operations like shard split/moves/create_distributed_table etc.
*/
static bool
replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId origin_id)
{
return (origin_id != InvalidRepOriginId);
}
/*
* shard_split_change_cb function emits the incoming tuple change
* to the appropriate destination shard.
*/
static void
split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
shard_split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change)
{
/*
* If Citus has not been loaded yet, pass the changes
* through to the underlying decoder plugin.
*/
if (!CitusHasBeenLoaded())
{
pgOutputPluginChangeCB(ctx, txn, relation, change);
return;
}
/* check if the relation is publishable. */
if (!is_publishable_relation(relation))
{
return;
}
char *replicationSlotName = ctx->slot->data.name.data;
if (replicationSlotName == NULL)
{
elog(ERROR, "Replication slot name is NULL!");
return;
}
/*
* Initialize SourceToDestinationShardMap if not already initialized.
@ -198,7 +239,7 @@ split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
}
}
pgoutputChangeCB(ctx, txn, targetRelation, change);
pgOutputPluginChangeCB(ctx, txn, targetRelation, change);
RelationClose(targetRelation);
}
@ -74,6 +74,7 @@
#include "distributed/recursive_planning.h"
#include "distributed/reference_table_utils.h"
#include "distributed/relation_access_tracking.h"
#include "distributed/replication_origin_session_utils.h"
#include "distributed/run_from_same_connection.h"
#include "distributed/shard_cleaner.h"
#include "distributed/shard_transfer.h"
@ -135,6 +136,8 @@ ReadColumnarOptions_type extern_ReadColumnarOptions = NULL;
CppConcat(extern_, funcname) = \
(typename) (void *) lookup_external_function(handle, # funcname)
#define CDC_DECODER_DYNAMIC_LIB_PATH "$libdir/citus_decoders:$libdir"
DEFINE_COLUMNAR_PASSTHROUGH_FUNC(columnar_handler)
DEFINE_COLUMNAR_PASSTHROUGH_FUNC(alter_columnar_table_set)
DEFINE_COLUMNAR_PASSTHROUGH_FUNC(alter_columnar_table_reset)
@ -206,7 +209,7 @@ static bool StatisticsCollectionGucCheckHook(bool *newval, void **extra, GucSour
source);
static void CitusAuthHook(Port *port, int status);
static bool IsSuperuser(char *userName);
static void AdjustDynamicLibraryPathForCdcDecoders(void);
static ClientAuthentication_hook_type original_client_auth_hook = NULL;
@ -359,6 +362,11 @@ static const struct config_enum_entry cpu_priority_options[] = {
{ NULL, 0, false}
};
static const struct config_enum_entry metadata_sync_mode_options[] = {
{ "transactional", METADATA_SYNC_TRANSACTIONAL, false },
{ "nontransactional", METADATA_SYNC_NON_TRANSACTIONAL, false },
{ NULL, 0, false }
};
/* *INDENT-ON* */
@ -469,6 +477,17 @@ _PG_init(void)
InitializeLocallyReservedSharedConnections();
InitializeClusterClockMem();
/*
* Adjust the dynamic library path to prepend citus_decoders to the dynamic
* library path. This is needed to make sure that the citus decoders are
* loaded before the default decoders for CDC.
*/
if (EnableChangeDataCapture)
{
AdjustDynamicLibraryPathForCdcDecoders();
}
/* initialize shard split shared memory handle management */
InitializeShardSplitSMHandleManagement();
@ -536,6 +555,22 @@ _PG_init(void)
}
/*
* AdjustDynamicLibraryPathForCdcDecoders prepends the $libdir/citus_decoders
* to the dynamic library path. This is needed to make sure that the citus
* decoders are loaded before the default decoders for CDC.
*/
static void
AdjustDynamicLibraryPathForCdcDecoders(void)
{
if (strcmp(Dynamic_library_path, "$libdir") == 0)
{
SetConfigOption("dynamic_library_path", CDC_DECODER_DYNAMIC_LIB_PATH,
PGC_POSTMASTER, PGC_S_OVERRIDE);
}
}
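As a quick sanity check, the effect is observable from any session. A minimal sketch, assuming the node started with citus.enable_change_data_capture enabled and dynamic_library_path at its default of $libdir:

SHOW dynamic_library_path;
-- expected: $libdir/citus_decoders:$libdir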
#if PG_VERSION_NUM >= PG_VERSION_15
/*
@ -1132,6 +1167,16 @@ RegisterCitusConfigVariables(void)
GUC_STANDARD,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"citus.enable_change_data_capture",
gettext_noop("Enables using replication origin tracking for change data capture"),
NULL,
&EnableChangeDataCapture,
false,
PGC_USERSET,
GUC_STANDARD,
NULL, NULL, NULL);
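A minimal usage sketch for the new GUC (session-level here; it is PGC_USERSET, so it can also be set in postgresql.conf):

SET citus.enable_change_data_capture TO on;
SHOW citus.enable_change_data_capture;
-- on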
DefineCustomBoolVariable(
"citus.enable_cluster_clock",
gettext_noop("When users explicitly call UDF citus_get_transaction_clock() "
@ -1268,6 +1313,26 @@ RegisterCitusConfigVariables(void)
GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"citus.enable_non_colocated_router_query_pushdown",
gettext_noop("Enables router planner for the queries that reference "
"non-colocated distributed tables."),
gettext_noop("Normally, router planner planner is only enabled for "
"the queries that reference colocated distributed tables "
"because it is not guaranteed to have the target shards "
"always on the same node, e.g., after rebalancing the "
"shards. For this reason, while enabling this flag allows "
"some degree of optimization for the queries that reference "
"non-colocated distributed tables, it is not guaranteed "
"that the same query will work after rebalancing the shards "
"or altering the shard count of one of those distributed "
"tables."),
&EnableNonColocatedRouterQueryPushdown,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
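A sketch of how this flag interacts with the router-planner check added earlier in this diff; the table names are hypothetical:

-- t1 and t2 are distributed tables that are NOT co-located (hypothetical)
SET citus.enable_non_colocated_router_query_pushdown TO off;
SELECT * FROM t1 JOIN t2 USING (key) WHERE key = 1;
-- the router planner now defers with "router planner does not support queries
-- that reference non-colocated distributed tables" and Citus falls back to
-- its other planners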
DefineCustomBoolVariable(
"citus.enable_repartition_joins",
gettext_noop("Allows Citus to repartition data between nodes."),
@ -1849,6 +1914,21 @@ RegisterCitusConfigVariables(void)
GUC_UNIT_MS | GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
DefineCustomEnumVariable(
"citus.metadata_sync_mode",
gettext_noop("Sets transaction mode for metadata syncs."),
gettext_noop("metadata sync can be run inside a single coordinated "
"transaction or with multiple small transactions in "
"idempotent way. By default we sync metadata in single "
"coordinated transaction. When we hit memory problems "
"at workers, we have alternative nontransactional mode "
"where we send each command with separate transaction."),
&MetadataSyncTransMode,
METADATA_SYNC_TRANSACTIONAL, metadata_sync_mode_options,
PGC_SUSET,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
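A minimal sketch of switching modes (superuser only, per GUC_SUPERUSER_ONLY above):

SET citus.metadata_sync_mode TO 'nontransactional';
-- and back to the default
SET citus.metadata_sync_mode TO 'transactional';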
DefineCustomIntVariable(
"citus.metadata_sync_retry_interval",
gettext_noop("Sets the interval to retry failed metadata syncs."),
@ -2406,7 +2486,6 @@ RegisterCitusConfigVariables(void)
GUC_STANDARD,
NULL, NULL, NULL);
/* warn about config items in the citus namespace that are not registered above */
EmitWarningsOnPlaceholders("citus");
@ -1,4 +1,12 @@
-- citus--11.2-1--11.3-1
#include "udfs/repl_origin_helper/11.3-1.sql"
#include "udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql"
ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY USING INDEX pg_dist_authinfo_identification_index;
ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY USING INDEX pg_dist_partition_logical_relid_index;
ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY USING INDEX pg_dist_placement_placementid_index;
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY USING INDEX pg_dist_rebalance_strategy_name_key;
ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY USING INDEX pg_dist_shard_shardid_index;
ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY USING INDEX pg_dist_transaction_unique_constraint;
-- bump version to 11.3-1
#include "udfs/worker_drop_all_shell_tables/11.3-1.sql"
#include "udfs/citus_internal_mark_node_not_synced/11.3-1.sql"
@ -1,2 +1,22 @@
-- citus--11.3-1--11.2-1
-- this is an empty downgrade path since citus--11.2-1--11.3-1.sql is empty for now
DROP FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking();
DROP FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking();
DROP FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active();
DROP FUNCTION IF EXISTS pg_catalog.worker_adjust_identity_column_seq_ranges(regclass);
ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING;
ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING;
DROP PROCEDURE pg_catalog.worker_drop_all_shell_tables(bool);
DROP FUNCTION pg_catalog.citus_internal_mark_node_not_synced(int, int);
@ -0,0 +1,6 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_mark_node_not_synced(parent_pid int, nodeid int)
RETURNS VOID
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_internal_mark_node_not_synced$$;
COMMENT ON FUNCTION citus_internal_mark_node_not_synced(int, int)
IS 'marks given node not synced by unsetting metadatasynced column at the start of the nontransactional sync.';
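For illustration only, since the function is internal to the nontransactional sync path; the node id below is hypothetical:

SELECT pg_catalog.citus_internal_mark_node_not_synced(pg_backend_pid(), 2);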
@ -0,0 +1,6 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_mark_node_not_synced(parent_pid int, nodeid int)
RETURNS VOID
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_internal_mark_node_not_synced$$;
COMMENT ON FUNCTION citus_internal_mark_node_not_synced(int, int)
IS 'marks given node not synced by unsetting metadatasynced column at the start of the nontransactional sync.';

CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking()
RETURNS void
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_internal_start_replication_origin_tracking$$;
COMMENT ON FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking()
IS 'To start replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC';
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking()
RETURNS void
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_internal_stop_replication_origin_tracking$$;
COMMENT ON FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking()
IS 'To stop replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC';
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active()
RETURNS boolean
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_internal_is_replication_origin_tracking_active$$;
COMMENT ON FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active()
IS 'To check if replication origin tracking is active for skipping publishing of duplicated events during internal data movements for CDC';
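A sketch of the intended call sequence around an internal data movement; note the start call is a no-op unless citus.enable_change_data_capture is enabled:

SELECT pg_catalog.citus_internal_start_replication_origin_tracking();
SELECT pg_catalog.citus_internal_is_replication_origin_tracking_active(); -- t
-- ... run the internal data copy whose changes must not reach CDC clients ...
SELECT pg_catalog.citus_internal_stop_replication_origin_tracking();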
@ -0,0 +1,20 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking()
RETURNS void
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_internal_start_replication_origin_tracking$$;
COMMENT ON FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking()
IS 'To start replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC';
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking()
RETURNS void
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_internal_stop_replication_origin_tracking$$;
COMMENT ON FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking()
IS 'To stop replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC';
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active()
RETURNS boolean
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_internal_is_replication_origin_tracking_active$$;
COMMENT ON FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active()
IS 'To check if replication origin tracking is active for skipping publishing of duplicated events during internal data movements for CDC';
@ -0,0 +1,7 @@
CREATE OR REPLACE FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass)
RETURNS VOID
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$worker_adjust_identity_column_seq_ranges$$;
COMMENT ON FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass)
IS 'modify identity column seq ranges to produce globally unique values';
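A minimal sketch of invoking it on a worker; the shard-local table name is hypothetical:

SELECT pg_catalog.worker_adjust_identity_column_seq_ranges('my_table_102008'::regclass);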
@ -0,0 +1,7 @@
CREATE OR REPLACE FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass)
RETURNS VOID
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$worker_adjust_identity_column_seq_ranges$$;
COMMENT ON FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass)
IS 'modify identity column seq ranges to produce globally unique values';
@ -0,0 +1,23 @@
-- During metadata sync, when we send many DDLs over a single transaction, the worker node can error
-- due to reaching the max allocation block size for invalidation messages. To work around the problem,
-- we added a nontransactional metadata sync mode that uses many transactions while dropping shell tables
-- via https://github.com/citusdata/citus/pull/6728.
CREATE OR REPLACE PROCEDURE pg_catalog.worker_drop_all_shell_tables(singleTransaction bool DEFAULT true)
LANGUAGE plpgsql
AS $$
DECLARE
table_name text;
BEGIN
-- drop shell tables within single or multiple transactions according to the flag singleTransaction
FOR table_name IN SELECT logicalrelid::regclass::text FROM pg_dist_partition
LOOP
PERFORM pg_catalog.worker_drop_shell_table(table_name);
IF not singleTransaction THEN
COMMIT;
END IF;
END LOOP;
END;
$$;
COMMENT ON PROCEDURE worker_drop_all_shell_tables(singleTransaction bool)
IS 'drop all distributed tables only without the metadata within single transaction or '
'multiple transactions specified by the flag singleTransaction';
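A minimal usage sketch; passing false commits after each shell table, which is the behavior the nontransactional sync mode relies on:

CALL pg_catalog.worker_drop_all_shell_tables(false);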
@ -0,0 +1,23 @@
-- During metadata sync, when we send many DDLs over a single transaction, the worker node can error
-- due to reaching the max allocation block size for invalidation messages. To work around the problem,
-- we added a nontransactional metadata sync mode that uses many transactions while dropping shell tables
-- via https://github.com/citusdata/citus/pull/6728.
CREATE OR REPLACE PROCEDURE pg_catalog.worker_drop_all_shell_tables(singleTransaction bool DEFAULT true)
LANGUAGE plpgsql
AS $$
DECLARE
table_name text;
BEGIN
-- drop shell tables within single or multiple transactions according to the flag singleTransaction
FOR table_name IN SELECT logicalrelid::regclass::text FROM pg_dist_partition
LOOP
PERFORM pg_catalog.worker_drop_shell_table(table_name);
IF not singleTransaction THEN
COMMIT;
END IF;
END LOOP;
END;
$$;
COMMENT ON PROCEDURE worker_drop_all_shell_tables(singleTransaction bool)
IS 'drop all distributed tables only without the metadata within single transaction or '
'multiple transactions specified by the flag singleTransaction';
@ -49,26 +49,23 @@ activate_node_snapshot(PG_FUNCTION_ARGS)
*/
WorkerNode *dummyWorkerNode = GetFirstPrimaryWorkerNode();
List *updateLocalGroupCommand =
list_make1(LocalGroupIdUpdateCommand(dummyWorkerNode->groupId));
List *syncDistObjCommands = SyncDistributedObjectsCommandList(dummyWorkerNode);
List *dropSnapshotCommands = NodeMetadataDropCommands();
List *createSnapshotCommands = NodeMetadataCreateCommands();
List *pgDistTableMetadataSyncCommands = PgDistTableMetadataSyncCommandList();
/*
* Create MetadataSyncContext which is used throughout nodes' activation.
* As we set collectCommands to true, it would not create connections to workers.
* Instead it would collect and return sync commands to be sent to workers.
*/
bool collectCommands = true;
bool nodesAddedInSameTransaction = false;
MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(dummyWorkerNode),
collectCommands,
nodesAddedInSameTransaction);
List *activateNodeCommandList = NIL;
ActivateNodeList(context);
List *activateNodeCommandList = context->collectedCommands;
int activateNodeCommandIndex = 0;
Oid ddlCommandTypeId = TEXTOID;
activateNodeCommandList = list_concat(activateNodeCommandList,
updateLocalGroupCommand);
activateNodeCommandList = list_concat(activateNodeCommandList, syncDistObjCommands);
activateNodeCommandList = list_concat(activateNodeCommandList, dropSnapshotCommands);
activateNodeCommandList = list_concat(activateNodeCommandList,
createSnapshotCommands);
activateNodeCommandList = list_concat(activateNodeCommandList,
pgDistTableMetadataSyncCommands);
int activateNodeCommandCount = list_length(activateNodeCommandList);
Datum *activateNodeCommandDatumArray = palloc0(activateNodeCommandCount *
sizeof(Datum));
@ -147,6 +147,26 @@ shard_placement_rebalance_array(PG_FUNCTION_ARGS)
shardPlacementList = SortList(shardPlacementList, CompareShardPlacements);
shardPlacementListList = lappend(shardPlacementListList, shardPlacementList);
List *unbalancedShards = NIL;
ListCell *shardPlacementListCell = NULL;
foreach(shardPlacementListCell, shardPlacementListList)
{
List *placementList = (List *) lfirst(shardPlacementListCell);
if (list_length(placementList) < list_length(workerNodeList))
{
unbalancedShards = list_concat(unbalancedShards,
placementList);
shardPlacementListList = foreach_delete_current(shardPlacementListList,
shardPlacementListCell);
}
}
if (list_length(unbalancedShards) > 0)
{
shardPlacementListList = lappend(shardPlacementListList, unbalancedShards);
}
rebalancePlanFunctions.context = &context;
/* sort the lists to make the function more deterministic */
@ -1270,23 +1270,6 @@ MyBackendGotCancelledDueToDeadlock(bool clearState)
}
/*
* MyBackendIsInDisributedTransaction returns true if MyBackendData
* is in a distributed transaction.
*/
bool
MyBackendIsInDisributedTransaction(void)
{
/* backend might not have used citus yet and thus not initialized backend data */
if (!MyBackendData)
{
return false;
}
return IsInDistributedTransaction(MyBackendData);
}
/*
* ActiveDistributedTransactionNumbers returns a list of pointers to
* transaction numbers of distributed transactions that are in progress
@ -1452,6 +1435,21 @@ IsExternalClientBackend(void)
}
/*
* IsCitusShardTransferBackend returns true if we are in a backend that citus
* initiated to transfer a shard.
*/
bool
IsCitusShardTransferBackend(void)
{
int prefixLength = strlen(CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX);
return strncmp(application_name,
CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX,
prefixLength) == 0;
}
/*
* DetermineCitusBackendType determines the type of backend based on the application_name.
*/
@ -195,7 +195,7 @@ RecordRelationAccessIfNonDistTable(Oid relationId, ShardPlacementAccessType acce
* recursively calling RecordRelationAccessBase(), so be careful about
* removing this check.
*/
if (!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
if (IsCitusTable(relationId) && HasDistributionKey(relationId))
{
return;
}
@ -732,8 +732,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
if (!(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY) &&
cacheEntry->referencingRelationsViaForeignKey != NIL))
if (HasDistributionKeyCacheEntry(cacheEntry) ||
cacheEntry->referencingRelationsViaForeignKey == NIL)
{
return;
}
@ -931,7 +931,7 @@ HoldsConflictingLockWithReferencedRelations(Oid relationId, ShardPlacementAccess
* We're only interested in foreign keys to reference tables and citus
* local tables.
*/
if (!IsCitusTableType(referencedRelation, CITUS_TABLE_WITH_NO_DIST_KEY))
if (IsCitusTable(referencedRelation) && HasDistributionKey(referencedRelation))
{
continue;
}
@ -993,7 +993,7 @@ HoldsConflictingLockWithReferencingRelations(Oid relationId, ShardPlacementAcces
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
bool holdsConflictingLocks = false;
Assert(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY));
Assert(!HasDistributionKeyCacheEntry(cacheEntry));
Oid referencingRelation = InvalidOid;
foreach_oid(referencingRelation, cacheEntry->referencingRelationsViaForeignKey)
@ -34,6 +34,7 @@
#include "distributed/multi_logical_replication.h"
#include "distributed/multi_explain.h"
#include "distributed/repartition_join_execution.h"
#include "distributed/replication_origin_session_utils.h"
#include "distributed/transaction_management.h"
#include "distributed/placement_connection.h"
#include "distributed/relation_access_tracking.h"
@ -391,6 +392,9 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
ResetGlobalVariables();
ResetRelationAccessHash();
/* Reset any local replication origin session since transaction has been aborted.*/
ResetReplicationOriginLocalSession();
/* empty the CitusXactCallbackContext to ensure we're not leaking memory */
MemoryContextReset(CitusXactCallbackContext);
@ -715,6 +719,8 @@ CoordinatedSubTransactionCallback(SubXactEvent event, SubTransactionId subId,
SetCreateCitusTransactionLevel(0);
}
/* Reset any local replication origin session since subtransaction has been aborted.*/
ResetReplicationOriginLocalSession();
MemoryContextSwitchTo(previousContext);
break;
@ -374,6 +374,54 @@ SendCommandListToWorkerOutsideTransactionWithConnection(MultiConnection *workerC
}
/*
* SendCommandListToWorkerListWithBareConnections sends the command list
* over the specified bare connections. This function is mainly useful to
* avoid opening an closing connections excessively by allowing reusing
* connections to send multiple separate bare commands. The function
* raises an error if any of the queries fail.
*/
void
SendCommandListToWorkerListWithBareConnections(List *workerConnectionList,
List *commandList)
{
Assert(!InCoordinatedTransaction());
Assert(!GetCoordinatedTransactionShouldUse2PC());
if (list_length(commandList) == 0 || list_length(workerConnectionList) == 0)
{
/* nothing to do */
return;
}
/*
* In order to avoid round-trips per query in queryStringList,
* we join the string and send as a single command. Also,
* if there is only a single command, avoid additional call to
* StringJoin given that some strings can be quite large.
*/
char *stringToSend = (list_length(commandList) == 1) ?
linitial(commandList) : StringJoin(commandList, ';');
/* send commands in parallel */
MultiConnection *connection = NULL;
foreach_ptr(connection, workerConnectionList)
{
int querySent = SendRemoteCommand(connection, stringToSend);
if (querySent == 0)
{
ReportConnectionError(connection, ERROR);
}
}
bool failOnError = true;
foreach_ptr(connection, workerConnectionList)
{
ClearResults(connection, failOnError);
}
}
/*
* SendCommandListToWorkerInCoordinatedTransaction opens connection to the node
* with the given nodeName and nodePort. The commands are sent as part of the
@ -390,6 +438,8 @@ SendMetadataCommandListToWorkerListInCoordinatedTransaction(List *workerNodeList
return;
}
ErrorIfAnyMetadataNodeOutOfSync(workerNodeList);
UseCoordinatedTransaction();
List *connectionList = NIL;
@ -442,8 +442,7 @@ ShardsIntervalsEqual(ShardInterval *leftShardInterval, ShardInterval *rightShard
{
return HashPartitionedShardIntervalsEqual(leftShardInterval, rightShardInterval);
}
else if (IsCitusTableType(leftShardInterval->relationId,
CITUS_TABLE_WITH_NO_DIST_KEY))
else if (!HasDistributionKey(leftShardInterval->relationId))
{
/*
* Reference tables has only a single shard and all reference tables
@ -503,12 +503,11 @@ GetReferenceTableColocationId()
/*
* DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over
* list of reference and replicated hash distributed tables and deletes
* all placements from pg_dist_placement table for given group.
* GetAllReplicatedTableList returns all tables that have replicated placements.
* i.e. (all reference tables) + (distributed tables with more than 1 placements)
*/
void
DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
List *
GetAllReplicatedTableList(void)
{
List *referenceTableList = CitusTableTypeIdList(REFERENCE_TABLE);
List *replicatedMetadataSyncedDistributedTableList =
@ -517,13 +516,25 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
List *replicatedTableList =
list_concat(referenceTableList, replicatedMetadataSyncedDistributedTableList);
/* if there are no reference tables, we do not need to do anything */
if (list_length(replicatedTableList) == 0)
{
return;
return replicatedTableList;
}
StringInfo deletePlacementCommand = makeStringInfo();
/*
* ReplicatedPlacementsForNodeGroup filters all replicated placements for given
* node group id.
*/
List *
ReplicatedPlacementsForNodeGroup(int32 groupId)
{
List *replicatedTableList = GetAllReplicatedTableList();
if (list_length(replicatedTableList) == 0)
{
return NIL;
}
List *replicatedPlacementsForNodeGroup = NIL;
Oid replicatedTableId = InvalidOid;
foreach_oid(replicatedTableId, replicatedTableList)
{
@ -538,25 +549,104 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
continue;
}
replicatedPlacementsForNodeGroup = list_concat(replicatedPlacementsForNodeGroup,
placements);
}
return replicatedPlacementsForNodeGroup;
}
/*
* DeleteShardPlacementCommand returns a command for deleting given placement from
* metadata.
*/
char *
DeleteShardPlacementCommand(uint64 placementId)
{
StringInfo deletePlacementCommand = makeStringInfo();
appendStringInfo(deletePlacementCommand,
"DELETE FROM pg_catalog.pg_dist_placement "
"WHERE placementid = " UINT64_FORMAT, placementId);
return deletePlacementCommand->data;
}
/*
* DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over
* list of reference and replicated hash distributed tables and deletes
* all placements from pg_dist_placement table for given group.
*/
void
DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
{
List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId);
/* if there are no replicated tables for the group, we do not need to do anything */
if (list_length(replicatedPlacementListForGroup) == 0)
{
return;
}
GroupShardPlacement *placement = NULL;
foreach_ptr(placement, placements)
foreach_ptr(placement, replicatedPlacementListForGroup)
{
LockShardDistributionMetadata(placement->shardId, ExclusiveLock);
if (!localOnly)
{
char *deletePlacementCommand =
DeleteShardPlacementCommand(placement->placementId);
SendCommandToWorkersWithMetadata(deletePlacementCommand);
}
DeleteShardPlacementRow(placement->placementId);
}
}
/*
* DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext does the same as
* DeleteAllReplicatedTablePlacementsFromNodeGroup except it uses metadataSyncContext for
* connections.
*/
void
DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext(
MetadataSyncContext *context, int32 groupId, bool localOnly)
{
List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId);
/* if there are no replicated tables for the group, we do not need to do anything */
if (list_length(replicatedPlacementListForGroup) == 0)
{
return;
}
MemoryContext oldContext = MemoryContextSwitchTo(context->context);
GroupShardPlacement *placement = NULL;
foreach_ptr(placement, replicatedPlacementListForGroup)
{
LockShardDistributionMetadata(placement->shardId, ExclusiveLock);
if (!localOnly)
{
resetStringInfo(deletePlacementCommand);
appendStringInfo(deletePlacementCommand,
"DELETE FROM pg_catalog.pg_dist_placement "
"WHERE placementid = " UINT64_FORMAT,
placement->placementId);
char *deletePlacementCommand =
DeleteShardPlacementCommand(placement->placementId);
SendCommandToWorkersWithMetadata(deletePlacementCommand->data);
SendOrCollectCommandListToMetadataNodes(context,
list_make1(deletePlacementCommand));
}
/* do not execute local transaction if we collect commands */
if (!MetadataSyncCollectsCommands(context))
{
DeleteShardPlacementRow(placement->placementId);
}
ResetMetadataSyncMemoryContext(context);
}
MemoryContextSwitchTo(oldContext);
}
@ -0,0 +1,239 @@
/*-------------------------------------------------------------------------
*
* replication_origin_session_utils.c
* Functions for managing replication origin session.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "distributed/replication_origin_session_utils.h"
#include "distributed/remote_commands.h"
#include "distributed/metadata_cache.h"
#include "utils/builtins.h"
#include "miscadmin.h"
static bool IsRemoteReplicationOriginSessionSetup(MultiConnection *connection);
static void SetupMemoryContextResetReplicationOriginHandler(void);
static void SetupReplicationOriginSessionHelper(bool isContexResetSetupNeeded);
static inline bool IsLocalReplicationOriginSessionActive(void);
PG_FUNCTION_INFO_V1(citus_internal_start_replication_origin_tracking);
PG_FUNCTION_INFO_V1(citus_internal_stop_replication_origin_tracking);
PG_FUNCTION_INFO_V1(citus_internal_is_replication_origin_tracking_active);
/*
* This variable is used to remember the replication origin id of the current session
* before resetting it to DoNotReplicateId in SetupReplicationOriginLocalSession.
*/
static RepOriginId OriginalOriginId = InvalidRepOriginId;
/*
* Setting that controls whether replication origin tracking is enabled
*/
bool EnableChangeDataCapture = false;
/* citus_internal_start_replication_origin_tracking starts a new replication origin session
* in the local node. This function is used to avoid publishing the WAL records to the
* replication slot by setting replication origin to DoNotReplicateId in WAL records.
* It remembers the previous replication origin for the current session which will be
* used to reset the replication origin to the previous value when the session ends.
*/
Datum
citus_internal_start_replication_origin_tracking(PG_FUNCTION_ARGS)
{
if (!EnableChangeDataCapture)
{
PG_RETURN_VOID();
}
SetupReplicationOriginSessionHelper(false);
PG_RETURN_VOID();
}
/* citus_internal_stop_replication_origin_tracking ends the current replication origin session
* in the local node. This function is used to reset the replication origin to the
* earlier value of replication origin.
*/
Datum
citus_internal_stop_replication_origin_tracking(PG_FUNCTION_ARGS)
{
ResetReplicationOriginLocalSession();
PG_RETURN_VOID();
}
/* citus_internal_is_replication_origin_tracking_active checks if the current replication origin
* session is active in the local node.
*/
Datum
citus_internal_is_replication_origin_tracking_active(PG_FUNCTION_ARGS)
{
bool result = IsLocalReplicationOriginSessionActive();
PG_RETURN_BOOL(result);
}
/* IsLocalReplicationOriginSessionActive checks if the current replication origin
* session is active in the local node.
*/
inline bool
IsLocalReplicationOriginSessionActive(void)
{
return (replorigin_session_origin == DoNotReplicateId);
}
/*
* SetupMemoryContextResetReplicationOriginHandler registers a callback function
* that resets the replication origin session in case of any error for the current
* memory context.
*/
static void
SetupMemoryContextResetReplicationOriginHandler()
{
MemoryContextCallback *replicationOriginResetCallback = palloc0(
sizeof(MemoryContextCallback));
replicationOriginResetCallback->func =
ResetReplicationOriginLocalSessionCallbackHandler;
replicationOriginResetCallback->arg = NULL;
MemoryContextRegisterResetCallback(CurrentMemoryContext,
replicationOriginResetCallback);
}
/*
* SetupReplicationOriginSessionHelper sets up a new replication origin session in a
* local session. It takes an argument isContexResetSetupNeeded to decide whether
* to register a callback function that resets the replication origin session in case
* of any error for the current memory context.
*/
static void
SetupReplicationOriginSessionHelper(bool isContexResetSetupNeeded)
{
if (!EnableChangeDataCapture)
{
return;
}
OriginalOriginId = replorigin_session_origin;
replorigin_session_origin = DoNotReplicateId;
if (isContexResetSetupNeeded)
{
SetupMemoryContextResetReplicationOriginHandler();
}
}
/*
* SetupReplicationOriginLocalSession sets up a new replication origin session in a
* local session.
*/
void
SetupReplicationOriginLocalSession()
{
SetupReplicationOriginSessionHelper(true);
}
/*
* ResetReplicationOriginLocalSession resets the replication origin session in a
* local node.
*/
void
ResetReplicationOriginLocalSession(void)
{
if (replorigin_session_origin != DoNotReplicateId)
{
return;
}
replorigin_session_origin = OriginalOriginId;
}
/*
* ResetReplicationOriginLocalSessionCallbackHandler is a callback function that
* resets the replication origin session in a local node. This is used to register
* with MemoryContextRegisterResetCallback to reset the replication origin session
* in case of any error for the given memory context.
*/
void
ResetReplicationOriginLocalSessionCallbackHandler(void *arg)
{
ResetReplicationOriginLocalSession();
}
/*
* SetupReplicationOriginRemoteSession sets up a new replication origin session in a
* remote session. The identifier is used to create a unique replication origin name
* for the session in the remote node.
*/
void
SetupReplicationOriginRemoteSession(MultiConnection *connection)
{
if (!EnableChangeDataCapture)
{
return;
}
if (connection != NULL && !IsRemoteReplicationOriginSessionSetup(connection))
{
StringInfo replicationOriginSessionSetupQuery = makeStringInfo();
appendStringInfo(replicationOriginSessionSetupQuery,
"select pg_catalog.citus_internal_start_replication_origin_tracking();");
ExecuteCriticalRemoteCommand(connection,
replicationOriginSessionSetupQuery->data);
connection->isReplicationOriginSessionSetup = true;
}
}
/*
* ResetReplicationOriginRemoteSession resets the replication origin session in a
* remote node.
*/
void
ResetReplicationOriginRemoteSession(MultiConnection *connection)
{
if (connection != NULL && connection->isReplicationOriginSessionSetup)
{
StringInfo replicationOriginSessionResetQuery = makeStringInfo();
appendStringInfo(replicationOriginSessionResetQuery,
"select pg_catalog.citus_internal_stop_replication_origin_tracking();");
ExecuteCriticalRemoteCommand(connection,
replicationOriginSessionResetQuery->data);
connection->isReplicationOriginSessionSetup = false;
}
}
/*
* IsRemoteReplicationOriginSessionSetup checks if the replication origin is setup
* already in the remote session by calling the UDF
* citus_internal_is_replication_origin_tracking_active(). This is also remembered
* in the connection object to avoid calling the UDF again next time.
*/
static bool
IsRemoteReplicationOriginSessionSetup(MultiConnection *connection)
{
if (connection->isReplicationOriginSessionSetup)
{
return true;
}
StringInfo isReplicationOriginSessionSetupQuery = makeStringInfo();
appendStringInfo(isReplicationOriginSessionSetupQuery,
"SELECT pg_catalog.citus_internal_is_replication_origin_tracking_active()");
bool result =
ExecuteRemoteCommandAndCheckResult(connection,
isReplicationOriginSessionSetupQuery->data,
"t");
connection->isReplicationOriginSessionSetup = result;
return result;
}
@ -503,45 +503,6 @@ SetLocktagForShardDistributionMetadata(int64 shardId, LOCKTAG *tag)
}
/*
* LockPlacementCleanup takes an exclusive lock to ensure that only one process
* can cleanup placements at the same time.
*/
void
LockPlacementCleanup(void)
{
LOCKTAG tag;
const bool sessionLock = false;
const bool dontWait = false;
/* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE.
* This will change as we add support for parallel moves.
*/
SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE);
(void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
}
/*
* TryLockPlacementCleanup takes an exclusive lock to ensure that only one
* process can cleanup placements at the same time.
*/
bool
TryLockPlacementCleanup(void)
{
LOCKTAG tag;
const bool sessionLock = false;
const bool dontWait = true;
/* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE.
* This will change as we add support for parallel moves.
*/
SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE);
bool lockAcquired = LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
return lockAcquired;
}
/*
* LockReferencedReferenceShardDistributionMetadata acquires shard distribution
* metadata locks with the given lock mode on the reference tables which has a
@ -223,8 +223,7 @@ ShardIndex(ShardInterval *shardInterval)
* currently it is not required.
*/
if (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) &&
!IsCitusTableTypeCacheEntry(
cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
HasDistributionKeyCacheEntry(cacheEntry))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("finding index of a given shard is only supported for "
@ -233,7 +232,7 @@ ShardIndex(ShardInterval *shardInterval)
}
/* short-circuit for reference tables */
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
if (!HasDistributionKeyCacheEntry(cacheEntry))
{
/*
* Reference tables and citus local tables have only a single shard,
@ -333,7 +332,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry)
shardIndex = CalculateUniformHashRangeIndex(hashedValue, shardCount);
}
}
else if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
else if (!HasDistributionKeyCacheEntry(cacheEntry))
{
/* non-distributed tables have a single shard, all values mapped to that shard */
Assert(shardCount == 1);

#include "distributed/worker_create_or_replace.h"
#include "distributed/worker_protocol.h"
/*
* OnCollisionAction describes what to do when the created object
* and existing object do not match.
*/
typedef enum OnCollisionAction
{
ON_COLLISION_RENAME,
ON_COLLISION_DROP
} OnCollisionAction;
static List * CreateStmtListByObjectAddress(const ObjectAddress *address);
static bool CompareStringList(List *list1, List *list2);
static OnCollisionAction GetOnCollisionAction(const ObjectAddress *address);
PG_FUNCTION_INFO_V1(worker_create_or_replace_object);
PG_FUNCTION_INFO_V1(worker_create_or_replace_object_array);
@ -192,7 +206,8 @@ WorkerCreateOrReplaceObject(List *sqlStatements)
/*
* Object with name from statement is already found locally, check if states are
* identical. If objects differ we will rename the old object (non- destructively)
* as to make room to create the new object according to the spec sent.
* or drop it (if safe) as to make room to create the new object according to the
* spec sent.
*/
/*
@ -213,11 +228,22 @@ WorkerCreateOrReplaceObject(List *sqlStatements)
return false;
}
char *newName = GenerateBackupNameForCollision(address);
Node *utilityStmt = NULL;
RenameStmt *renameStmt = CreateRenameStatement(address, newName);
const char *sqlRenameStmt = DeparseTreeNode((Node *) renameStmt);
ProcessUtilityParseTree((Node *) renameStmt, sqlRenameStmt,
if (GetOnCollisionAction(address) == ON_COLLISION_DROP)
{
/* drop the existing object */
utilityStmt = (Node *) CreateDropStmt(address);
}
else
{
/* rename the existing object */
char *newName = GenerateBackupNameForCollision(address);
utilityStmt = (Node *) CreateRenameStatement(address, newName);
}
const char *commandString = DeparseTreeNode(utilityStmt);
ProcessUtilityParseTree(utilityStmt, commandString,
PROCESS_UTILITY_QUERY,
NULL, None_Receiver, NULL);
}
@ -286,6 +312,11 @@ CreateStmtListByObjectAddress(const ObjectAddress *address)
return list_make1(GetFunctionDDLCommand(address->objectId, false));
}
case OCLASS_PUBLICATION:
{
return list_make1(CreatePublicationDDLCommand(address->objectId));
}
case OCLASS_TSCONFIG:
{
List *stmts = GetCreateTextSearchConfigStatements(address);
@ -312,6 +343,37 @@ CreateStmtListByObjectAddress(const ObjectAddress *address)
}
/*
* GetOnCollisionAction decides what to do when the object already exists.
*/
static OnCollisionAction
GetOnCollisionAction(const ObjectAddress *address)
{
switch (getObjectClass(address))
{
case OCLASS_PUBLICATION:
{
/*
* We prefer to drop publications because they can be
* harmful (cause update/delete failures) and are relatively
* safe to drop.
*/
return ON_COLLISION_DROP;
}
case OCLASS_COLLATION:
case OCLASS_PROC:
case OCLASS_TSCONFIG:
case OCLASS_TSDICT:
case OCLASS_TYPE:
default:
{
return ON_COLLISION_RENAME;
}
}
}
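For context, a sketch of the path that reaches ON_COLLISION_DROP; the publication DDL is hypothetical:

-- if citus_pub already exists on the worker with a different definition,
-- the existing publication is dropped before the new one is created
SELECT worker_create_or_replace_object('CREATE PUBLICATION citus_pub FOR ALL TABLES;');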
/*
* GenerateBackupNameForCollision calculates a backup name for a given object by its
* address. This name should be used when renaming an existing object before creating the
@ -362,6 +424,64 @@ GenerateBackupNameForCollision(const ObjectAddress *address)
}
/*
* CreateDropPublicationStmt creates a DROP PUBLICATION statement for the
* publication at the given address.
*/
static DropStmt *
CreateDropPublicationStmt(const ObjectAddress *address)
{
Assert(address->classId == PublicationRelationId);
DropStmt *dropStmt = makeNode(DropStmt);
dropStmt->removeType = OBJECT_PUBLICATION;
dropStmt->behavior = DROP_RESTRICT;
HeapTuple publicationTuple =
SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(address->objectId));
if (!HeapTupleIsValid(publicationTuple))
{
ereport(ERROR, (errmsg("cannot find publication with oid: %d",
address->objectId)));
}
Form_pg_publication publicationForm =
(Form_pg_publication) GETSTRUCT(publicationTuple);
char *publicationName = NameStr(publicationForm->pubname);
dropStmt->objects = list_make1(makeString(publicationName));
ReleaseSysCache(publicationTuple);
return dropStmt;
}
/*
* CreateDropStmt returns a DROP statement for the given object.
*/
DropStmt *
CreateDropStmt(const ObjectAddress *address)
{
switch (getObjectClass(address))
{
case OCLASS_PUBLICATION:
{
return CreateDropPublicationStmt(address);
}
default:
{
break;
}
}
ereport(ERROR, (errmsg("unsupported object to construct a drop statement"),
errdetail("unable to generate a parsetree for the drop")));
}
/*
* CreateRenameTypeStmt creates a rename statement for a type based on its ObjectAddress.
* The rename statement will rename the existing object on its address to the value
@ -70,6 +70,7 @@ static void AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequence
PG_FUNCTION_INFO_V1(worker_apply_shard_ddl_command);
PG_FUNCTION_INFO_V1(worker_apply_inter_shard_ddl_command);
PG_FUNCTION_INFO_V1(worker_apply_sequence_command);
PG_FUNCTION_INFO_V1(worker_adjust_identity_column_seq_ranges);
PG_FUNCTION_INFO_V1(worker_append_table_to_shard);
PG_FUNCTION_INFO_V1(worker_nextval);
@ -133,6 +134,60 @@ worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS)
}
/*
* worker_adjust_identity_column_seq_ranges takes a table oid, runs an ALTER SEQUENCE statement
* for each identity column to adjust the minvalue and maxvalue of the sequence owned by
* identity column such that the sequence creates globally unique values.
* We use table oid instead of sequence name to avoid any potential conflicts
* between sequences of different tables. This way, we can safely iterate through
* identity columns on a specific table without any issues. While this may
* introduce a small amount of business logic to workers, it's a much safer
* approach overall.
*/
Datum
worker_adjust_identity_column_seq_ranges(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
Oid tableRelationId = PG_GETARG_OID(0);
EnsureTableOwner(tableRelationId);
Relation tableRelation = relation_open(tableRelationId, AccessShareLock);
TupleDesc tableTupleDesc = RelationGetDescr(tableRelation);
bool missingSequenceOk = false;
for (int attributeIndex = 0; attributeIndex < tableTupleDesc->natts;
attributeIndex++)
{
Form_pg_attribute attributeForm = TupleDescAttr(tableTupleDesc,
attributeIndex);
/* skip dropped columns */
if (attributeForm->attisdropped)
{
continue;
}
if (attributeForm->attidentity)
{
Oid sequenceOid = getIdentitySequence(tableRelationId,
attributeForm->attnum,
missingSequenceOk);
Oid sequenceSchemaOid = get_rel_namespace(sequenceOid);
char *sequenceSchemaName = get_namespace_name(sequenceSchemaOid);
char *sequenceName = get_rel_name(sequenceOid);
Oid sequenceTypeId = pg_get_sequencedef(sequenceOid)->seqtypid;
AlterSequenceMinMax(sequenceOid, sequenceSchemaName, sequenceName,
sequenceTypeId);
}
}
relation_close(tableRelation, NoLock);
PG_RETURN_VOID();
}
/*
* worker_apply_sequence_command takes a CREATE SEQUENCE command string, runs the
* CREATE SEQUENCE command then creates and runs an ALTER SEQUENCE statement
@ -351,18 +351,17 @@ ShouldHideShardsInternal(void)
return false;
}
}
else if (MyBackendType != B_BACKEND)
else if (MyBackendType != B_BACKEND && MyBackendType != B_WAL_SENDER)
{
/*
* We are aiming only to hide shards from client
* backends or certain background workers (see above),
* not backends like walsender or checkpointer.
*/
return false;
}
if (IsCitusInternalBackend() || IsRebalancerInternalBackend() ||
IsCitusRunCommandBackend())
IsCitusRunCommandBackend() || IsCitusShardTransferBackend())
{
/* we never hide shards from Citus */
return false;