Bump Citus version to 10.2.9

Add changelog entries for 10.2.9
(cherry picked from commit 65f957d345)
2023-04-25 15:51:16 +03:00 · 2023-04-25 15:50:30 +03:00 · 2023-03-06 11:56:05 +01:00 · 2023-02-17 14:38:36 +03:00 · 2023-01-30 19:15:28 +03:00 · 2022-10-11 15:03:47 +03:00
151 changed files with 9352 additions and 1076 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -158,6 +158,14 @@ jobs:
                cp core.* /tmp/core_dumps
            fi
          when: on_fail
+      - run:
+          name: 'Copy pg_upgrade logs for newData dir'
+          command: |
+            mkdir -p /tmp/pg_upgrade_newData_logs
+            if ls src/test/regress/tmp_upgrade/newData/*.log 1> /dev/null 2>&1; then
+                cp src/test/regress/tmp_upgrade/newData/*.log /tmp/pg_upgrade_newData_logs
+            fi
+          when: on_fail
      - store_artifacts:
          name: 'Save regressions'
          path: src/test/regress/regression.diffs
@ -166,6 +174,9 @@ jobs:
          name: 'Save core dumps'
          path: /tmp/core_dumps
          when: on_fail
+      - store_artifacts:
+          name: 'Save pg_upgrade logs for newData dir'
+          path: /tmp/pg_upgrade_newData_logs
      - codecov/upload:
          flags: 'test_<< parameters.old_pg_major >>_<< parameters.new_pg_major >>,upgrade'

@ -379,7 +390,7 @@ jobs:
          when: on_fail
      - store_artifacts:
          name: 'Save tap logs'
-          path: /home/circleci/project/src/test/recovery/tmp_check/log
+          path: /home/circleci/project/src/test/<< parameters.suite >>/tmp_check/log
          when: on_fail
      - store_artifacts:
          name: 'Save core dumps'
@ -451,7 +462,7 @@ workflows:
      - build:
          name: build-14
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'

      - check-style
      - check-sql-snapshots
@ -522,6 +533,12 @@ workflows:
          image_tag: '12.8'
          suite: recovery
          requires: [build-12]
+      - tap-test-citus:
+          name: 'test-12_tap-columnar-freezing'
+          pg_major: 12
+          image_tag: '12.8'
+          suite: columnar_freezing
+          requires: [build-12]
      - test-citus:
          name: 'test-12_check-failure'
          pg_major: 12
@ -596,6 +613,12 @@ workflows:
          image_tag: '13.4'
          suite: recovery
          requires: [build-13]
+      - tap-test-citus:
+          name: 'test-13_tap-columnar-freezing'
+          pg_major: 13
+          image_tag: '13.4'
+          suite: columnar_freezing
+          requires: [build-13]
      - test-citus:
          name: 'test-13_check-failure'
          pg_major: 13
@ -607,74 +630,80 @@ workflows:
      - test-citus:
          name: 'test-14_check-multi'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-multi
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-multi-1'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-multi-1
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-mx'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-multi-mx
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-vanilla'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-vanilla
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-isolation'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-isolation
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-worker'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-worker
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-operations'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-operations
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-follower-cluster'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-follower-cluster
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-columnar'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-columnar
          requires: [build-14]
      - test-citus:
          name: 'test-14_check-columnar-isolation'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-columnar-isolation
          requires: [build-14]
      - tap-test-citus:
          name: 'test_14_tap-recovery'
          pg_major: 14
-          image_tag: '14beta3'
+          image_tag: '14.0'
          suite: recovery
          requires: [build-14]
+      - tap-test-citus:
+          name: 'test-14_tap-columnar-freezing'
+          pg_major: 14
+          image_tag: '14.0'
+          suite: columnar_freezing
+          requires: [build-14]
      - test-citus:
          name: 'test-14_check-failure'
          pg_major: 14
          image: citus/failtester
-          image_tag: '14beta3'
+          image_tag: '14.0'
          make: check-failure
          requires: [build-14]

@ -689,14 +718,14 @@ workflows:
          name: 'test-12-14_check-pg-upgrade'
          old_pg_major: 12
          new_pg_major: 14
-          image_tag: '12-13-14'
+          image_tag: '12.8-13.4-14.0'
          requires: [build-12,build-14]

      - test-pg-upgrade:
          name: 'test-13-14_check-pg-upgrade'
          old_pg_major: 13
          new_pg_major: 14
-          image_tag: '12-13-14'
+          image_tag: '12.8-13.4-14.0'
          requires: [build-13,build-14]

      - test-citus-upgrade:
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,3 +1,121 @@
+### citus v10.2.9 (April 20, 2023) ###
+
+* Correctly reports shard size in `citus_shards` view (#6748)
+
+* Fixes a bug in `ALTER EXTENSION citus UPDATE` (#6383)
+
+* Fixes a bug that breaks pg upgrades if the user has a columnar table (#6624)
+
+* Fixes a bug that prevents retaining columnar table options after a
+  table-rewrite (#6337)
+
+* Fixes a memory leak issue with query results that return a single row (#6724)
+
+* Raises memory limits in columnar from 256MB to 1GB for reads and writes
+  (#6419)
+
+### citus v10.2.8 (August 19, 2022) ###
+
+* Fixes compilation warning caused by latest upgrade script changes
+
+* Fixes compilation warning on PG13 + OpenSSL 3.0
+
+### citus v10.2.7 (August 19, 2022) ###
+
+* Fixes a bug that could cause failures in `INSERT INTO .. SELECT`
+
+* Fixes a bug that could cause leaking files when materialized views are
+  refreshed
+
+* Fixes an unexpected error for foreign tables when upgrading Postgres
+
+* Fixes columnar freezing/wraparound bug
+
+* Fixes reference table lock contention
+
+* Prevents alter table functions from dropping extensions
+
+### citus v10.2.5 (March 15, 2022) ###
+
+* Fixes a bug that could cause `worker_save_query_explain_analyze` to fail on
+  custom types
+
+* Fixes a bug that limits usage of sequences in non-integer columns
+
+* Fixes a crash that occurs when the aggregate that cannot be pushed-down
+  returns empty result from a worker
+
+* Improves concurrent metadata syncing and metadata changing DDL operations
+
+### citus v10.2.4 (February 1, 2022) ###
+
+* Adds support for operator class parameters in indexes
+
+* Fixes a bug with distributed functions that have `OUT` parameters or
+  return `TABLE`
+
+* Fixes a build error that happens when `lz4` is not installed
+
+* Improves self-deadlock prevention for `CREATE INDEX` &
+  `REINDEX CONCURRENTLY` commands for builds using PG14 or higher
+
+* Fixes a bug that causes commands to fail when `application_name` is set
+
+### citus v10.2.3 (November 29, 2021) ###
+
+* Adds `fix_partition_shard_index_names` udf to fix currently broken
+  partition index names
+
+* Fixes a bug that could break `DROP SCHEMA/EXTENSION` commands when there is
+  a columnar table
+
+* Fixes a bug that could break pg upgrades due to missing `pg_depend` records
+  for columnar table access method
+
+* Fixes a missing `FROM` clause entry error
+
+* Fixes an unexpected error that occurs when writing to a columnar table
+  created in older versions
+
+* Fixes issue when compiling Citus from source with some compilers
+
+* Reinstates optimisation for uniform shard interval ranges
+
+* Relaxes table ownership check to privileges check while acquiring lock
+
+### citus v10.2.2 (October 14, 2021) ###
+
+* Fixes a bug that causes reading columnar metapage as all-zeros when
+  writing to a columnar table
+
+* Fixes a bug that could cause prerequisite columnar table access method
+  objects to not be created during pg upgrades
+
+* Fixes a bug that could cause `CREATE INDEX` to fail for expressions when
+  using custom `search_path`
+
+* Fixes an unexpected error that occurs due to aborted writes to a columnar
+  table with an index
+
+### citus v10.2.1 (September 24, 2021) ###
+
+* Adds missing version-mismatch checks for columnar tables
+
+* Adds missing version-mismatch checks for internal functions
+
+* Fixes a bug that could cause partition shards to not be co-located with
+  parent shards
+
+* Fixes a bug that prevents pushing down boolean expressions when using
+  columnar custom scan
+
+* Fixes a clog lookup failure that could occur when writing to a columnar table
+
+* Fixes an issue that could cause unexpected errors when there is an
+  in-progress write to a columnar table
+
+* Revokes read access to `columnar.chunk` from unprivileged user
+
 ### citus v10.2.0 (September 14, 2021) ###

 * Adds PostgreSQL 14 support
--- a/24
+++ b/24
@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for Citus 10.2devel.
+# Generated by GNU Autoconf 2.69 for Citus 10.2.9.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@ -579,8 +579,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='Citus'
 PACKAGE_TARNAME='citus'
-PACKAGE_VERSION='10.2devel'
-PACKAGE_STRING='Citus 10.2devel'
+PACKAGE_VERSION='10.2.9'
+PACKAGE_STRING='Citus 10.2.9'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@ -1260,7 +1260,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures Citus 10.2devel to adapt to many kinds of systems.
+\`configure' configures Citus 10.2.9 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@ -1322,7 +1322,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of Citus 10.2devel:";;
+     short | recursive ) echo "Configuration of Citus 10.2.9:";;
   esac
  cat <<\_ACEOF

@ -1425,7 +1425,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-Citus configure 10.2devel
+Citus configure 10.2.9
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
@ -1908,7 +1908,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by Citus $as_me 10.2devel, which was
+It was created by Citus $as_me 10.2.9, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  $ $0 $@
@ -4543,7 +4543,9 @@ if test "${with_lz4+set}" = set; then :
  withval=$with_lz4;
  case $withval in
    yes)
-      :
+
+$as_echo "#define HAVE_CITUS_LIBLZ4 1" >>confdefs.h
+
      ;;
    no)
      :
@ -4556,6 +4558,8 @@ if test "${with_lz4+set}" = set; then :
 else
  with_lz4=yes

+$as_echo "#define HAVE_CITUS_LIBLZ4 1" >>confdefs.h
+
 fi


@ -5356,7 +5360,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by Citus $as_me 10.2devel, which was
+This file was extended by Citus $as_me 10.2.9, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@ -5418,7 +5422,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-Citus config.status 10.2devel
+Citus config.status 10.2.9
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"

--- a/configure.in
+++ b/configure.in
@ -5,7 +5,7 @@
 # everyone needing autoconf installed, the resulting files are checked
 # into the SCM.

-AC_INIT([Citus], [10.2devel])
+AC_INIT([Citus], [10.2.9])
 AC_COPYRIGHT([Copyright (c) Citus Data, Inc.])

 # we'll need sed and awk for some of the version commands
@ -220,7 +220,8 @@ AC_DEFINE_UNQUOTED(REPORTS_BASE_URL, "$REPORTS_BASE_URL",
 # LZ4
 #
 PGAC_ARG_BOOL(with, lz4, yes,
-              [do not use lz4])
+              [do not use lz4],
+              [AC_DEFINE([HAVE_CITUS_LIBLZ4], 1, [Define to 1 to build with lz4 support. (--with-lz4)])])
 AC_SUBST(with_lz4)

 if test "$with_lz4" = yes; then
--- a/src/backend/columnar/columnar.c
+++ b/src/backend/columnar/columnar.c
@ -29,7 +29,7 @@

 #if HAVE_LIBZSTD
 #define DEFAULT_COMPRESSION_TYPE COMPRESSION_ZSTD
-#elif HAVE_LIBLZ4
+#elif HAVE_CITUS_LIBLZ4
 #define DEFAULT_COMPRESSION_TYPE COMPRESSION_LZ4
 #else
 #define DEFAULT_COMPRESSION_TYPE COMPRESSION_PG_LZ
@ -44,7 +44,7 @@ static const struct config_enum_entry columnar_compression_options[] =
 {
 	{ "none", COMPRESSION_NONE, false },
 	{ "pglz", COMPRESSION_PG_LZ, false },
-#if HAVE_LIBLZ4
+#if HAVE_CITUS_LIBLZ4
 	{ "lz4", COMPRESSION_LZ4, false },
 #endif
 #if HAVE_LIBZSTD
--- a/src/backend/columnar/columnar_compression.c
+++ b/src/backend/columnar/columnar_compression.c
@ -19,7 +19,7 @@

 #include "columnar/columnar_compression.h"

-#if HAVE_LIBLZ4
+#if HAVE_CITUS_LIBLZ4
 #include <lz4.h>
 #endif

@ -63,7 +63,7 @@ CompressBuffer(StringInfo inputBuffer,
 {
 	switch (compressionType)
 	{
-#if HAVE_LIBLZ4
+#if HAVE_CITUS_LIBLZ4
 		case COMPRESSION_LZ4:
 		{
 			int maximumLength = LZ4_compressBound(inputBuffer->len);
@ -170,7 +170,7 @@ DecompressBuffer(StringInfo buffer,
 			return buffer;
 		}

-#if HAVE_LIBLZ4
+#if HAVE_CITUS_LIBLZ4
 		case COMPRESSION_LZ4:
 		{
 			StringInfo decompressedBuffer = makeStringInfo();
--- a/src/backend/columnar/columnar_customscan.c
+++ b/src/backend/columnar/columnar_customscan.c
@ -605,10 +605,11 @@ RelationIdGetNumberOfAttributes(Oid relationId)
 /*
 * CheckVarStats() checks whether a qual involving this Var is likely to be
 * useful based on the correlation stats. If so, or if stats are unavailable,
- * return true; otherwise return false.
+ * return true; otherwise return false and set absVarCorrelation in case
+ * the caller wants to use it for logging purposes.
 */
 static bool
-CheckVarStats(PlannerInfo *root, Var *var, Oid sortop)
+CheckVarStats(PlannerInfo *root, Var *var, Oid sortop, float4 *absVarCorrelation)
 {
 	/*
 	 * Collect isunique, ndistinct, and varCorrelation.
@ -642,6 +643,14 @@ CheckVarStats(PlannerInfo *root, Var *var, Oid sortop)
 	 */
 	if (Abs(varCorrelation) < ColumnarQualPushdownCorrelationThreshold)
 	{
+		if (absVarCorrelation)
+		{
+			/*
+			 * Report absVarCorrelation if caller wants to know why given
+			 * var is rejected.
+			 */
+			*absVarCorrelation = Abs(varCorrelation);
+		}
 		return false;
 	}

@ -674,7 +683,7 @@ ExprReferencesRelid(Expr *expr, Index relid)


 /*
- * CheckPushdownClause tests to see if clause is a candidate for pushing down
+ * ExtractPushdownClause extracts an Expr node from given clause for pushing down
 * into the given rel (including join clauses). This test may not be exact in
 * all cases; it's used to reduce the search space for parameterization.
 *
@ -683,19 +692,134 @@ ExprReferencesRelid(Expr *expr, Index relid)
 * and that doesn't seem worth the effort. Here we just look for "Var op Expr"
 * or "Expr op Var", where Var references rel and Expr references other rels
 * (or no rels at all).
+ *
+ * Moreover, this function also looks into BoolExpr's to recursively extract
+ * pushdownable OpExpr's of them:
+ * i)   AND_EXPR:
+ *      Take pushdownable args of AND expressions by ignoring the other args.
+ * ii)  OR_EXPR:
+ *      Ignore the whole OR expression if we cannot extract a pushdownable Expr
+ *      from one of its args.
+ * iii) NOT_EXPR:
+ *      Simply ignore NOT expressions since we don't expect to see them before
+ *      an expression that we can pushdown, see the comment in function.
+ *
+ * The reasoning for those three rules could also be summarized as such;
+ * for any expression that we cannot push-down, we must assume that it
+ * evaluates to true.
+ *
+ * For example, given following WHERE clause:
+ * (
+ *     (a > random() OR a < 30)
+ *     AND
+ *     a < 200
+ * ) OR
+ * (
+ *     a = 300
+ *     OR
+ *     a > 400
+ * );
+ * Even if we can pushdown (a < 30), we cannot pushdown (a > random() OR a < 30)
+ * due to (a > random()). However, we can pushdown (a < 200), so we extract
+ * (a < 200) from the lhs of the top level OR expression.
+ *
+ * For the rhs of the top level OR expression, since we can pushdown both (a = 300)
+ * and (a > 400), we take this part as is.
+ *
+ * Finally, since both sides of the top level OR expression yielded pushdownable
+ * expressions, we will pushdown the following:
+ *  (a < 200) OR ((a = 300) OR (a > 400))
 */
-static bool
-CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause)
+static Expr *
+ExtractPushdownClause(PlannerInfo *root, RelOptInfo *rel, Node *node)
 {
-	if (!IsA(clause, OpExpr) || list_length(((OpExpr *) clause)->args) != 2)
+	CHECK_FOR_INTERRUPTS();
+	check_stack_depth();
+
+	if (node == NULL)
+	{
+		return NULL;
+	}
+
+	if (IsA(node, BoolExpr))
+	{
+		BoolExpr *boolExpr = castNode(BoolExpr, node);
+		if (boolExpr->boolop == NOT_EXPR)
+		{
+			/*
+			 * Standard planner should have already applied de-morgan rule to
+			 * simple NOT expressions. If we encounter with such an expression
+			 * here, then it can't be a pushdownable one, such as:
+			 *   WHERE id NOT IN (SELECT id FROM something).
+			 */
+			ereport(ColumnarPlannerDebugLevel,
+					(errmsg("columnar planner: cannot push down clause: "
+							"must not contain a subplan")));
+			return NULL;
+		}
+
+		List *pushdownableArgs = NIL;
+
+		Node *boolExprArg = NULL;
+		foreach_ptr(boolExprArg, boolExpr->args)
+		{
+			Expr *pushdownableArg = ExtractPushdownClause(root, rel,
+														  (Node *) boolExprArg);
+			if (pushdownableArg)
+			{
+				pushdownableArgs = lappend(pushdownableArgs, pushdownableArg);
+			}
+			else if (boolExpr->boolop == OR_EXPR)
+			{
+				ereport(ColumnarPlannerDebugLevel,
+						(errmsg("columnar planner: cannot push down clause: "
+								"all arguments of an OR expression must be "
+								"pushdownable but one of them was not, due "
+								"to the reason given above")));
+				return NULL;
+			}
+
+			/* simply skip AND args that we cannot pushdown */
+		}
+
+		int npushdownableArgs = list_length(pushdownableArgs);
+		if (npushdownableArgs == 0)
+		{
+			ereport(ColumnarPlannerDebugLevel,
+					(errmsg("columnar planner: cannot push down clause: "
+							"none of the arguments were pushdownable, "
+							"due to the reason(s) given above ")));
+			return NULL;
+		}
+		else if (npushdownableArgs == 1)
+		{
+			return (Expr *) linitial(pushdownableArgs);
+		}
+
+		if (boolExpr->boolop == AND_EXPR)
+		{
+			return make_andclause(pushdownableArgs);
+		}
+		else if (boolExpr->boolop == OR_EXPR)
+		{
+			return make_orclause(pushdownableArgs);
+		}
+		else
+		{
+			/* already discarded NOT expr, so should not be reachable */
+			return NULL;
+		}
+	}
+
+	if (!IsA(node, OpExpr) || list_length(((OpExpr *) node)->args) != 2)
 	{
 		ereport(ColumnarPlannerDebugLevel,
 				(errmsg("columnar planner: cannot push down clause: "
 						"must be binary operator expression")));
-		return false;
+		return NULL;
 	}

-	OpExpr *opExpr = castNode(OpExpr, clause);
+	OpExpr *opExpr = castNode(OpExpr, node);
 	Expr *lhs = list_nth(opExpr->args, 0);
 	Expr *rhs = list_nth(opExpr->args, 1);

@ -721,15 +845,15 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause)
 						"must match 'Var <op> Expr' or 'Expr <op> Var'"),
 				 errhint("Var must only reference this rel, "
 						 "and Expr must not reference this rel")));
-		return false;
+		return NULL;
 	}

 	if (varSide->varattno <= 0)
 	{
 		ereport(ColumnarPlannerDebugLevel,
 				(errmsg("columnar planner: cannot push down clause: "
-						"var is whole-row reference")));
-		return false;
+						"var is whole-row reference or system column")));
+		return NULL;
 	}

 	if (contain_volatile_functions((Node *) exprSide))
@ -737,7 +861,7 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause)
 		ereport(ColumnarPlannerDebugLevel,
 				(errmsg("columnar planner: cannot push down clause: "
 						"expr contains volatile functions")));
-		return false;
+		return NULL;
 	}

 	/* only the default opclass is used for qual pushdown. */
@ -753,7 +877,7 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause)
 				(errmsg("columnar planner: cannot push down clause: "
 						"cannot find default btree opclass and opfamily for type: %s",
 						format_type_be(varSide->vartype))));
-		return false;
+		return NULL;
 	}

 	if (!op_in_opfamily(opExpr->opno, varOpFamily))
@ -762,7 +886,7 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause)
 				(errmsg("columnar planner: cannot push down clause: "
 						"operator %d not a member of opfamily %d",
 						opExpr->opno, varOpFamily)));
-		return false;
+		return NULL;
 	}

 	Oid sortop = get_opfamily_member(varOpFamily, varOpcInType,
@ -773,15 +897,20 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause)
 	 * Check that statistics on the Var support the utility of this
 	 * clause.
 	 */
-	if (!CheckVarStats(root, varSide, sortop))
+	float4 absVarCorrelation = 0;
+	if (!CheckVarStats(root, varSide, sortop, &absVarCorrelation))
 	{
 		ereport(ColumnarPlannerDebugLevel,
 				(errmsg("columnar planner: cannot push down clause: "
-						"var attribute %d is uncorrelated", varSide->varattno)));
-		return false;
+						"absolute correlation (%.3f) of var attribute %d is "
+						"smaller than the value configured in "
+						"\"columnar.qual_pushdown_correlation_threshold\" "
+						"(%.3f)", absVarCorrelation, varSide->varattno,
+						ColumnarQualPushdownCorrelationThreshold)));
+		return NULL;
 	}

-	return true;
+	return (Expr *) node;
 }


@ -806,12 +935,19 @@ FilterPushdownClauses(PlannerInfo *root, RelOptInfo *rel, List *inputClauses)
 		 * there's something we should do with pseudoconstants here.
 		 */
 		if (rinfo->pseudoconstant ||
-			!bms_is_member(rel->relid, rinfo->required_relids) ||
-			!CheckPushdownClause(root, rel, rinfo->clause))
+			!bms_is_member(rel->relid, rinfo->required_relids))
 		{
 			continue;
 		}

+		Expr *pushdownableExpr = ExtractPushdownClause(root, rel, (Node *) rinfo->clause);
+		if (!pushdownableExpr)
+		{
+			continue;
+		}
+
+		rinfo = copyObject(rinfo);
+		rinfo->clause = pushdownableExpr;
 		filteredClauses = lappend(filteredClauses, rinfo);
 	}

--- a/src/backend/columnar/columnar_metadata.c
+++ b/src/backend/columnar/columnar_metadata.c
@ -60,6 +60,10 @@
 #include "utils/rel.h"
 #include "utils/relfilenodemap.h"

+#define SLOW_METADATA_ACCESS_WARNING \
+	"Metadata index %s is not available, this might mean slower read/writes " \
+	"on columnar tables. This is expected during Postgres upgrades and not " \
+	"expected otherwise."

 typedef struct
 {
@ -335,8 +339,13 @@ DeleteColumnarTableOptions(Oid regclass, bool missingOk)
 	 */
 	Assert(!IsBinaryUpgrade);

-	Relation columnarOptions = relation_open(ColumnarOptionsRelationId(),
+	Relation columnarOptions = try_relation_open(ColumnarOptionsRelationId(),
 												 RowExclusiveLock);
+	if (columnarOptions == NULL)
+	{
+		/* extension has been dropped */
+		return false;
+	}

 	/* find existing item to remove */
 	ScanKeyData scanKey[1] = { 0 };
@ -547,15 +556,23 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri

 	Oid columnarChunkOid = ColumnarChunkRelationId();
 	Relation columnarChunk = table_open(columnarChunkOid, AccessShareLock);
-	Relation index = index_open(ColumnarChunkIndexRelationId(), AccessShareLock);

 	ScanKeyInit(&scanKey[0], Anum_columnar_chunk_storageid,
 				BTEqualStrategyNumber, F_OIDEQ, UInt64GetDatum(storageId));
 	ScanKeyInit(&scanKey[1], Anum_columnar_chunk_stripe,
 				BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe));

-	SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarChunk, index,
-															snapshot, 2, scanKey);
+	Oid indexId = ColumnarChunkIndexRelationId();
+	bool indexOk = OidIsValid(indexId);
+	SysScanDesc scanDescriptor = systable_beginscan(columnarChunk, indexId,
+													indexOk, snapshot, 2, scanKey);
+
+	static bool loggedSlowMetadataAccessWarning = false;
+	if (!indexOk && !loggedSlowMetadataAccessWarning)
+	{
+		ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, "chunk_pkey")));
+		loggedSlowMetadataAccessWarning = true;
+	}

 	StripeSkipList *chunkList = palloc0(sizeof(StripeSkipList));
 	chunkList->chunkCount = chunkCount;
@ -567,8 +584,7 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri
 			palloc0(chunkCount * sizeof(ColumnChunkSkipNode));
 	}

-	while (HeapTupleIsValid(heapTuple = systable_getnext_ordered(scanDescriptor,
-																 ForwardScanDirection)))
+	while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
 	{
 		Datum datumArray[Natts_columnar_chunk];
 		bool isNullArray[Natts_columnar_chunk];
@ -633,8 +649,7 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri
 		}
 	}

-	systable_endscan_ordered(scanDescriptor);
-	index_close(index, AccessShareLock);
+	systable_endscan(scanDescriptor);
 	table_close(columnarChunk, AccessShareLock);

 	chunkList->chunkGroupRowCounts =
@ -645,9 +660,9 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri


 /*
- * FindStripeByRowNumber returns StripeMetadata for the stripe whose
- * firstRowNumber is greater than given rowNumber. If no such stripe
- * exists, then returns NULL.
+ * FindNextStripeByRowNumber returns StripeMetadata for the stripe that has
+ * the smallest firstRowNumber among the stripes whose firstRowNumber is
+ * greater than given rowNumber. If no such stripe exists, then returns NULL.
 */
 StripeMetadata *
 FindNextStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot)
@ -737,8 +752,7 @@ StripeGetHighestRowNumber(StripeMetadata *stripeMetadata)
 /*
 * StripeMetadataLookupRowNumber returns StripeMetadata for the stripe whose
 * firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL), or is
- * greater than (FIND_GREATER) given rowNumber by doing backward index
- * scan on stripe_first_row_number_idx.
+ * greater than (FIND_GREATER) given rowNumber.
 * If no such stripe exists, then returns NULL.
 */
 static StripeMetadata *
@ -769,14 +783,23 @@ StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snap
 	ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number,
 				strategyNumber, procedure, UInt64GetDatum(rowNumber));

-
 	Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock);
-	Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(),
-								AccessShareLock);
-	SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index,
-															snapshot, 2,
-															scanKey);

+	Oid indexId = ColumnarStripeFirstRowNumberIndexRelationId();
+	bool indexOk = OidIsValid(indexId);
+	SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId, indexOk,
+													snapshot, 2, scanKey);
+
+	static bool loggedSlowMetadataAccessWarning = false;
+	if (!indexOk && !loggedSlowMetadataAccessWarning)
+	{
+		ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING,
+								 "stripe_first_row_number_idx")));
+		loggedSlowMetadataAccessWarning = true;
+	}
+
+	if (indexOk)
+	{
 		ScanDirection scanDirection = NoMovementScanDirection;
 		if (lookupMode == FIND_LESS_OR_EQUAL)
 		{
@ -791,9 +814,40 @@ StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snap
 		{
 			foundStripeMetadata = BuildStripeMetadata(columnarStripes, heapTuple);
 		}
+	}
+	else
+	{
+		HeapTuple heapTuple = NULL;
+		while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
+		{
+			StripeMetadata *stripe = BuildStripeMetadata(columnarStripes, heapTuple);
+			if (!foundStripeMetadata)
+			{
+				/* first match */
+				foundStripeMetadata = stripe;
+			}
+			else if (lookupMode == FIND_LESS_OR_EQUAL &&
+					 stripe->firstRowNumber > foundStripeMetadata->firstRowNumber)
+			{
+				/*
+				 * Among the stripes with firstRowNumber less-than-or-equal-to given,
+				 * we're looking for the one with the greatest firstRowNumber.
+				 */
+				foundStripeMetadata = stripe;
+			}
+			else if (lookupMode == FIND_GREATER &&
+					 stripe->firstRowNumber < foundStripeMetadata->firstRowNumber)
+			{
+				/*
+				 * Among the stripes with firstRowNumber greater-than given,
+				 * we're looking for the one with the smallest firstRowNumber.
+				 */
+				foundStripeMetadata = stripe;
+			}
+		}
+	}

-	systable_endscan_ordered(scanDescriptor);
-	index_close(index, AccessShareLock);
+	systable_endscan(scanDescriptor);
 	table_close(columnarStripes, AccessShareLock);

 	return foundStripeMetadata;
@ -867,8 +921,8 @@ CheckStripeMetadataConsistency(StripeMetadata *stripeMetadata)

 /*
 * FindStripeWithHighestRowNumber returns StripeMetadata for the stripe that
- * has the row with highest rowNumber by doing backward index scan on
- * stripe_first_row_number_idx. If given relation is empty, then returns NULL.
+ * has the row with highest rowNumber. If given relation is empty, then returns
+ * NULL.
 */
 StripeMetadata *
 FindStripeWithHighestRowNumber(Relation relation, Snapshot snapshot)
@ -881,19 +935,46 @@ FindStripeWithHighestRowNumber(Relation relation, Snapshot snapshot)
 				BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId));

 	Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock);
-	Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(),
-								AccessShareLock);
-	SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index,
+
+	Oid indexId = ColumnarStripeFirstRowNumberIndexRelationId();
+	bool indexOk = OidIsValid(indexId);
+	SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId, indexOk,
 													snapshot, 1, scanKey);

-	HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, BackwardScanDirection);
+	static bool loggedSlowMetadataAccessWarning = false;
+	if (!indexOk && !loggedSlowMetadataAccessWarning)
+	{
+		ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING,
+								 "stripe_first_row_number_idx")));
+		loggedSlowMetadataAccessWarning = true;
+	}
+
+	if (indexOk)
+	{
+		/* do one-time fetch using the index */
+		HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor,
+													   BackwardScanDirection);
 		if (HeapTupleIsValid(heapTuple))
 		{
 			stripeWithHighestRowNumber = BuildStripeMetadata(columnarStripes, heapTuple);
 		}
+	}
+	else
+	{
+		HeapTuple heapTuple = NULL;
+		while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
+		{
+			StripeMetadata *stripe = BuildStripeMetadata(columnarStripes, heapTuple);
+			if (!stripeWithHighestRowNumber ||
+				stripe->firstRowNumber > stripeWithHighestRowNumber->firstRowNumber)
+			{
+				/* first or a greater match */
+				stripeWithHighestRowNumber = stripe;
+			}
+		}
+	}

-	systable_endscan_ordered(scanDescriptor);
-	index_close(index, AccessShareLock);
+	systable_endscan(scanDescriptor);
 	table_close(columnarStripes, AccessShareLock);

 	return stripeWithHighestRowNumber;
@ -910,7 +991,6 @@ ReadChunkGroupRowCounts(uint64 storageId, uint64 stripe, uint32 chunkGroupCount,
 {
 	Oid columnarChunkGroupOid = ColumnarChunkGroupRelationId();
 	Relation columnarChunkGroup = table_open(columnarChunkGroupOid, AccessShareLock);
-	Relation index = index_open(ColumnarChunkGroupIndexRelationId(), AccessShareLock);

 	ScanKeyData scanKey[2];
 	ScanKeyInit(&scanKey[0], Anum_columnar_chunkgroup_storageid,
@ -918,15 +998,22 @@ ReadChunkGroupRowCounts(uint64 storageId, uint64 stripe, uint32 chunkGroupCount,
 	ScanKeyInit(&scanKey[1], Anum_columnar_chunkgroup_stripe,
 				BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe));

+	Oid indexId = ColumnarChunkGroupIndexRelationId();
+	bool indexOk = OidIsValid(indexId);
 	SysScanDesc scanDescriptor =
-		systable_beginscan_ordered(columnarChunkGroup, index, snapshot, 2, scanKey);
+		systable_beginscan(columnarChunkGroup, indexId, indexOk, snapshot, 2, scanKey);
+
+	static bool loggedSlowMetadataAccessWarning = false;
+	if (!indexOk && !loggedSlowMetadataAccessWarning)
+	{
+		ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, "chunk_group_pkey")));
+		loggedSlowMetadataAccessWarning = true;
+	}

-	uint32 chunkGroupIndex = 0;
 	HeapTuple heapTuple = NULL;
 	uint32 *chunkGroupRowCounts = palloc0(chunkGroupCount * sizeof(uint32));

-	while (HeapTupleIsValid(heapTuple = systable_getnext_ordered(scanDescriptor,
-																 ForwardScanDirection)))
+	while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
 	{
 		Datum datumArray[Natts_columnar_chunkgroup];
 		bool isNullArray[Natts_columnar_chunkgroup];
@ -937,24 +1024,16 @@ ReadChunkGroupRowCounts(uint64 storageId, uint64 stripe, uint32 chunkGroupCount,

 		uint32 tupleChunkGroupIndex =
 			DatumGetUInt32(datumArray[Anum_columnar_chunkgroup_chunk - 1]);
-		if (chunkGroupIndex >= chunkGroupCount ||
-			tupleChunkGroupIndex != chunkGroupIndex)
+		if (tupleChunkGroupIndex >= chunkGroupCount)
 		{
 			elog(ERROR, "unexpected chunk group");
 		}

-		chunkGroupRowCounts[chunkGroupIndex] =
+		chunkGroupRowCounts[tupleChunkGroupIndex] =
 			(uint32) DatumGetUInt64(datumArray[Anum_columnar_chunkgroup_row_count - 1]);
-		chunkGroupIndex++;
 	}

-	if (chunkGroupIndex != chunkGroupCount)
-	{
-		elog(ERROR, "unexpected chunk group count");
-	}
-
-	systable_endscan_ordered(scanDescriptor);
-	index_close(index, AccessShareLock);
+	systable_endscan(scanDescriptor);
 	table_close(columnarChunkGroup, AccessShareLock);

 	return chunkGroupRowCounts;
@ -1151,14 +1230,20 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update,
 	Oid columnarStripesOid = ColumnarStripeRelationId();

 	Relation columnarStripes = table_open(columnarStripesOid, AccessShareLock);
-	Relation columnarStripePkeyIndex = index_open(ColumnarStripePKeyIndexRelationId(),
-												  AccessShareLock);

-	SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes,
-															columnarStripePkeyIndex,
+	Oid indexId = ColumnarStripePKeyIndexRelationId();
+	bool indexOk = OidIsValid(indexId);
+	SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId, indexOk,
 													&dirtySnapshot, 2, scanKey);

-	HeapTuple oldTuple = systable_getnext_ordered(scanDescriptor, ForwardScanDirection);
+	static bool loggedSlowMetadataAccessWarning = false;
+	if (!indexOk && !loggedSlowMetadataAccessWarning)
+	{
+		ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, "stripe_pkey")));
+		loggedSlowMetadataAccessWarning = true;
+	}
+
+	HeapTuple oldTuple = systable_getnext(scanDescriptor);
 	if (!HeapTupleIsValid(oldTuple))
 	{
 		ereport(ERROR, (errmsg("attempted to modify an unexpected stripe, "
@ -1178,13 +1263,22 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update,

 	heap_inplace_update(columnarStripes, modifiedTuple);

+	/*
+	 * Existing tuple now contains modifications, because we used
+	 * heap_inplace_update().
+	 */
+	HeapTuple newTuple = oldTuple;
+
+	/*
+	 * Must not pass modifiedTuple, because BuildStripeMetadata expects a real
+	 * heap tuple with MVCC fields.
+	 */
 	StripeMetadata *modifiedStripeMetadata = BuildStripeMetadata(columnarStripes,
-																 modifiedTuple);
+																 newTuple);

 	CommandCounterIncrement();

-	systable_endscan_ordered(scanDescriptor);
-	index_close(columnarStripePkeyIndex, AccessShareLock);
+	systable_endscan(scanDescriptor);
 	table_close(columnarStripes, AccessShareLock);

 	/* return StripeMetadata object built from modified tuple */
@ -1195,6 +1289,10 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update,
 /*
 * ReadDataFileStripeList reads the stripe list for a given storageId
 * in the given snapshot.
+ *
+ * Doesn't sort the stripes by their ids before returning if
+ * stripe_first_row_number_idx is not available, which normally can only
+ * happen during pg upgrades.
 */
 static List *
 ReadDataFileStripeList(uint64 storageId, Snapshot snapshot)
@ -1209,22 +1307,27 @@ ReadDataFileStripeList(uint64 storageId, Snapshot snapshot)
 	Oid columnarStripesOid = ColumnarStripeRelationId();

 	Relation columnarStripes = table_open(columnarStripesOid, AccessShareLock);
-	Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(),
-								AccessShareLock);

-	SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index,
-															snapshot, 1,
-															scanKey);
+	Oid indexId = ColumnarStripeFirstRowNumberIndexRelationId();
+	bool indexOk = OidIsValid(indexId);
+	SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId,
+													indexOk, snapshot, 1, scanKey);

-	while (HeapTupleIsValid(heapTuple = systable_getnext_ordered(scanDescriptor,
-																 ForwardScanDirection)))
+	static bool loggedSlowMetadataAccessWarning = false;
+	if (!indexOk && !loggedSlowMetadataAccessWarning)
+	{
+		ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING,
+								 "stripe_first_row_number_idx")));
+		loggedSlowMetadataAccessWarning = true;
+	}
+
+	while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
 	{
 		StripeMetadata *stripeMetadata = BuildStripeMetadata(columnarStripes, heapTuple);
 		stripeMetadataList = lappend(stripeMetadataList, stripeMetadata);
 	}

-	systable_endscan_ordered(scanDescriptor);
-	index_close(index, AccessShareLock);
+	systable_endscan(scanDescriptor);
 	table_close(columnarStripes, AccessShareLock);

 	return stripeMetadataList;
@ -1233,6 +1336,8 @@ ReadDataFileStripeList(uint64 storageId, Snapshot snapshot)

 /*
 * BuildStripeMetadata builds a StripeMetadata object from given heap tuple.
+ *
+ * NB: heapTuple must be a proper heap tuple with MVCC fields.
 */
 static StripeMetadata *
 BuildStripeMetadata(Relation columnarStripes, HeapTuple heapTuple)
@ -1269,7 +1374,8 @@ BuildStripeMetadata(Relation columnarStripes, HeapTuple heapTuple)
 	 * subtransaction id here.
 	 */
 	TransactionId entryXmin = HeapTupleHeaderGetXmin(heapTuple->t_data);
-	stripeMetadata->aborted = TransactionIdDidAbort(entryXmin);
+	stripeMetadata->aborted = !TransactionIdIsInProgress(entryXmin) &&
+							  TransactionIdDidAbort(entryXmin);
 	stripeMetadata->insertedByCurrentXact =
 		TransactionIdIsCurrentTransactionId(entryXmin);

@ -1332,25 +1438,30 @@ DeleteStorageFromColumnarMetadataTable(Oid metadataTableId,
 		return;
 	}

-	Relation index = index_open(storageIdIndexId, AccessShareLock);
+	bool indexOk = OidIsValid(storageIdIndexId);
+	SysScanDesc scanDescriptor = systable_beginscan(metadataTable, storageIdIndexId,
+													indexOk, NULL, 1, scanKey);

-	SysScanDesc scanDescriptor = systable_beginscan_ordered(metadataTable, index, NULL,
-															1, scanKey);
+	static bool loggedSlowMetadataAccessWarning = false;
+	if (!indexOk && !loggedSlowMetadataAccessWarning)
+	{
+		ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING,
+								 "on a columnar metadata table")));
+		loggedSlowMetadataAccessWarning = true;
+	}

 	ModifyState *modifyState = StartModifyRelation(metadataTable);

 	HeapTuple heapTuple;
-	while (HeapTupleIsValid(heapTuple = systable_getnext_ordered(scanDescriptor,
-																 ForwardScanDirection)))
+	while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
 	{
 		DeleteTupleAndEnforceConstraints(modifyState, heapTuple);
 	}

-	systable_endscan_ordered(scanDescriptor);
+	systable_endscan(scanDescriptor);

 	FinishModifyRelation(modifyState);

-	index_close(index, AccessShareLock);
 	table_close(metadataTable, AccessShareLock);
 }

@ -1483,6 +1594,9 @@ create_estate_for_relation(Relation rel)

 /*
 * DatumToBytea serializes a datum into a bytea value.
+ *
+ * Since we don't want to limit datum size to RSIZE_MAX unnecessarily,
+ * we use memcpy instead of memcpy_s in several places in this function.
 */
 static bytea *
 DatumToBytea(Datum value, Form_pg_attribute attrForm)
@ -1499,19 +1613,16 @@ DatumToBytea(Datum value, Form_pg_attribute attrForm)
 			Datum tmp;
 			store_att_byval(&tmp, value, attrForm->attlen);

-			memcpy_s(VARDATA(result), datumLength + VARHDRSZ,
-					 &tmp, attrForm->attlen);
+			memcpy(VARDATA(result), &tmp, attrForm->attlen); /* IGNORE-BANNED */
 		}
 		else
 		{
-			memcpy_s(VARDATA(result), datumLength + VARHDRSZ,
-					 DatumGetPointer(value), attrForm->attlen);
+			memcpy(VARDATA(result), DatumGetPointer(value), attrForm->attlen); /* IGNORE-BANNED */
 		}
 	}
 	else
 	{
-		memcpy_s(VARDATA(result), datumLength + VARHDRSZ,
-				 DatumGetPointer(value), datumLength);
+		memcpy(VARDATA(result), DatumGetPointer(value), datumLength); /* IGNORE-BANNED */
 	}

 	return result;
@ -1530,8 +1641,12 @@ ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm)
 	 * after the byteaDatum is freed.
 	 */
 	char *binaryDataCopy = palloc0(VARSIZE_ANY_EXHDR(bytes));
-	memcpy_s(binaryDataCopy, VARSIZE_ANY_EXHDR(bytes),
-			 VARDATA_ANY(bytes), VARSIZE_ANY_EXHDR(bytes));
+
+	/*
+	 * We use IGNORE-BANNED here since we don't want to limit datum size to
+	 * RSIZE_MAX unnecessarily.
+	 */
+	memcpy(binaryDataCopy, VARDATA_ANY(bytes), VARSIZE_ANY_EXHDR(bytes)); /* IGNORE-BANNED */

 	return fetch_att(binaryDataCopy, attrForm->attbyval, attrForm->attlen);
 }
--- a/src/backend/columnar/columnar_storage.c
+++ b/src/backend/columnar/columnar_storage.c
@ -104,6 +104,10 @@ typedef struct PhysicalAddr
 								  "version or run \"ALTER EXTENSION citus UPDATE\"."


+/* only for testing purposes */
+PG_FUNCTION_INFO_V1(test_columnar_storage_write_new_page);
+
+
 /*
 * Map logical offsets to a physical page and offset where the data is kept.
 */
@ -667,6 +671,7 @@ ReadFromBlock(Relation rel, BlockNumber blockno, uint32 offset, char *buf,
 			  uint32 len, bool force)
 {
 	Buffer buffer = ReadBuffer(rel, blockno);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
 	Page page = BufferGetPage(buffer);
 	PageHeader phdr = (PageHeader) page;

@ -678,7 +683,7 @@ ReadFromBlock(Relation rel, BlockNumber blockno, uint32 offset, char *buf,
 	}

 	memcpy_s(buf, len, page + offset, len);
-	ReleaseBuffer(buffer);
+	UnlockReleaseBuffer(buffer);
 }


@ -703,13 +708,32 @@ WriteToBlock(Relation rel, BlockNumber blockno, uint32 offset, char *buf,
 		PageInit(page, BLCKSZ, 0);
 	}

-	if (phdr->pd_lower != offset || phdr->pd_upper - offset < len)
+	if (phdr->pd_lower < offset || phdr->pd_upper - offset < len)
 	{
 		elog(ERROR,
 			 "attempt to write columnar data of length %d to offset %d of block %d of relation %d",
 			 len, offset, blockno, rel->rd_id);
 	}

+	/*
+	 * After a transaction has been rolled back, we might be
+	 * overwriting the rolled-back write, so phdr->pd_lower can be
+	 * different from the requested offset.
+	 *
+	 * We reset pd_lower to discard the rolled-back write.
+	 *
+	 * Given that we always align page reservation to the next page as of
+	 * 10.2, having such a disk page is only possible if a write operation
+	 * failed in an older version of columnar, but now the user attempts
+	 * writing to that table in version >= 10.2.
+	 */
+	if (phdr->pd_lower > offset)
+	{
+		ereport(DEBUG4, (errmsg("overwriting page %u", blockno),
+						 errdetail("This can happen after a roll-back.")));
+		phdr->pd_lower = offset;
+	}
+
 	START_CRIT_SECTION();

 	memcpy_s(page + phdr->pd_lower, phdr->pd_upper - phdr->pd_lower, buf, len);
@ -819,3 +843,36 @@ ColumnarMetapageCheckVersion(Relation rel, ColumnarMetapage *metapage)
 						errhint(OLD_METAPAGE_VERSION_HINT)));
 	}
 }
+
+
+/*
+ * test_columnar_storage_write_new_page is a UDF only used for testing
+ * purposes; its only argument is the oid of the relation to write to.
+ * It could make more sense to define this in columnar_debug.c, but the
+ * storage layer doesn't expose ColumnarMetapage to any other files.
+ */
+Datum
+test_columnar_storage_write_new_page(PG_FUNCTION_ARGS)
+{
+	Oid relationId = PG_GETARG_OID(0);
+
+	Relation relation = relation_open(relationId, AccessShareLock);
+
+	/*
+	 * Allocate a new page, write some data to there, and set reserved offset
+	 * to the start of that page. That way, for a subsequent write operation,
+	 * storage layer would try to overwrite the page that we allocated here.
+	 */
+	uint64 newPageOffset = ColumnarStorageGetReservedOffset(relation, false);
+
+	ColumnarStorageReserveData(relation, 100);
+	ColumnarStorageWrite(relation, newPageOffset, "foo_bar", 8); /* 7 chars + NUL */
+
+	ColumnarMetapage metapage = ColumnarMetapageRead(relation, false);
+	metapage.reservedOffset = newPageOffset; /* rewind to start of written data */
+	ColumnarOverwriteMetapage(relation, metapage);
+
+	relation_close(relation, AccessShareLock);
+
+	PG_RETURN_VOID();
+}
--- a/src/backend/columnar/columnar_tableam.c
+++ b/src/backend/columnar/columnar_tableam.c
@ -85,7 +85,6 @@ typedef struct ColumnarScanDescData
 	List *scanQual;
 } ColumnarScanDescData;

-typedef struct ColumnarScanDescData *ColumnarScanDesc;

 /*
 * IndexFetchColumnarData is the scan state passed between index_fetch_begin,
@ -173,6 +172,8 @@ columnar_beginscan(Relation relation, Snapshot snapshot,
 				   ParallelTableScanDesc parallel_scan,
 				   uint32 flags)
 {
+	CheckCitusVersion(ERROR);
+
 	int natts = relation->rd_att->natts;

 	/* attr_needed represents 0-indexed attribute numbers */
@ -418,6 +419,8 @@ columnar_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
 static IndexFetchTableData *
 columnar_index_fetch_begin(Relation rel)
 {
+	CheckCitusVersion(ERROR);
+
 	Oid relfilenode = rel->rd_node.relNode;
 	if (PendingWritesInUpperTransactions(relfilenode, GetCurrentSubTransactionId()))
 	{
@ -472,8 +475,11 @@ columnar_index_fetch_tuple(struct IndexFetchTableData *sscan,
 	*call_again = false;

 	/*
-	 * No dead tuples are possible in columnar, set it to false if it's
-	 * passed to be non-NULL.
+	 * Initialize all_dead to false if passed to be non-NULL.
+	 *
+	 * XXX: For aborted writes, we should set all_dead to true but this would
+	 * require implementing columnar_index_delete_tuples for simple deletion
+	 * of dead tuples (TM_IndexDeleteOp.bottomup = false).
 	 */
 	if (all_dead)
 	{
@ -638,6 +644,8 @@ static bool
 columnar_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
 								  Snapshot snapshot)
 {
+	CheckCitusVersion(ERROR);
+
 	uint64 rowNumber = tid_to_row_number(slot->tts_tid);
 	StripeMetadata *stripeMetadata = FindStripeByRowNumber(rel, rowNumber, snapshot);
 	return stripeMetadata != NULL;
@ -649,7 +657,47 @@ static TransactionId
 columnar_index_delete_tuples(Relation rel,
 							 TM_IndexDeleteOp *delstate)
 {
-	elog(ERROR, "columnar_index_delete_tuples not implemented");
+	CheckCitusVersion(ERROR);
+
+	/*
+	 * XXX: We didn't bother implementing index_delete_tuples for either the
+	 * simple deletion or the bottom-up deletion case. There is no particular
+	 * reason for that other than keeping things simple.
+	 *
+	 * See the rest of this function to see how we deal with
+	 * index_delete_tuples requests made to columnarAM.
+	 */
+
+	if (delstate->bottomup)
+	{
+		/*
+		 * Ignore any bottom-up deletion requests.
+		 *
+		 * Currently only caller in postgres that does bottom-up deletion is
+		 * _bt_bottomupdel_pass, which in turn calls _bt_delitems_delete_check.
+		 * And this function is okay with ndeltids being set to 0 by tableAM
+		 * for bottom-up deletion.
+		 */
+		delstate->ndeltids = 0;
+		return InvalidTransactionId;
+	}
+	else
+	{
+		/*
+		 * TableAM is not expected to set ndeltids to 0 for simple deletion
+		 * case, so here we cannot do the same trick that we do for
+		 * bottom-up deletion.
+		 * See the assertion around table_index_delete_tuples call in pg
+		 * function index_compute_xid_horizon_for_tuples.
+		 *
+		 * For this reason, to avoid receiving simple deletion requests for
+		 * columnar tables (bottomup = false), columnar_index_fetch_tuple
+		 * doesn't ever set all_dead to true in order to prevent triggering
+		 * simple deletion of index tuples. But let's throw an error to be on
+		 * the safe side.
+		 */
+		elog(ERROR, "columnar_index_delete_tuples not implemented for simple deletion");
+	}
 }


@ -670,13 +718,17 @@ static void
 columnar_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
 					  int options, BulkInsertState bistate)
 {
+	CheckCitusVersion(ERROR);
+
 	/*
 	 * columnar_init_write_state allocates the write state in a longer
 	 * lasting context, so no need to worry about it.
 	 */
 	ColumnarWriteState *writeState = columnar_init_write_state(relation,
 															   RelationGetDescr(relation),
+															   slot->tts_tableOid,
 															   GetCurrentSubTransactionId());
+
 	MemoryContext oldContext = MemoryContextSwitchTo(ColumnarWritePerTupleContext(
 														 writeState));

@ -716,8 +768,16 @@ static void
 columnar_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 					  CommandId cid, int options, BulkInsertState bistate)
 {
+	CheckCitusVersion(ERROR);
+
+	/*
+	 * The caller of .multi_insert is table_multi_insert() and this is only used for the COPY
+	 * command, so slots[i]->tts_tableOid will always be equal to relation->rd_id. Thus, we can
+	 * pass RelationGetRelid(relation) as the tupSlotTableOid.
+	 */
 	ColumnarWriteState *writeState = columnar_init_write_state(relation,
 															   RelationGetDescr(relation),
+															   RelationGetRelid(relation),
 															   GetCurrentSubTransactionId());

 	ColumnarCheckLogicalReplication(relation);
@ -790,6 +850,8 @@ columnar_relation_set_new_filenode(Relation rel,
 								   TransactionId *freezeXid,
 								   MultiXactId *minmulti)
 {
+	CheckCitusVersion(ERROR);
+
 	if (persistence == RELPERSISTENCE_UNLOGGED)
 	{
 		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@ -825,6 +887,8 @@ columnar_relation_set_new_filenode(Relation rel,
 static void
 columnar_relation_nontransactional_truncate(Relation rel)
 {
+	CheckCitusVersion(ERROR);
+
 	RelFileNode relfilenode = rel->rd_node;

 	NonTransactionDropWriteState(relfilenode.relNode);
@ -871,6 +935,8 @@ columnar_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 								   double *tups_vacuumed,
 								   double *tups_recently_dead)
 {
+	CheckCitusVersion(ERROR);
+
 	TupleDesc sourceDesc = RelationGetDescr(OldHeap);
 	TupleDesc targetDesc = RelationGetDescr(NewHeap);

@ -960,6 +1026,27 @@ NeededColumnsList(TupleDesc tupdesc, Bitmapset *attr_needed)
 }


+/*
+ * ColumnarTableTupleCount returns the number of tuples in the given columnar
+ * relation, computed by summing rowCount over the stripes of its relfilenode.
+ */
+static uint64
+ColumnarTableTupleCount(Relation relation)
+{
+	List *stripeList = StripesForRelfilenode(relation->rd_node);
+	uint64 tupleCount = 0;
+
+	ListCell *lc = NULL;
+	foreach(lc, stripeList)
+	{
+		StripeMetadata *stripe = lfirst(lc);
+		tupleCount += stripe->rowCount;
+	}
+
+	return tupleCount;
+}
+
+
 /*
 * columnar_vacuum_rel implements VACUUM without FULL option.
 */
@ -967,6 +1054,18 @@ static void
 columnar_vacuum_rel(Relation rel, VacuumParams *params,
 					BufferAccessStrategy bstrategy)
 {
+	if (!CheckCitusVersion(WARNING))
+	{
+		/*
+		 * Skip if the extension catalogs are not up-to-date, but avoid
+		 * erroring during auto-vacuum.
+		 */
+		return;
+	}
+
+	pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
+								  RelationGetRelid(rel));
+
 	/*
 	 * If metapage version of relation is older, then we hint users to VACUUM
 	 * the relation in ColumnarMetapageCheckVersion. So if needed, upgrade
@ -990,6 +1089,79 @@ columnar_vacuum_rel(Relation rel, VacuumParams *params,
 	{
 		TruncateColumnar(rel, elevel);
 	}
+
+	RelationOpenSmgr(rel);
+	BlockNumber new_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
+
+	/* get the number of indexes */
+	List *indexList = RelationGetIndexList(rel);
+	int nindexes = list_length(indexList);
+
+	TransactionId oldestXmin;
+	TransactionId freezeLimit;
+	MultiXactId multiXactCutoff;
+
+	/* initialize xids */
+#if PG_VERSION_NUM >= PG_VERSION_15
+	MultiXactId oldestMxact;
+	vacuum_set_xid_limits(rel,
+						  params->freeze_min_age,
+						  params->freeze_table_age,
+						  params->multixact_freeze_min_age,
+						  params->multixact_freeze_table_age,
+						  &oldestXmin, &oldestMxact,
+						  &freezeLimit, &multiXactCutoff);
+
+	Assert(MultiXactIdPrecedesOrEquals(multiXactCutoff, oldestMxact));
+#else
+	TransactionId xidFullScanLimit;
+	MultiXactId mxactFullScanLimit;
+	vacuum_set_xid_limits(rel,
+						  params->freeze_min_age,
+						  params->freeze_table_age,
+						  params->multixact_freeze_min_age,
+						  params->multixact_freeze_table_age,
+						  &oldestXmin, &freezeLimit, &xidFullScanLimit,
+						  &multiXactCutoff, &mxactFullScanLimit);
+#endif
+
+	Assert(TransactionIdPrecedesOrEquals(freezeLimit, oldestXmin));
+
+	/*
+	 * Columnar storage doesn't hold any transaction IDs, so we can always
+	 * just advance to the most aggressive value.
+	 */
+	TransactionId newRelFrozenXid = oldestXmin;
+#if PG_VERSION_NUM >= PG_VERSION_15
+	MultiXactId newRelminMxid = oldestMxact;
+#else
+	MultiXactId newRelminMxid = multiXactCutoff;
+#endif
+
+	double new_live_tuples = ColumnarTableTupleCount(rel);
+
+	/* all visible pages are always 0 */
+	BlockNumber new_rel_allvisible = 0;
+
+#if PG_VERSION_NUM >= PG_VERSION_15
+	bool frozenxid_updated;
+	bool minmulti_updated;
+
+	vac_update_relstats(rel, new_rel_pages, new_live_tuples,
+						new_rel_allvisible, nindexes > 0,
+						newRelFrozenXid, newRelminMxid,
+						&frozenxid_updated, &minmulti_updated, false);
+#else
+	vac_update_relstats(rel, new_rel_pages, new_live_tuples,
+						new_rel_allvisible, nindexes > 0,
+						newRelFrozenXid, newRelminMxid, false);
+#endif
+
+	pgstat_report_vacuum(RelationGetRelid(rel),
+						 rel->rd_rel->relisshared,
+						 Max(new_live_tuples, 0),
+						 0);
+	pgstat_progress_end_command();
 }


@ -1276,6 +1448,8 @@ columnar_index_build_range_scan(Relation columnarRelation,
 								void *callback_state,
 								TableScanDesc scan)
 {
+	CheckCitusVersion(ERROR);
+
 	if (start_blockno != 0 || numblocks != InvalidBlockNumber)
 	{
 		/*
@ -1524,6 +1698,8 @@ columnar_index_validate_scan(Relation columnarRelation,
 							 ValidateIndexState *
 							 validateIndexState)
 {
+	CheckCitusVersion(ERROR);
+
 	ColumnarReportTotalVirtualBlocks(columnarRelation, snapshot,
 									 PROGRESS_SCAN_BLOCKS_TOTAL);

@ -1694,6 +1870,8 @@ TupleSortSkipSmallerItemPointers(Tuplesortstate *tupleSort, ItemPointer targetIt
 static uint64
 columnar_relation_size(Relation rel, ForkNumber forkNumber)
 {
+	CheckCitusVersion(ERROR);
+
 	uint64 nblocks = 0;

 	/* Open it at the smgr level if not already done */
@ -1719,6 +1897,8 @@ columnar_relation_size(Relation rel, ForkNumber forkNumber)
 static bool
 columnar_relation_needs_toast_table(Relation rel)
 {
+	CheckCitusVersion(ERROR);
+
 	return false;
 }

@ -1728,6 +1908,8 @@ columnar_estimate_rel_size(Relation rel, int32 *attr_widths,
 						   BlockNumber *pages, double *tuples,
 						   double *allvisfrac)
 {
+	CheckCitusVersion(ERROR);
+
 	RelationOpenSmgr(rel);
 	*pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
 	*tuples = ColumnarTableRowCount(rel);
@ -1899,6 +2081,8 @@ ColumnarTableDropHook(Oid relid)

 	if (IsColumnarTableAmTable(relid))
 	{
+		CheckCitusVersion(ERROR);
+
 		/*
 		 * Drop metadata. No need to drop storage here since for
 		 * tableam tables storage is managed by postgres.
@ -2020,6 +2204,8 @@ ColumnarProcessUtility(PlannedStmt *pstmt,
 									   GetCreateIndexRelationLockMode(indexStmt));
 		if (rel->rd_tableam == GetColumnarTableAmRoutine())
 		{
+			CheckCitusVersion(ERROR);
+
 			if (!ColumnarSupportsIndexAM(indexStmt->accessMethod))
 			{
 				ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@ -2172,8 +2358,13 @@ detoast_values(TupleDesc tupleDesc, Datum *orig_values, bool *isnull)
 			if (values == orig_values)
 			{
 				values = palloc(sizeof(Datum) * natts);
-				memcpy_s(values, sizeof(Datum) * natts,
-						 orig_values, sizeof(Datum) * natts);
+
+				/*
+				 * We use IGNORE-BANNED here since we don't want to limit
+				 * size of the buffer that holds the datum array to RSIZE_MAX
+				 * unnecessarily.
+				 */
+				memcpy(values, orig_values, sizeof(Datum) * natts); /* IGNORE-BANNED */
 			}

 			/* will be freed when per-tuple context is reset */
@ -2356,6 +2547,8 @@ PG_FUNCTION_INFO_V1(alter_columnar_table_set);
 Datum
 alter_columnar_table_set(PG_FUNCTION_ARGS)
 {
+	CheckCitusVersion(ERROR);
+
 	Oid relationId = PG_GETARG_OID(0);

 	Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */
@ -2483,6 +2676,8 @@ PG_FUNCTION_INFO_V1(alter_columnar_table_reset);
 Datum
 alter_columnar_table_reset(PG_FUNCTION_ARGS)
 {
+	CheckCitusVersion(ERROR);
+
 	Oid relationId = PG_GETARG_OID(0);

 	Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */
--- a/src/backend/columnar/columnar_writer.c
+++ b/src/backend/columnar/columnar_writer.c
@ -531,6 +531,9 @@ SerializeBoolArray(bool *boolArray, uint32 boolArrayLength)
 /*
 * SerializeSingleDatum serializes the given datum value and appends it to the
 * provided string info buffer.
+ *
+ * Since we don't want to limit datum buffer size to RSIZE_MAX unnecessarily,
+ * we use memcpy instead of memcpy_s in several places in this function.
 */
 static void
 SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue,
@ -552,15 +555,13 @@ SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue,
 		}
 		else
 		{
-			memcpy_s(currentDatumDataPointer, datumBuffer->maxlen - datumBuffer->len,
-					 DatumGetPointer(datum), datumTypeLength);
+			memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumTypeLength); /* IGNORE-BANNED */
 		}
 	}
 	else
 	{
 		Assert(!datumTypeByValue);
-		memcpy_s(currentDatumDataPointer, datumBuffer->maxlen - datumBuffer->len,
-				 DatumGetPointer(datum), datumLength);
+		memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumLength); /* IGNORE-BANNED */
 	}

 	datumBuffer->len += datumLengthAligned;
@ -714,7 +715,12 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength)
 	{
 		uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum);
 		char *datumData = palloc0(datumLength);
-		memcpy_s(datumData, datumLength, DatumGetPointer(datum), datumLength);
+
+		/*
+		 * We use IGNORE-BANNED here since we don't want to limit datum size to
+		 * RSIZE_MAX unnecessarily.
+		 */
+		memcpy(datumData, DatumGetPointer(datum), datumLength); /* IGNORE-BANNED */

 		datumCopy = PointerGetDatum(datumData);
 	}
@ -737,8 +743,12 @@ CopyStringInfo(StringInfo sourceString)
 		targetString->data = palloc0(sourceString->len);
 		targetString->len = sourceString->len;
 		targetString->maxlen = sourceString->len;
-		memcpy_s(targetString->data, sourceString->len,
-				 sourceString->data, sourceString->len);
+
+		/*
+		 * We use IGNORE-BANNED here since we don't want to limit string
+		 * buffer size to RSIZE_MAX unnecessarily.
+		 */
+		memcpy(targetString->data, sourceString->data, sourceString->len); /* IGNORE-BANNED */
 	}

 	return targetString;
--- a/src/backend/columnar/sql/columnar--10.2-1--10.2-2.sql
+++ b/src/backend/columnar/sql/columnar--10.2-1--10.2-2.sql
@ -0,0 +1,5 @@
+-- columnar--10.2-1--10.2-2.sql
+
+-- Revoke read access on columnar.chunk from PUBLIC (i.e. from unprivileged
+-- users), because the table contains chunk min/max values.
+REVOKE SELECT ON columnar.chunk FROM PUBLIC;
--- a/src/backend/columnar/sql/columnar--10.2-2--10.2-3.sql
+++ b/src/backend/columnar/sql/columnar--10.2-2--10.2-3.sql
@ -0,0 +1,26 @@
+-- columnar--10.2-2--10.2-3.sql
+
+-- Since stripe_first_row_number_idx is required to scan a columnar table, we
+-- need to make sure that it is created before doing anything with columnar
+-- tables during pg upgrades.
+--
+-- However, a plain btree index is not a dependency of a table, so pg_upgrade
+-- cannot guarantee that stripe_first_row_number_idx gets created when
+-- creating columnar.stripe, unless we make it a unique "constraint".
+--
+-- To do that, drop stripe_first_row_number_idx and create a unique
+-- constraint with the same name to keep the code change at a minimum.
+--
+-- The index cannot be dropped while pg_depend holds a normal ('n') dependency
+-- of the columnar access method on it, so remove that entry first if it exists.
+DELETE FROM pg_depend
+WHERE classid = 'pg_am'::regclass::oid
+    AND objid IN (select oid from pg_am where amname = 'columnar')
+    AND objsubid = 0
+    AND refclassid = 'pg_class'::regclass::oid
+    AND refobjid = 'columnar.stripe_first_row_number_idx'::regclass::oid
+    AND refobjsubid = 0
+    AND deptype = 'n';
+DROP INDEX columnar.stripe_first_row_number_idx;
+ALTER TABLE columnar.stripe ADD CONSTRAINT stripe_first_row_number_idx
+UNIQUE (storage_id, first_row_number);
--- a/src/backend/columnar/sql/columnar--10.2-3--10.2-4.sql
+++ b/src/backend/columnar/sql/columnar--10.2-3--10.2-4.sql
@ -0,0 +1,5 @@
+-- columnar--10.2-3--10.2-4.sql
+
+#include "udfs/columnar_ensure_am_depends_catalog/10.2-4.sql"
+-- make the columnar access method depend on the columnar catalog objects
+SELECT citus_internal.columnar_ensure_am_depends_catalog();
--- a/src/backend/columnar/sql/downgrades/columnar--10.2-1--10.1-1.sql
+++ b/src/backend/columnar/sql/downgrades/columnar--10.2-1--10.1-1.sql
@ -8,5 +8,16 @@ DROP FUNCTION citus_internal.upgrade_columnar_storage(regclass);
 DROP FUNCTION citus_internal.downgrade_columnar_storage(regclass);

 -- drop "first_row_number" column and the index defined on it
+--
+-- If we have a pg_depend entry for this index, we can not drop it as
+-- the extension depends on it. Remove the pg_depend entry if it exists.
+DELETE FROM pg_depend
+WHERE classid = 'pg_am'::regclass::oid
+    AND objid IN (select oid from pg_am where amname = 'columnar')
+    AND objsubid = 0
+    AND refclassid = 'pg_class'::regclass::oid
+    AND refobjid = 'columnar.stripe_first_row_number_idx'::regclass::oid
+    AND refobjsubid = 0
+    AND deptype = 'n';
 DROP INDEX columnar.stripe_first_row_number_idx;
 ALTER TABLE columnar.stripe DROP COLUMN first_row_number;
--- a/src/backend/columnar/sql/downgrades/columnar--10.2-2--10.2-1.sql
+++ b/src/backend/columnar/sql/downgrades/columnar--10.2-2--10.2-1.sql
@ -0,0 +1,4 @@
+-- columnar--10.2-2--10.2-1.sql
+
+-- undo columnar--10.2-1--10.2-2.sql: grant read access on columnar.chunk back to PUBLIC
+GRANT SELECT ON columnar.chunk TO PUBLIC;
--- a/src/backend/columnar/sql/downgrades/columnar--10.2-3--10.2-2.sql
+++ b/src/backend/columnar/sql/downgrades/columnar--10.2-3--10.2-2.sql
@ -0,0 +1,14 @@
+-- columnar--10.2-3--10.2-2.sql
+-- Turn the unique constraint stripe_first_row_number_idx back into a plain btree
+-- index. A normal ('n') pg_depend entry from the columnar access method to the
+-- constraint's index would block the drop, so remove that entry first if it exists.
+DELETE FROM pg_depend
+WHERE classid = 'pg_am'::regclass::oid
+    AND objid IN (select oid from pg_am where amname = 'columnar')
+    AND objsubid = 0
+    AND refclassid = 'pg_class'::regclass::oid
+    AND refobjid = 'columnar.stripe_first_row_number_idx'::regclass::oid
+    AND refobjsubid = 0
+    AND deptype = 'n';
+ALTER TABLE columnar.stripe DROP CONSTRAINT stripe_first_row_number_idx;
+CREATE INDEX stripe_first_row_number_idx ON columnar.stripe USING BTREE(storage_id, first_row_number);
--- a/src/backend/columnar/sql/downgrades/columnar--10.2-4--10.2-3.sql
+++ b/src/backend/columnar/sql/downgrades/columnar--10.2-4--10.2-3.sql
@ -0,0 +1,6 @@
+-- columnar--10.2-4--10.2-3.sql
+
+DROP FUNCTION citus_internal.columnar_ensure_am_depends_catalog();
+
+-- Note that we intentionally do not delete pg_depend records that we inserted
+-- via columnar--10.2-3--10.2-4.sql (by using columnar_ensure_am_depends_catalog).
--- a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/10.2-4.sql
+++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/10.2-4.sql
@ -0,0 +1,40 @@
+CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
+  RETURNS void
+  LANGUAGE plpgsql
+  SET search_path = pg_catalog
+AS $func$
+BEGIN
+  INSERT INTO pg_depend
+  SELECT -- Define a dependency edge from "columnar table access method" ..
+         'pg_am'::regclass::oid as classid,
+         (select oid from pg_am where amname = 'columnar') as objid,
+         0 as objsubid,
+         -- ... to each object that is registered to pg_class and that lives
+         -- in "columnar" schema. That covers the catalog tables, the indexes
+         -- created on them and the sequences created in "columnar" schema.
+         --
+         -- Since users might have created their own objects in "columnar"
+         -- schema, we explicitly specify the list of objects that we are
+         -- interested in.
+         'pg_class'::regclass::oid as refclassid,
+         columnar_schema_members.relname::regclass::oid as refobjid,
+         0 as refobjsubid,
+         'n' as deptype
+  FROM (VALUES ('columnar.chunk'),
+               ('columnar.chunk_group'),
+               ('columnar.chunk_group_pkey'),
+               ('columnar.chunk_pkey'),
+               ('columnar.options'),
+               ('columnar.options_pkey'),
+               ('columnar.storageid_seq'),
+               ('columnar.stripe'),
+               ('columnar.stripe_first_row_number_idx'),
+               ('columnar.stripe_pkey')
+       ) columnar_schema_members(relname)
+  -- Skip rows that pg_depend already has, to avoid inserting duplicates.
+  EXCEPT TABLE pg_depend;
+END;
+$func$;
+COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
+  IS 'internal function responsible for creating dependencies from columnar '
+     'table access method to the rel objects in columnar schema';
--- a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql
+++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql
@ -0,0 +1,40 @@
+CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
+  RETURNS void
+  LANGUAGE plpgsql
+  SET search_path = pg_catalog
+AS $func$
+BEGIN
+  INSERT INTO pg_depend
+  SELECT -- Define a dependency edge from "columnar table access method" ..
+         'pg_am'::regclass::oid as classid,
+         (select oid from pg_am where amname = 'columnar') as objid,
+         0 as objsubid,
+         -- ... to each object that is registered to pg_class and that lives
+         -- in "columnar" schema. That covers the catalog tables, the indexes
+         -- created on them and the sequences created in "columnar" schema.
+         --
+         -- Since users might have created their own objects in "columnar"
+         -- schema, we explicitly specify the list of objects that we are
+         -- interested in.
+         'pg_class'::regclass::oid as refclassid,
+         columnar_schema_members.relname::regclass::oid as refobjid,
+         0 as refobjsubid,
+         'n' as deptype
+  FROM (VALUES ('columnar.chunk'),
+               ('columnar.chunk_group'),
+               ('columnar.chunk_group_pkey'),
+               ('columnar.chunk_pkey'),
+               ('columnar.options'),
+               ('columnar.options_pkey'),
+               ('columnar.storageid_seq'),
+               ('columnar.stripe'),
+               ('columnar.stripe_first_row_number_idx'),
+               ('columnar.stripe_pkey')
+       ) columnar_schema_members(relname)
+  -- Skip rows that pg_depend already has, to avoid inserting duplicates.
+  EXCEPT TABLE pg_depend;
+END;
+$func$;
+COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
+  IS 'internal function responsible for creating dependencies from columnar '
+     'table access method to the rel objects in columnar schema';
--- a/src/backend/columnar/write_state_management.c
+++ b/src/backend/columnar/write_state_management.c
@ -117,6 +117,7 @@ CleanupWriteStateMap(void *arg)

 ColumnarWriteState *
 columnar_init_write_state(Relation relation, TupleDesc tupdesc,
+						  Oid tupSlotRelationId,
 						  SubTransactionId currentSubXid)
 {
 	bool found;
@ -180,7 +181,16 @@ columnar_init_write_state(Relation relation, TupleDesc tupdesc,
 	MemoryContext oldContext = MemoryContextSwitchTo(WriteStateContext);

 	ColumnarOptions columnarOptions = { 0 };
-	ReadColumnarOptions(relation->rd_id, &columnarOptions);
+
+	/*
+	 * In case of a table rewrite, we need to fetch table options based on the
+	 * relation id of the source tuple slot.
+	 *
+	 * For this reason, we always pass tupSlotRelationId here; which should be
+	 * same as the target table if the write operation is not related to a table
+	 * rewrite etc.
+	 */
+	ReadColumnarOptions(tupSlotRelationId, &columnarOptions);

 	SubXidWriteState *stackEntry = palloc0(sizeof(SubXidWriteState));
 	stackEntry->writeState = ColumnarBeginWrite(relation->rd_node,
--- a/src/backend/distributed/citus.control
+++ b/src/backend/distributed/citus.control
@ -1,6 +1,6 @@
 # Citus extension
 comment = 'Citus distributed database'
-default_version = '10.2-1'
+default_version = '10.2-5'
 module_pathname = '$libdir/citus'
 relocatable = false
 schema = pg_catalog
--- a/src/backend/distributed/commands/alter_table.c
+++ b/src/backend/distributed/commands/alter_table.c
@ -32,6 +32,8 @@
 #include "access/xact.h"
 #include "catalog/dependency.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_rewrite_d.h"
 #include "columnar/columnar.h"
 #include "columnar/columnar_tableam.h"
 #include "commands/defrem.h"
@ -209,6 +211,8 @@ static char * GetAccessMethodForMatViewIfExists(Oid viewOid);
 static bool WillRecreateForeignKeyToReferenceTable(Oid relationId,
 												   CascadeToColocatedOption cascadeOption);
 static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId);
+static void ErrorIfUnsupportedCascadeObjects(Oid relationId);
+static bool DoesCascadeDropUnsupportedObject(Oid classId, Oid id, HTAB *nodeMap);

 PG_FUNCTION_INFO_V1(undistribute_table);
 PG_FUNCTION_INFO_V1(alter_distributed_table);
@ -385,6 +389,8 @@ UndistributeTable(TableConversionParameters *params)
 		ErrorIfAnyPartitionRelationInvolvedInNonInheritedFKey(partitionList);
 	}

+	ErrorIfUnsupportedCascadeObjects(params->relationId);
+
 	params->conversionType = UNDISTRIBUTE_TABLE;
 	params->shardCountIsNull = true;
 	TableConversionState *con = CreateTableConversion(params);
@ -416,6 +422,8 @@ AlterDistributedTable(TableConversionParameters *params)
 	EnsureTableNotPartition(params->relationId);
 	EnsureHashDistributedTable(params->relationId);

+	ErrorIfUnsupportedCascadeObjects(params->relationId);
+
 	params->conversionType = ALTER_DISTRIBUTED_TABLE;
 	TableConversionState *con = CreateTableConversion(params);
 	CheckAlterDistributedTableConversionParameters(con);
@ -472,6 +480,8 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
 		}
 	}

+	ErrorIfUnsupportedCascadeObjects(params->relationId);
+
 	params->conversionType = ALTER_TABLE_SET_ACCESS_METHOD;
 	params->shardCountIsNull = true;
 	TableConversionState *con = CreateTableConversion(params);
@ -1234,6 +1244,94 @@ CreateCitusTableLike(TableConversionState *con)
 }


+/*
+ * ErrorIfUnsupportedCascadeObjects gets oid of a relation, finds the objects
+ * that dropping this relation cascades into and errors out if an extension is
+ * among them. It does not return in that case, as it raises an ERROR.
+ */
+static void
+ErrorIfUnsupportedCascadeObjects(Oid relationId)
+{
+	HASHCTL info;
+	memset(&info, 0, sizeof(info));
+	info.keysize = sizeof(Oid);
+	info.entrysize = sizeof(Oid);
+	info.hash = oid_hash;
+	info.hcxt = CurrentMemoryContext; /* else table lives in TopMemoryContext */
+	uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+	HTAB *nodeMap = hash_create("object dependency map (oid)", 64, &info, hashFlags);
+
+	bool unsupportedObjectInDepGraph =
+		DoesCascadeDropUnsupportedObject(RelationRelationId, relationId, nodeMap);
+	if (unsupportedObjectInDepGraph)
+	{
+		ereport(ERROR, (errmsg("cannot alter table because an extension depends on it")));
+	}
+}
+
+
+/*
+ * DoesCascadeDropUnsupportedObject recursively visits the objects that depend
+ * on the object identified by (classId, objectId) and returns true as soon as
+ * it reaches an unsupported one, false if there is none.
+ *
+ * Only objects for which IsObjectAddressOwnedByExtension returns true are
+ * treated as unsupported; such an object makes the whole walk stop right away
+ * without visiting the remaining dependents.
+ *
+ * nodeMap records the object ids that were already visited; a revisited object
+ * is not inspected again and yields false. NOTE(review): entries are keyed by
+ * objectId alone, without classId -- confirm this is intended.
+ */
+static bool
+DoesCascadeDropUnsupportedObject(Oid classId, Oid objectId, HTAB *nodeMap)
+{
+	/* HASH_ENTER both looks the object id up and marks it as visited */
+	bool alreadyVisited = false;
+	hash_search(nodeMap, &objectId, HASH_ENTER, &alreadyVisited);
+
+	if (alreadyVisited)
+	{
+		return false;
+	}
+
+	ObjectAddress currentAddress = { 0 };
+	ObjectAddressSet(currentAddress, classId, objectId);
+
+	if (IsObjectAddressOwnedByExtension(&currentAddress, NULL))
+	{
+		return true;
+	}
+
+	/* walk the pg_depend tuples of the objects that depend on this one */
+	List *dependingTuples = GetPgDependTuplesForDependingObjects(classId, objectId);
+
+	HeapTuple dependTuple = NULL;
+	foreach_ptr(dependTuple, dependingTuples)
+	{
+		Form_pg_depend dependForm = (Form_pg_depend) GETSTRUCT(dependTuple);
+
+		/* by default, recurse into the depending object itself ... */
+		Oid nextClassId = dependForm->classid;
+		Oid nextObjectId = dependForm->objid;
+
+		/* ... but for a pg_rewrite entry, use what GetDependingView returns */
+		if (dependForm->classid == RewriteRelationId)
+		{
+			nextClassId = RelationRelationId;
+			nextObjectId = GetDependingView(dependForm);
+		}
+
+		if (DoesCascadeDropUnsupportedObject(nextClassId, nextObjectId, nodeMap))
+		{
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
 /*
 * GetViewCreationCommandsOfTable takes a table oid generates the CREATE VIEW
 * commands for views that depend to the given table. This includes the views
--- a/src/backend/distributed/commands/create_distributed_table.c
+++ b/src/backend/distributed/commands/create_distributed_table.c
@ -31,6 +31,7 @@
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_trigger.h"
+#include "catalog/pg_type.h"
 #include "commands/defrem.h"
 #include "commands/extension.h"
 #include "commands/sequence.h"
@ -58,6 +59,7 @@
 #include "distributed/reference_table_utils.h"
 #include "distributed/relation_access_tracking.h"
 #include "distributed/remote_commands.h"
+#include "distributed/resource_lock.h"
 #include "distributed/shared_library_init.h"
 #include "distributed/worker_protocol.h"
 #include "distributed/worker_transaction.h"
@ -474,9 +476,22 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio
 	/*
 	 * Make sure that existing reference tables have been replicated to all the nodes
 	 * such that we can create foreign keys and joins work immediately after creation.
+	 *
+	 * This will take a lock on the nodes to make sure no nodes are added after we have
+	 * verified and ensured the reference tables are copied everywhere.
+	 * Although copying reference tables here is not needed for anything but creating a
+	 * new colocation group, avoiding it needs significant refactoring we don't want now.
 	 */
 	EnsureReferenceTablesExistOnAllNodes();

+	/*
+	 * While adding tables to a colocation group we need to make sure no concurrent
+	 * mutations happen on the colocation group with regards to its placements. It is
+	 * important that we have already copied any reference tables before acquiring this
+	 * lock as these are competing operations.
+	 */
+	LockColocationId(colocationId, ShareLock);
+
 	/* we need to calculate these variables before creating distributed metadata */
 	bool localTableEmpty = TableEmpty(relationId);
 	Oid colocatedTableId = ColocatedTableId(colocationId);
@ -554,11 +569,16 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio
 	{
 		List *partitionList = PartitionList(relationId);
 		Oid partitionRelationId = InvalidOid;
+		Oid namespaceId = get_rel_namespace(relationId);
+		char *schemaName = get_namespace_name(namespaceId);
+		char *relationName = get_rel_name(relationId);
+		char *parentRelationName = quote_qualified_identifier(schemaName, relationName);
+
 		foreach_oid(partitionRelationId, partitionList)
 		{
 			CreateDistributedTable(partitionRelationId, distributionColumn,
 								   distributionMethod, shardCount, false,
-								   colocateWithTableName, viaDeprecatedAPI);
+								   parentRelationName, viaDeprecatedAPI);
 		}
 	}

@ -592,7 +612,7 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio
 * Otherwise, the condition is ensured.
 */
 void
-EnsureSequenceTypeSupported(Oid seqOid, Oid seqTypId)
+EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId)
 {
 	List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE);
 	Oid citusTableId = InvalidOid;
@ -617,9 +637,9 @@ EnsureSequenceTypeSupported(Oid seqOid, Oid seqTypId)
 			 */
 			if (currentSeqOid == seqOid)
 			{
-				Oid currentSeqTypId = GetAttributeTypeOid(citusTableId,
+				Oid currentAttributeTypId = GetAttributeTypeOid(citusTableId,
 																currentAttnum);
-				if (seqTypId != currentSeqTypId)
+				if (attributeTypeId != currentAttributeTypId)
 				{
 					char *sequenceName = generate_qualified_relation_name(
 						seqOid);
@ -711,17 +731,29 @@ EnsureDistributedSequencesHaveOneType(Oid relationId, List *dependentSequenceLis
 		 * We should make sure that the type of the column that uses
 		 * that sequence is supported
 		 */
-		Oid seqTypId = GetAttributeTypeOid(relationId, attnum);
-		EnsureSequenceTypeSupported(sequenceOid, seqTypId);
+		Oid attributeTypeId = GetAttributeTypeOid(relationId, attnum);
+		EnsureSequenceTypeSupported(sequenceOid, attributeTypeId);

 		/*
 		 * Alter the sequence's data type in the coordinator if needed.
+		 *
+		 * First, we should only change the sequence type if the column
+		 * is a supported sequence type. For example, if a sequence is used
+		 * in an expression which then becomes a text, we should not try to
+		 * alter the sequence type to text. Postgres only supports int2, int4
+		 * and int8 as the sequence type.
+		 *
 		 * A sequence's type is bigint by default and it doesn't change even if
 		 * it's used in an int column. We should change the type if needed,
 		 * and not allow future ALTER SEQUENCE ... TYPE ... commands for
-		 * sequences used as defaults in distributed tables
+		 * sequences used as defaults in distributed tables.
 		 */
-		AlterSequenceType(sequenceOid, seqTypId);
+		if (attributeTypeId == INT2OID ||
+			attributeTypeId == INT4OID ||
+			attributeTypeId == INT8OID)
+		{
+			AlterSequenceType(sequenceOid, attributeTypeId);
+		}
 	}
 }

--- a/src/backend/distributed/commands/extension.c
+++ b/src/backend/distributed/commands/extension.c
@ -149,16 +149,6 @@ PostprocessCreateExtensionStmt(Node *node, const char *queryString)
 	/* extension management can only be done via coordinator node */
 	EnsureCoordinator();

-	/*
-	 * Make sure that no new nodes are added after this point until the end of the
-	 * transaction by taking a RowShareLock on pg_dist_node, which conflicts with the
-	 * ExclusiveLock taken by citus_add_node.
-	 * This guarantees that all active nodes will have the extension, because they will
-	 * either get it now, or get it in citus_add_node after this transaction finishes and
-	 * the pg_dist_object record becomes visible.
-	 */
-	LockRelationOid(DistNodeRelationId(), RowShareLock);
-
 	/*
 	 * Make sure that the current transaction is already in sequential mode,
 	 * or can still safely be put in sequential mode
@ -262,16 +252,6 @@ PreprocessDropExtensionStmt(Node *node, const char *queryString,
 	/* extension management can only be done via coordinator node */
 	EnsureCoordinator();

-	/*
-	 * Make sure that no new nodes are added after this point until the end of the
-	 * transaction by taking a RowShareLock on pg_dist_node, which conflicts with the
-	 * ExclusiveLock taken by citus_add_node.
-	 * This guarantees that all active nodes will drop the extension, because they will
-	 * either get it now, or get it in citus_add_node after this transaction finishes and
-	 * the pg_dist_object record becomes visible.
-	 */
-	LockRelationOid(DistNodeRelationId(), RowShareLock);
-
 	/*
 	 * Make sure that the current transaction is already in sequential mode,
 	 * or can still safely be put in sequential mode
@ -398,15 +378,6 @@ PreprocessAlterExtensionSchemaStmt(Node *node, const char *queryString,
 	/* extension management can only be done via coordinator node */
 	EnsureCoordinator();

-	/*
-	 * Make sure that no new nodes are added after this point until the end of the
-	 * transaction by taking a RowShareLock on pg_dist_node, which conflicts with the
-	 * ExclusiveLock taken by citus_add_node.
-	 * This guarantees that all active nodes will update the extension schema after
-	 * this transaction finishes and the pg_dist_object record becomes visible.
-	 */
-	LockRelationOid(DistNodeRelationId(), RowShareLock);
-
 	/*
 	 * Make sure that the current transaction is already in sequential mode,
 	 * or can still safely be put in sequential mode
@ -466,16 +437,6 @@ PreprocessAlterExtensionUpdateStmt(Node *node, const char *queryString,
 	/* extension management can only be done via coordinator node */
 	EnsureCoordinator();

-	/*
-	 * Make sure that no new nodes are added after this point until the end of the
-	 * transaction by taking a RowShareLock on pg_dist_node, which conflicts with the
-	 * ExclusiveLock taken by citus_add_node.
-	 * This guarantees that all active nodes will update the extension version, because
-	 * they will either get it now, or get it in citus_add_node after this transaction
-	 * finishes and the pg_dist_object record becomes visible.
-	 */
-	LockRelationOid(DistNodeRelationId(), RowShareLock);
-
 	/*
 	 * Make sure that the current transaction is already in sequential mode,
 	 * or can still safely be put in sequential mode
--- a/src/backend/distributed/commands/function.c
+++ b/src/backend/distributed/commands/function.c
@ -83,6 +83,7 @@ static void EnsureSequentialModeForFunctionDDL(void);
 static void TriggerSyncMetadataToPrimaryNodes(void);
 static bool ShouldPropagateCreateFunction(CreateFunctionStmt *stmt);
 static bool ShouldPropagateAlterFunction(const ObjectAddress *address);
+static bool ShouldAddFunctionSignature(FunctionParameterMode mode);
 static ObjectAddress FunctionToObjectAddress(ObjectType objectType,
 											 ObjectWithArgs *objectWithArgs,
 											 bool missing_ok);
@ -1298,7 +1299,11 @@ CreateFunctionStmtObjectAddress(Node *node, bool missing_ok)
 	FunctionParameter *funcParam = NULL;
 	foreach_ptr(funcParam, stmt->parameters)
 	{
-		objectWithArgs->objargs = lappend(objectWithArgs->objargs, funcParam->argType);
+		if (ShouldAddFunctionSignature(funcParam->mode))
+		{
+			objectWithArgs->objargs = lappend(objectWithArgs->objargs,
+											  funcParam->argType);
+		}
 	}

 	return FunctionToObjectAddress(objectType, objectWithArgs, missing_ok);
@ -1855,8 +1860,7 @@ ObjectWithArgsFromOid(Oid funcOid)

 	for (int i = 0; i < numargs; i++)
 	{
-		if (argModes == NULL ||
-			argModes[i] != PROARGMODE_OUT || argModes[i] != PROARGMODE_TABLE)
+		if (argModes == NULL || ShouldAddFunctionSignature(argModes[i]))
 		{
 			objargs = lappend(objargs, makeTypeNameFromOid(argTypes[i], -1));
 		}
@ -1869,6 +1873,35 @@ ObjectWithArgsFromOid(Oid funcOid)
 }


+/*
+ * ShouldAddFunctionSignature tells whether a parameter with the given
+ * FunctionParameterMode is part of the function signature. It returns false
+ * for OUT and TABLE parameters and true for every other mode.
+ */
+static bool
+ShouldAddFunctionSignature(FunctionParameterMode mode)
+{
+	/* parameters are part of the generated signature unless ruled out below */
+	bool partOfSignature = true;
+	switch (mode)
+	{
+		case FUNC_PARAM_OUT:
+		case FUNC_PARAM_TABLE:
+		{
+			partOfSignature = false;
+			break;
+		}
+
+		default:
+		{
+			/* FUNC_PARAM_IN, FUNC_PARAM_INOUT, FUNC_PARAM_VARIADIC and the rest */
+			break;
+		}
+	}
+	return partOfSignature;
+}
+
+
 /*
 * FunctionToObjectAddress returns the ObjectAddress of a Function or Procedure based on
 * its type and ObjectWithArgs describing the Function/Procedure. If missing_ok is set to
--- a/src/backend/distributed/commands/role.c
+++ b/src/backend/distributed/commands/role.c
@ -140,13 +140,6 @@ PostprocessAlterRoleStmt(Node *node, const char *queryString)

 	AlterRoleStmt *stmt = castNode(AlterRoleStmt, node);

-	/*
-	 * Make sure that no new nodes are added after this point until the end of the
-	 * transaction by taking a RowShareLock on pg_dist_node, which conflicts with the
-	 * ExclusiveLock taken by citus_add_node.
-	 */
-	LockRelationOid(DistNodeRelationId(), RowShareLock);
-
 	DefElem *option = NULL;
 	foreach_ptr(option, stmt->options)
 	{
--- a/src/backend/distributed/commands/table.c
+++ b/src/backend/distributed/commands/table.c
@ -143,11 +143,14 @@ PreprocessDropTableStmt(Node *node, const char *queryString,
 			continue;
 		}

-		if (IsCitusTableType(relationId, REFERENCE_TABLE))
+		/*
+		 * While changing the tables that are part of a colocation group we need to
+		 * prevent concurrent mutations to the placements of the shard groups.
+		 */
+		CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
+		if (cacheEntry->colocationId != INVALID_COLOCATION_ID)
 		{
-			/* prevent concurrent EnsureReferenceTablesExistOnAllNodes */
-			int colocationId = CreateReferenceTableColocationId();
-			LockColocationId(colocationId, ExclusiveLock);
+			LockColocationId(cacheEntry->colocationId, ShareLock);
 		}

 		/* invalidate foreign key cache if the table involved in any foreign key */
--- a/src/backend/distributed/commands/type.c
+++ b/src/backend/distributed/commands/type.c
@ -130,16 +130,6 @@ PreprocessCompositeTypeStmt(Node *node, const char *queryString,
 	 */
 	EnsureCoordinator();

-	/*
-	 * Make sure that no new nodes are added after this point until the end of the
-	 * transaction by taking a RowShareLock on pg_dist_node, which conflicts with the
-	 * ExclusiveLock taken by citus_add_node.
-	 * This guarantees that all active nodes will have the object, because they will
-	 * either get it now, or get it in citus_add_node after this transaction finishes and
-	 * the pg_dist_object record becomes visible.
-	 */
-	LockRelationOid(DistNodeRelationId(), RowShareLock);
-
 	/* fully qualify before lookup and later deparsing */
 	QualifyTreeNode(node);

--- a/src/backend/distributed/commands/utility_hook.c
+++ b/src/backend/distributed/commands/utility_hook.c
@ -33,7 +33,9 @@
 #include "access/attnum.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#if PG_VERSION_NUM < 140000
 #include "access/xact.h"
+#endif
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "commands/dbcommands.h"
@ -51,7 +53,9 @@
 #include "distributed/local_executor.h"
 #include "distributed/maintenanced.h"
 #include "distributed/coordinator_protocol.h"
+#if PG_VERSION_NUM < 140000
 #include "distributed/metadata_cache.h"
+#endif
 #include "distributed/metadata_sync.h"
 #include "distributed/multi_executor.h"
 #include "distributed/multi_explain.h"
@ -67,6 +71,7 @@
 #include "tcop/utility.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
+#include "utils/snapmgr.h"
 #include "utils/syscache.h"

 bool EnableDDLPropagation = true; /* ddl propagation is enabled */
@ -88,6 +93,9 @@ static void ProcessUtilityInternal(PlannedStmt *pstmt,
 								   struct QueryEnvironment *queryEnv,
 								   DestReceiver *dest,
 								   QueryCompletionCompat *completionTag);
+#if PG_VERSION_NUM >= 140000
+static void set_indexsafe_procflags(void);
+#endif
 static char * SetSearchPathToCurrentSearchPathCommand(void);
 static char * CurrentSearchPath(void);
 static void IncrementUtilityHookCountersIfNecessary(Node *parsetree);
@ -906,9 +914,35 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
 		/*
 		 * Start a new transaction to make sure CONCURRENTLY commands
 		 * on localhost do not block waiting for this transaction to finish.
+		 *
+		 * In addition to doing that, we also need to tell other backends
+		 * --including the ones spawned for connections opened to localhost to
+		 * build indexes on shards of this relation-- that concurrent index
+		 * builds can safely ignore us.
+		 *
+		 * Normally, DefineIndex() only does that if index doesn't have any
+		 * predicates (i.e.: where clause) and no index expressions at all.
+		 * However, now that we already called standard process utility,
+		 * index build on the shell table is finished anyway.
+		 *
+		 * The reason behind doing so is that we cannot guarantee not
+		 * grabbing any snapshots via adaptive executor, and the backends
+		 * creating indexes on local shards (if any) might block on waiting
+		 * for current xact of the current backend to finish, which would
+		 * cause self deadlocks that are not detectable.
 		 */
 		if (ddlJob->startNewTransaction)
 		{
+#if PG_VERSION_NUM < 140000
+
+			/*
+			 * Older versions of postgres don't have the PROC_IN_SAFE_IC flag
+			 * so we cannot use set_indexsafe_procflags in those versions.
+			 *
+			 * For this reason, we do our best to ensure not grabbing any
+			 * snapshots later in the executor.
+			 */
+
 			/*
 			 * If cache is not populated, system catalog lookups will cause
 			 * the xmin of current backend to change. Then the last phase
@ -929,8 +963,34 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
 			 * will already be in the hash table, hence we won't be holding any snapshots.
 			 */
 			WarmUpConnParamsHash();
+#endif
+
+			/*
+			 * Since it is not certain whether the code-path that we followed
+			 * until reaching here caused grabbing any snapshots or not, we
+			 * need to pop the active snapshot if we had any, to ensure not
+			 * leaking any snapshots.
+			 *
+			 * For example, EnsureCoordinator might return without grabbing
+			 * any snapshots if we didn't receive any invalidation messages
+			 * but the opposite is also possible.
+			 */
+			if (ActiveSnapshotSet())
+			{
+				PopActiveSnapshot();
+			}
+
 			CommitTransactionCommand();
 			StartTransactionCommand();
+
+#if PG_VERSION_NUM >= 140000
+
+			/*
+			 * Tell other backends to ignore us, even if we grab any
+			 * snapshots via adaptive executor.
+			 */
+			set_indexsafe_procflags();
+#endif
 		}

 		/* save old commit protocol to restore at xact end */
@ -997,6 +1057,33 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
 }


+#if PG_VERSION_NUM >= 140000
+
+/*
+ * set_indexsafe_procflags sets PROC_IN_SAFE_IC flag in MyProc->statusFlags
+ * and mirrors it into ProcGlobal->statusFlags, holding ProcArrayLock.
+ *
+ * The flag is reset automatically at transaction end, so it must be set
+ * for each transaction, before the backend has an xid or xmin (asserted).
+ * Copied from pg/src/backend/commands/indexcmds.c
+ * Also see pg commit c98763bf51bf610b3ee7e209fc76c3ff9a6b3163.
+ */
+static void
+set_indexsafe_procflags(void)
+{
+	Assert(MyProc->xid == InvalidTransactionId &&
+		   MyProc->xmin == InvalidTransactionId);
+
+	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+	MyProc->statusFlags |= PROC_IN_SAFE_IC;
+	ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
+	LWLockRelease(ProcArrayLock);
+}
+
+
+#endif
+
+
 /*
 * CreateCustomDDLTaskList creates a DDLJob which will apply a command to all placements
 * of shards of a distributed table. The command to be applied is generated by the
@ -1260,7 +1347,8 @@ DDLTaskList(Oid relationId, const char *commandString)
 List *
 NodeDDLTaskList(TargetWorkerSet targets, List *commands)
 {
-	List *workerNodes = TargetWorkerSetNodeList(targets, NoLock);
+	/* don't allow concurrent node list changes that require an exclusive lock */
+	List *workerNodes = TargetWorkerSetNodeList(targets, RowShareLock);

 	if (list_length(workerNodes) <= 0)
 	{
--- a/src/backend/distributed/connection/connection_configuration.c
+++ b/src/backend/distributed/connection/connection_configuration.c
@ -72,8 +72,8 @@ InitConnParams()
 /*
 * ResetConnParams frees all strings in the keywords and parameters arrays,
 * sets their elements to null, and resets the ConnParamsSize to zero before
- * adding back any hardcoded global connection settings (at present, only the
- * fallback_application_name of 'citus').
+ * adding back any hardcoded global connection settings (at present, there
+ * are none).
 */
 void
 ResetConnParams()
@ -89,8 +89,6 @@ ResetConnParams()
 	ConnParams.size = 0;

 	InvalidateConnParamsHashEntries();
-
-	AddConnParam("fallback_application_name", CITUS_APPLICATION_NAME);
 }


@ -253,14 +251,16 @@ GetConnParams(ConnectionHashKey *key, char ***keywords, char ***values,
 		"port",
 		"dbname",
 		"user",
-		"client_encoding"
+		"client_encoding",
+		"application_name"
 	};
 	const char *runtimeValues[] = {
 		key->hostname,
 		nodePortString,
 		key->database,
 		key->user,
-		GetDatabaseEncodingName()
+		GetDatabaseEncodingName(),
+		CITUS_APPLICATION_NAME
 	};

 	/*
--- a/src/backend/distributed/connection/connection_management.c
+++ b/src/backend/distributed/connection/connection_management.c
@ -36,6 +36,7 @@
 #include "distributed/version_compat.h"
 #include "distributed/worker_log_messages.h"
 #include "mb/pg_wchar.h"
+#include "pg_config.h"
 #include "portability/instr_time.h"
 #include "storage/ipc.h"
 #include "utils/hsearch.h"
@ -1155,6 +1156,8 @@ StartConnectionEstablishment(MultiConnection *connection, ConnectionHashKey *key
 }


+#if PG_VERSION_NUM < 140000
+
 /*
 * WarmUpConnParamsHash warms up the ConnParamsHash by loading all the
 * conn params for active primary nodes.
@ -1176,6 +1179,9 @@ WarmUpConnParamsHash(void)
 }


+#endif
+
+
 /*
 * FindOrCreateConnParamsEntry searches ConnParamsHash for the given key,
 * if it is not found, it is created.
--- a/src/backend/distributed/deparser/citus_ruleutils.c
+++ b/src/backend/distributed/deparser/citus_ruleutils.c
@ -48,6 +48,7 @@
 #include "distributed/metadata_cache.h"
 #include "distributed/metadata_sync.h"
 #include "distributed/metadata_utility.h"
+#include "distributed/namespace_utils.h"
 #include "distributed/relay_utility.h"
 #include "distributed/version_compat.h"
 #include "distributed/worker_protocol.h"
@ -739,6 +740,12 @@ deparse_shard_index_statement(IndexStmt *origStmt, Oid distrelid, int64 shardid,
 												relationName),
 					 indexStmt->accessMethod);

+	/*
+	 * Switch to empty search_path so that deparse_index_columns produces fully-
+	 * qualified names in expressions.
+	 */
+	PushOverrideEmptySearchPath(CurrentMemoryContext);
+
 	/* index column or expression list begins here */
 	appendStringInfoChar(buffer, '(');
 	deparse_index_columns(buffer, indexStmt->indexParams, deparseContext);
@ -749,10 +756,15 @@ deparse_shard_index_statement(IndexStmt *origStmt, Oid distrelid, int64 shardid,
 	{
 		appendStringInfoString(buffer, "INCLUDE (");
 		deparse_index_columns(buffer, indexStmt->indexIncludingParams, deparseContext);
-		appendStringInfoChar(buffer, ')');
+		appendStringInfoString(buffer, ") ");
 	}

+	if (indexStmt->options != NIL)
+	{
+		appendStringInfoString(buffer, "WITH (");
 		AppendStorageParametersToString(buffer, indexStmt->options);
+		appendStringInfoString(buffer, ") ");
+	}

 	if (indexStmt->whereClause != NULL)
 	{
@ -760,6 +772,9 @@ deparse_shard_index_statement(IndexStmt *origStmt, Oid distrelid, int64 shardid,
 																deparseContext, false,
 																false));
 	}
+
+	/* revert back to original search_path */
+	PopOverrideSearchPath();
 }


@ -948,8 +963,9 @@ deparse_index_columns(StringInfo buffer, List *indexParameterList, List *deparse
 		/* Commit on postgres: 911e70207703799605f5a0e8aad9f06cff067c63*/
 		if (indexElement->opclassopts != NIL)
 		{
-			ereport(ERROR, errmsg(
-						"citus currently doesn't support operator class parameters in indexes"));
+			appendStringInfoString(buffer, "(");
+			AppendStorageParametersToString(buffer, indexElement->opclassopts);
+			appendStringInfoString(buffer, ") ");
 		}
 #endif

@ -1081,13 +1097,6 @@ AppendStorageParametersToString(StringInfo stringBuffer, List *optionList)
 	ListCell *optionCell = NULL;
 	bool firstOptionPrinted = false;

-	if (optionList == NIL)
-	{
-		return;
-	}
-
-	appendStringInfo(stringBuffer, " WITH (");
-
 	foreach(optionCell, optionList)
 	{
 		DefElem *option = (DefElem *) lfirst(optionCell);
@ -1104,8 +1113,6 @@ AppendStorageParametersToString(StringInfo stringBuffer, List *optionList)
 						 quote_identifier(optionName),
 						 quote_literal_cstr(optionValue));
 	}
-
-	appendStringInfo(stringBuffer, ")");
 }


--- a/src/backend/distributed/executor/adaptive_executor.c
+++ b/src/backend/distributed/executor/adaptive_executor.c
@ -4300,6 +4300,7 @@ ReceiveResults(WorkerSession *session, bool storeRows)
 		TupleDesc tupleDescriptor = tupleDest->tupleDescForQuery(tupleDest, queryIndex);
 		if (tupleDescriptor == NULL)
 		{
+			PQclear(result);
 			continue;
 		}

--- a/src/backend/distributed/executor/insert_select_executor.c
+++ b/src/backend/distributed/executor/insert_select_executor.c
@ -55,7 +55,6 @@
 bool EnableRepartitionedInsertSelect = true;


-static Query * WrapSubquery(Query *subquery);
 static List * TwoPhaseInsertSelectTaskList(Oid targetRelationId, Query *insertSelectQuery,
 										   char *resultIdPrefix);
 static void ExecutePlanIntoRelation(Oid targetRelationId, List *insertTargetList,
@ -299,100 +298,6 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
 }


-/*
- * BuildSelectForInsertSelect extracts the SELECT part from an INSERT...SELECT query.
- * If the INSERT...SELECT has CTEs then these are added to the resulting SELECT instead.
- */
-Query *
-BuildSelectForInsertSelect(Query *insertSelectQuery)
-{
-	RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);
-	Query *selectQuery = selectRte->subquery;
-
-	/*
-	 * Wrap the SELECT as a subquery if the INSERT...SELECT has CTEs or the SELECT
-	 * has top-level set operations.
-	 *
-	 * We could simply wrap all queries, but that might create a subquery that is
-	 * not supported by the logical planner. Since the logical planner also does
-	 * not support CTEs and top-level set operations, we can wrap queries containing
-	 * those without breaking anything.
-	 */
-	if (list_length(insertSelectQuery->cteList) > 0)
-	{
-		selectQuery = WrapSubquery(selectRte->subquery);
-
-		/* copy CTEs from the INSERT ... SELECT statement into outer SELECT */
-		selectQuery->cteList = copyObject(insertSelectQuery->cteList);
-		selectQuery->hasModifyingCTE = insertSelectQuery->hasModifyingCTE;
-	}
-	else if (selectQuery->setOperations != NULL)
-	{
-		/* top-level set operations confuse the ReorderInsertSelectTargetLists logic */
-		selectQuery = WrapSubquery(selectRte->subquery);
-	}
-
-	return selectQuery;
-}
-
-
-/*
- * WrapSubquery wraps the given query as a subquery in a newly constructed
- * "SELECT * FROM (...subquery...) citus_insert_select_subquery" query.
- */
-static Query *
-WrapSubquery(Query *subquery)
-{
-	ParseState *pstate = make_parsestate(NULL);
-	List *newTargetList = NIL;
-
-	Query *outerQuery = makeNode(Query);
-	outerQuery->commandType = CMD_SELECT;
-
-	/* create range table entries */
-	Alias *selectAlias = makeAlias("citus_insert_select_subquery", NIL);
-	RangeTblEntry *newRangeTableEntry = RangeTableEntryFromNSItem(
-		addRangeTableEntryForSubquery(
-			pstate, subquery,
-			selectAlias, false, true));
-	outerQuery->rtable = list_make1(newRangeTableEntry);
-
-	/* set the FROM expression to the subquery */
-	RangeTblRef *newRangeTableRef = makeNode(RangeTblRef);
-	newRangeTableRef->rtindex = 1;
-	outerQuery->jointree = makeFromExpr(list_make1(newRangeTableRef), NULL);
-
-	/* create a target list that matches the SELECT */
-	TargetEntry *selectTargetEntry = NULL;
-	foreach_ptr(selectTargetEntry, subquery->targetList)
-	{
-		/* exactly 1 entry in FROM */
-		int indexInRangeTable = 1;
-
-		if (selectTargetEntry->resjunk)
-		{
-			continue;
-		}
-
-		Var *newSelectVar = makeVar(indexInRangeTable, selectTargetEntry->resno,
-									exprType((Node *) selectTargetEntry->expr),
-									exprTypmod((Node *) selectTargetEntry->expr),
-									exprCollation((Node *) selectTargetEntry->expr), 0);
-
-		TargetEntry *newSelectTargetEntry = makeTargetEntry((Expr *) newSelectVar,
-															selectTargetEntry->resno,
-															selectTargetEntry->resname,
-															selectTargetEntry->resjunk);
-
-		newTargetList = lappend(newTargetList, newSelectTargetEntry);
-	}
-
-	outerQuery->targetList = newTargetList;
-
-	return outerQuery;
-}
-
-
 /*
 * TwoPhaseInsertSelectTaskList generates a list of tasks for a query that
 * inserts into a target relation and selects from a set of co-located
--- a/src/backend/distributed/executor/intermediate_results.c
+++ b/src/backend/distributed/executor/intermediate_results.c
@ -44,7 +44,7 @@
 #include "utils/syscache.h"


-static bool CreatedResultsDirectory = false;
+static List *CreatedResultsDirectories = NIL;


 /* CopyDestReceiver can be used to stream results into a distributed table */
@ -593,8 +593,6 @@ CreateIntermediateResultsDirectory(void)
 {
 	char *resultDirectory = IntermediateResultsDirectory();

-	if (!CreatedResultsDirectory)
-	{
 	int makeOK = mkdir(resultDirectory, S_IRWXU);
 	if (makeOK != 0)
 	{
@ -610,8 +608,12 @@ CreateIntermediateResultsDirectory(void)
 							   resultDirectory)));
 	}

-		CreatedResultsDirectory = true;
-	}
+	MemoryContext oldContext = MemoryContextSwitchTo(TopTransactionContext);
+
+	CreatedResultsDirectories =
+		lappend(CreatedResultsDirectories, pstrdup(resultDirectory));
+
+	MemoryContextSwitchTo(oldContext);

 	return resultDirectory;
 }
@ -692,13 +694,14 @@ IntermediateResultsDirectory(void)


 /*
- * RemoveIntermediateResultsDirectory removes the intermediate result directory
+ * RemoveIntermediateResultsDirectories removes the intermediate result directories
 * for the current distributed transaction, if any was created.
 */
 void
-RemoveIntermediateResultsDirectory(void)
+RemoveIntermediateResultsDirectories(void)
 {
-	if (CreatedResultsDirectory)
+	char *directoryElement = NULL;
+	foreach_ptr(directoryElement, CreatedResultsDirectories)
 	{
 		/*
 		 * The shared directory is renamed before deleting it. Otherwise it
@ -707,7 +710,7 @@ RemoveIntermediateResultsDirectory(void)
 		 * that's not possible. The current PID is included in the new
 		 * filename, so there can be no collisions with other backends.
 		 */
-		char *sharedName = IntermediateResultsDirectory();
+		char *sharedName = directoryElement;
 		StringInfo privateName = makeStringInfo();
 		appendStringInfo(privateName, "%s.removed-by-%d", sharedName, MyProcPid);
 		if (rename(sharedName, privateName->data))
@ -727,9 +730,12 @@ RemoveIntermediateResultsDirectory(void)
 		{
 			PathNameDeleteTemporaryDir(privateName->data);
 		}
-
-		CreatedResultsDirectory = false;
 	}
+
+	/* cleanup */
+	list_free_deep(CreatedResultsDirectories);
+
+	CreatedResultsDirectories = NIL;
 }


--- a/src/backend/distributed/metadata/dependency.c
+++ b/src/backend/distributed/metadata/dependency.c
@ -157,7 +157,6 @@ static void ApplyAddToDependencyList(ObjectAddressCollector *collector,
 static List * ExpandCitusSupportedTypes(ObjectAddressCollector *collector,
 										ObjectAddress target);
 static ViewDependencyNode * BuildViewDependencyGraph(Oid relationId, HTAB *nodeMap);
-static Oid GetDependingView(Form_pg_depend pg_depend);


 /*
--- a/src/backend/distributed/metadata/metadata_sync.c
+++ b/src/backend/distributed/metadata/metadata_sync.c
@ -2061,6 +2061,8 @@ ShouldInitiateMetadataSync(bool *lockFailure)
 Datum
 citus_internal_add_partition_metadata(PG_FUNCTION_ARGS)
 {
+	CheckCitusVersion(ERROR);
+
 	PG_ENSURE_ARGNOTNULL(0, "relation");
 	Oid relationId = PG_GETARG_OID(0);

@ -2211,6 +2213,8 @@ EnsurePartitionMetadataIsSane(Oid relationId, char distributionMethod, int coloc
 Datum
 citus_internal_add_shard_metadata(PG_FUNCTION_ARGS)
 {
+	CheckCitusVersion(ERROR);
+
 	PG_ENSURE_ARGNOTNULL(0, "relation");
 	Oid relationId = PG_GETARG_OID(0);

@ -2426,6 +2430,8 @@ EnsureShardMetadataIsSane(Oid relationId, int64 shardId, char storageType,
 Datum
 citus_internal_add_placement_metadata(PG_FUNCTION_ARGS)
 {
+	CheckCitusVersion(ERROR);
+
 	int64 shardId = PG_GETARG_INT64(0);
 	int32 shardState = PG_GETARG_INT32(1);
 	int64 shardLength = PG_GETARG_INT64(2);
@ -2537,6 +2543,8 @@ ShouldSkipMetadataChecks(void)
 Datum
 citus_internal_update_placement_metadata(PG_FUNCTION_ARGS)
 {
+	CheckCitusVersion(ERROR);
+
 	int64 shardId = PG_GETARG_INT64(0);
 	int32 sourceGroupId = PG_GETARG_INT32(1);
 	int32 targetGroupId = PG_GETARG_INT32(2);
@ -2602,6 +2610,8 @@ citus_internal_update_placement_metadata(PG_FUNCTION_ARGS)
 Datum
 citus_internal_delete_shard_metadata(PG_FUNCTION_ARGS)
 {
+	CheckCitusVersion(ERROR);
+
 	int64 shardId = PG_GETARG_INT64(0);

 	if (!ShouldSkipMetadataChecks())
@ -2640,6 +2650,8 @@ citus_internal_delete_shard_metadata(PG_FUNCTION_ARGS)
 Datum
 citus_internal_update_relation_colocation(PG_FUNCTION_ARGS)
 {
+	CheckCitusVersion(ERROR);
+
 	Oid relationId = PG_GETARG_OID(0);
 	uint32 tagetColocationId = PG_GETARG_UINT32(1);

--- a/src/backend/distributed/metadata/metadata_utility.c
+++ b/src/backend/distributed/metadata/metadata_utility.c
@ -1010,7 +1010,7 @@ AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval,
 					 char *quotedShardName)
 {
 	appendStringInfo(selectQuery, "SELECT %s AS shard_name, ", quotedShardName);
-	appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName);
+	appendStringInfo(selectQuery, PG_TOTAL_RELATION_SIZE_FUNCTION, quotedShardName);
 }


--- a/src/backend/distributed/metadata/node_metadata.c
+++ b/src/backend/distributed/metadata/node_metadata.c
@ -180,9 +180,6 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS)
 	Name nodeClusterName = PG_GETARG_NAME(3);
 	nodeMetadata.nodeCluster = NameStr(*nodeClusterName);

-	/* prevent concurrent modification */
-	LockRelationOid(DistNodeRelationId(), RowShareLock);
-
 	bool isCoordinatorInMetadata = false;
 	WorkerNode *coordinatorNode = PrimaryNodeForGroup(COORDINATOR_GROUP_ID,
 													  &isCoordinatorInMetadata);
@ -1410,12 +1407,6 @@ AddNodeMetadata(char *nodeName, int32 nodePort,

 	*nodeAlreadyExists = false;

-	/*
-	 * Prevent / wait for concurrent modification before checking whether
-	 * the worker already exists in pg_dist_node.
-	 */
-	LockRelationOid(DistNodeRelationId(), RowShareLock);
-
 	WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
 	if (workerNode != NULL)
 	{
@ -2035,9 +2026,16 @@ DeleteNodeRow(char *nodeName, int32 nodePort)

 	/*
 	 * simple_heap_delete() expects that the caller has at least an
-	 * AccessShareLock on replica identity index.
+	 * AccessShareLock on primary key index.
+	 *
+	 * XXX: This does not seem required, do we really need to acquire this lock?
+	 * Postgres doesn't acquire such locks on indexes before deleting catalog tuples.
+	 * Linking here the reasons we added this lock acquirement:
+	 * https://github.com/citusdata/citus/pull/2851#discussion_r306569462
+	 * https://github.com/citusdata/citus/pull/2855#discussion_r313628554
+	 * https://github.com/citusdata/citus/issues/1890
 	 */
-	Relation replicaIndex = index_open(RelationGetReplicaIndex(pgDistNode),
+	Relation replicaIndex = index_open(RelationGetPrimaryKeyIndex(pgDistNode),
 									   AccessShareLock);

 	ScanKeyInit(&scanKey[0], Anum_pg_dist_node_nodename,
--- a/src/backend/distributed/operations/create_shards.c
+++ b/src/backend/distributed/operations/create_shards.c
@ -365,7 +365,7 @@ CreateReferenceTableShard(Oid distributedTableId)
 	List *nodeList = ReferenceTablePlacementNodeList(ShareLock);
 	nodeList = SortList(nodeList, CompareWorkerNodes);

-	int replicationFactor = ReferenceTableReplicationFactor();
+	int replicationFactor = list_length(nodeList);

 	/* get the next shard id */
 	uint64 shardId = GetNextShardId();
--- a/src/backend/distributed/planner/deparse_shard_query.c
+++ b/src/backend/distributed/planner/deparse_shard_query.c
@ -39,7 +39,6 @@
 #include "utils/syscache.h"


-static void AddInsertAliasIfNeeded(Query *query);
 static void UpdateTaskQueryString(Query *query, Task *task);
 static RelationShard * FindRelationShard(Oid inputRelationId, List *relationShardList);
 static void ConvertRteToSubqueryWithEmptyResult(RangeTblEntry *rte);
@ -159,7 +158,7 @@ RebuildQueryStrings(Job *workerJob)
 * deparsing issues (e.g. RETURNING might reference the original table name,
 * which has been replaced by a shard name).
 */
-static void
+void
 AddInsertAliasIfNeeded(Query *query)
 {
 	Assert(query->commandType == CMD_INSERT);
--- a/src/backend/distributed/planner/insert_select_planner.c
+++ b/src/backend/distributed/planner/insert_select_planner.c
@ -48,8 +48,10 @@
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
+#include <nodes/print.h>


+static void PrepareInsertSelectForCitusPlanner(Query *insertSelectQuery);
 static DistributedPlan * CreateInsertSelectPlanInternal(uint64 planId,
 														Query *originalQuery,
 														PlannerRestrictionContext *
@ -83,6 +85,7 @@ static DeferredErrorMessage * InsertPartitionColumnMatchesSelect(Query *query,
 static DistributedPlan * CreateNonPushableInsertSelectPlan(uint64 planId, Query *parse,
 														   ParamListInfo boundParams);
 static DeferredErrorMessage * NonPushableInsertSelectSupported(Query *insertSelectQuery);
+static Query * WrapSubquery(Query *subquery);
 static void RelabelTargetEntryList(List *selectTargetList, List *insertTargetList);
 static List * AddInsertSelectCasts(List *insertTargetList, List *selectTargetList,
 								   Oid targetRelationId);
@ -370,14 +373,17 @@ CreateDistributedInsertSelectPlan(Query *originalQuery,
 * combineQuery, this function also creates a dummy combineQuery for that.
 */
 DistributedPlan *
-CreateInsertSelectIntoLocalTablePlan(uint64 planId, Query *originalQuery, ParamListInfo
-									 boundParams, bool hasUnresolvedParams,
+CreateInsertSelectIntoLocalTablePlan(uint64 planId, Query *insertSelectQuery,
+									 ParamListInfo boundParams, bool hasUnresolvedParams,
 									 PlannerRestrictionContext *plannerRestrictionContext)
 {
-	RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(originalQuery);
+	RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);
+
+	PrepareInsertSelectForCitusPlanner(insertSelectQuery);
+
+	/* get the SELECT query (may have changed after PrepareInsertSelectForCitusPlanner) */
+	Query *selectQuery = selectRte->subquery;

-	Query *selectQuery = BuildSelectForInsertSelect(originalQuery);
-	originalQuery->cteList = NIL;
 	DistributedPlan *distPlan = CreateDistributedPlan(planId, selectQuery,
 													  copyObject(selectQuery),
 													  boundParams, hasUnresolvedParams,
@ -417,12 +423,84 @@ CreateInsertSelectIntoLocalTablePlan(uint64 planId, Query *originalQuery, ParamL
 	 * distributed select instead of returning it.
 	 */
 	selectRte->subquery = distPlan->combineQuery;
-	distPlan->combineQuery = originalQuery;
+	distPlan->combineQuery = insertSelectQuery;

 	return distPlan;
 }


+/*
+ * PrepareInsertSelectForCitusPlanner prepares an INSERT..SELECT query tree
+ * that was passed to the planner for use by Citus.
+ *
+ * The given insertSelectQuery is modified in place: its target list, the
+ * subquery of its SELECT range table entry and its cteList can all change.
+ * Callers that need the SELECT part should therefore (re-)read
+ * selectRte->subquery after this function returns.
+ *
+ * First, it rebuilds the target lists of the INSERT and the SELECT
+ * to be in the same order, which is not guaranteed in the parse tree.
+ *
+ * Second, some of the constants in the target list will have type
+ * "unknown", which would confuse the Citus planner. To address that,
+ * we add casts to SELECT target list entries whose type does not correspond
+ * to the destination. This also helps us feed the output directly into
+ * a COPY stream for INSERT..SELECT via coordinator.
+ *
+ * In case of UNION or other set operations, the SELECT does not have a
+ * clearly defined target list, so we first wrap the UNION in a subquery.
+ * UNION queries do not have the "unknown" type problem.
+ *
+ * Finally, if the INSERT has CTEs, we move those CTEs into the SELECT,
+ * such that we can plan the SELECT as an independent query. To ensure
+ * the ctelevelsup for CTE RTE's remain the same, we wrap the SELECT into
+ * a subquery, unless we already did so in case of a UNION.
+ *
+ * Note that the steps run in this order in the code below: wrap set
+ * operations, reorder target lists, add casts, and only then wrap for /
+ * move the CTEs.
+ */
+static void
+PrepareInsertSelectForCitusPlanner(Query *insertSelectQuery)
+{
+	RangeTblEntry *insertRte = ExtractResultRelationRTEOrError(insertSelectQuery);
+	RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);
+	Oid targetRelationId = insertRte->relid;
+
+	bool isWrapped = false;
+
+	if (selectRte->subquery->setOperations != NULL)
+	{
+		/*
+		 * Prepare UNION query for reordering and adding casts by
+		 * wrapping it in a subquery to have a single target list.
+		 * Remember that we wrapped, so that the CTE handling below
+		 * does not wrap a second time.
+		 */
+		selectRte->subquery = WrapSubquery(selectRte->subquery);
+		isWrapped = true;
+	}
+
+	/*
+	 * Bring the INSERT and SELECT target lists into the same order; this is
+	 * required for correct deparsing of the query.
+	 */
+	ReorderInsertSelectTargetLists(insertSelectQuery, insertRte, selectRte);
+
+	/*
+	 * Cast types of insert target list and select projection list to
+	 * match the column types of the target relation. AddInsertSelectCasts
+	 * receives a copy of the SELECT target list and its result replaces
+	 * the target list of the SELECT.
+	 */
+	selectRte->subquery->targetList =
+		AddInsertSelectCasts(insertSelectQuery->targetList,
+							 copyObject(selectRte->subquery->targetList),
+							 targetRelationId);
+
+	if (list_length(insertSelectQuery->cteList) > 0)
+	{
+		if (!isWrapped)
+		{
+			/*
+			 * By wrapping the SELECT in a subquery, we can avoid adjusting
+			 * ctelevelsup in RTE's that point to the CTEs.
+			 */
+			selectRte->subquery = WrapSubquery(selectRte->subquery);
+		}
+
+		/*
+		 * Copy CTEs from the INSERT ... SELECT statement into the outer
+		 * SELECT (together with the hasModifyingCTE flag) and clear them
+		 * on the INSERT, so that they are only attached to the SELECT.
+		 */
+		selectRte->subquery->cteList = copyObject(insertSelectQuery->cteList);
+		selectRte->subquery->hasModifyingCTE = insertSelectQuery->hasModifyingCTE;
+		insertSelectQuery->cteList = NIL;
+	}
+}
+
+
 /*
 * CreateCombineQueryForRouterPlan is used for creating a dummy combineQuery
 * for a router plan, since router plans normally don't have one.
@ -881,12 +959,11 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 	ListCell *insertTargetEntryCell;
 	List *newSubqueryTargetlist = NIL;
 	List *newInsertTargetlist = NIL;
+	List *columnNameList = NIL;
 	int resno = 1;
-	Index insertTableId = 1;
+	Index selectTableId = 2;
 	int targetEntryIndex = 0;

-	AssertArg(InsertSelectIntoCitusTable(originalQuery));
-
 	Query *subquery = subqueryRte->subquery;

 	Oid insertRelationId = insertRte->relid;
@ -954,6 +1031,9 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 											newSubqueryTargetEntry);
 		}

+		Value *columnName = makeString(newSubqueryTargetEntry->resname);
+		columnNameList = lappend(columnNameList, columnName);
+
 		/*
 		 * The newly created select target entry cannot be a junk entry since junk
 		 * entries are not in the final target list and we're processing the
@ -961,7 +1041,7 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 		 */
 		Assert(!newSubqueryTargetEntry->resjunk);

-		Var *newInsertVar = makeVar(insertTableId, originalAttrNo,
+		Var *newInsertVar = makeVar(selectTableId, resno,
 									exprType((Node *) newSubqueryTargetEntry->expr),
 									exprTypmod((Node *) newSubqueryTargetEntry->expr),
 									exprCollation((Node *) newSubqueryTargetEntry->expr),
@ -1005,6 +1085,7 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,

 	originalQuery->targetList = newInsertTargetlist;
 	subquery->targetList = newSubqueryTargetlist;
+	subqueryRte->eref->colnames = columnNameList;

 	return NULL;
 }
@ -1412,19 +1493,10 @@ CreateNonPushableInsertSelectPlan(uint64 planId, Query *parse, ParamListInfo bou
 		return distributedPlan;
 	}

-	Query *selectQuery = BuildSelectForInsertSelect(insertSelectQuery);
+	PrepareInsertSelectForCitusPlanner(insertSelectQuery);

-	selectRte->subquery = selectQuery;
-	ReorderInsertSelectTargetLists(insertSelectQuery, insertRte, selectRte);
-
-	/*
-	 * Cast types of insert target list and select projection list to
-	 * match the column types of the target relation.
-	 */
-	selectQuery->targetList =
-		AddInsertSelectCasts(insertSelectQuery->targetList,
-							 selectQuery->targetList,
-							 targetRelationId);
+	/* get the SELECT query (may have changed after PrepareInsertSelectForCitusPlanner) */
+	Query *selectQuery = selectRte->subquery;

 	/*
 	 * Later we might need to call WrapTaskListForProjection(), which requires
@ -1506,6 +1578,63 @@ InsertSelectResultIdPrefix(uint64 planId)
 }


+/*
+ * WrapSubquery wraps the given query as a subquery in a newly constructed
+ * "SELECT * FROM (...subquery...) citus_insert_select_subquery" query.
+ *
+ * The returned outer query is a CMD_SELECT with exactly one range table
+ * entry (the given subquery) and a target list that has one Var for every
+ * non-junk target entry of the subquery. Each Var keeps the resno, resname,
+ * type, typmod and collation of the subquery entry it refers to.
+ */
+static Query *
+WrapSubquery(Query *subquery)
+{
+	ParseState *pstate = make_parsestate(NULL);
+	List *newTargetList = NIL;
+
+	Query *outerQuery = makeNode(Query);
+	outerQuery->commandType = CMD_SELECT;
+
+	/*
+	 * Create the only range table entry of the outer query: the given
+	 * subquery under the alias citus_insert_select_subquery.
+	 */
+	Alias *selectAlias = makeAlias("citus_insert_select_subquery", NIL);
+	RangeTblEntry *newRangeTableEntry = RangeTableEntryFromNSItem(
+		addRangeTableEntryForSubquery(
+			pstate, subquery,
+			selectAlias, false, true));
+	outerQuery->rtable = list_make1(newRangeTableEntry);
+
+	/*
+	 * Set the FROM expression to the subquery, which is range table
+	 * index 1 since it is the only entry in rtable.
+	 */
+	RangeTblRef *newRangeTableRef = makeNode(RangeTblRef);
+	newRangeTableRef->rtindex = 1;
+	outerQuery->jointree = makeFromExpr(list_make1(newRangeTableRef), NULL);
+
+	/*
+	 * Create a target list that matches the SELECT. Junk entries of the
+	 * subquery are skipped, so every entry added here is non-junk.
+	 */
+	TargetEntry *selectTargetEntry = NULL;
+	foreach_ptr(selectTargetEntry, subquery->targetList)
+	{
+		/* exactly 1 entry in FROM */
+		int indexInRangeTable = 1;
+
+		if (selectTargetEntry->resjunk)
+		{
+			continue;
+		}
+
+		Var *newSelectVar = makeVar(indexInRangeTable, selectTargetEntry->resno,
+									exprType((Node *) selectTargetEntry->expr),
+									exprTypmod((Node *) selectTargetEntry->expr),
+									exprCollation((Node *) selectTargetEntry->expr), 0);
+
+		TargetEntry *newSelectTargetEntry = makeTargetEntry((Expr *) newSelectVar,
+															selectTargetEntry->resno,
+															selectTargetEntry->resname,
+															selectTargetEntry->resjunk);
+
+		newTargetList = lappend(newTargetList, newSelectTargetEntry);
+	}
+
+	outerQuery->targetList = newTargetList;
+
+	return outerQuery;
+}
+
+
 /*
 * RelabelTargetEntryList relabels select target list to have matching names with
 * insert target list.
@ -1557,16 +1686,22 @@ AddInsertSelectCasts(List *insertTargetList, List *selectTargetList,
 	{
 		TargetEntry *insertEntry = (TargetEntry *) lfirst(insertEntryCell);
 		TargetEntry *selectEntry = (TargetEntry *) lfirst(selectEntryCell);
-		Var *insertColumn = (Var *) insertEntry->expr;
+
 		Form_pg_attribute attr = TupleDescAttr(destTupleDescriptor,
 											   insertEntry->resno - 1);

-		Oid sourceType = insertColumn->vartype;
+		Oid sourceType = exprType((Node *) selectEntry->expr);
 		Oid targetType = attr->atttypid;
 		if (sourceType != targetType)
 		{
-			insertEntry->expr = CastExpr((Expr *) insertColumn, sourceType, targetType,
-										 attr->attcollation, attr->atttypmod);
+			/* ReorderInsertSelectTargetLists ensures we only have Vars */
+			Assert(IsA(insertEntry->expr, Var));
+
+			/* we will cast the SELECT expression, so the type changes */
+			Var *insertVar = (Var *) insertEntry->expr;
+			insertVar->vartype = targetType;
+			insertVar->vartypmod = attr->atttypmod;
+			insertVar->varcollid = attr->attcollation;

 			/*
 			 * We cannot modify the selectEntry in-place, because ORDER BY or
--- a/src/backend/distributed/planner/local_plan_cache.c
+++ b/src/backend/distributed/planner/local_plan_cache.c
@ -174,6 +174,8 @@ DeparseLocalShardQuery(Query *jobQuery, List *relationShardList, Oid
 		 */
 		Assert(!CheckInsertSelectQuery(jobQuery));

+		AddInsertAliasIfNeeded(jobQuery);
+
 		/*
 		 * For INSERT queries we cannot use pg_get_query_def. Mainly because we
 		 * cannot run UpdateRelationToShardNames on an INSERT query. This is
--- a/src/backend/distributed/planner/multi_explain.c
+++ b/src/backend/distributed/planner/multi_explain.c
@ -1487,7 +1487,9 @@ WrapQueryForExplainAnalyze(const char *queryString, TupleDesc tupleDesc)
 		}

 		Form_pg_attribute attr = &tupleDesc->attrs[columnIndex];
-		char *attrType = format_type_with_typemod(attr->atttypid, attr->atttypmod);
+		char *attrType = format_type_extended(attr->atttypid, attr->atttypmod,
+											  FORMAT_TYPE_TYPEMOD_GIVEN |
+											  FORMAT_TYPE_FORCE_QUALIFY);

 		appendStringInfo(columnDef, "field_%d %s", columnIndex, attrType);
 	}
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@ -1616,7 +1616,19 @@ MasterAggregateExpression(Aggref *originalAggregate,
 		Expr *directarg;
 		foreach_ptr(directarg, originalAggregate->aggdirectargs)
 		{
-			if (!IsA(directarg, Const) && !IsA(directarg, Param))
+			/*
+			 * Need to replace nodes that contain any Vars with Vars referring
+			 * to the related column of the result set returned for the worker
+			 * aggregation.
+			 *
+			 * When there are no Vars, then the expression can be fully evaluated
+			 * on the coordinator, so we skip it here. This is not just an
+			 * optimization, but the result of the expression might require
+			 * calling the final function of the aggregate, and doing so when
+			 * there are no input rows (i.e.: with an empty tuple slot) is not
+			 * desirable for the node-executor methods.
+			 */
+			if (pull_var_clause_default((Node *) directarg) != NIL)
 			{
 				Var *var = makeVar(masterTableId, walkerContext->columnId,
 								   exprType((Node *) directarg),
@ -3070,7 +3082,13 @@ WorkerAggregateExpressionList(Aggref *originalAggregate,
 		Expr *directarg;
 		foreach_ptr(directarg, originalAggregate->aggdirectargs)
 		{
-			if (!IsA(directarg, Const) && !IsA(directarg, Param))
+			/*
+			 * The worker aggregation should execute any node that contains any
+			 * Var nodes and return the result in the targetlist, so that the
+			 * combine query can then fetch the result via remote scan; see
+			 * MasterAggregateExpression.
+			 */
+			if (pull_var_clause_default((Node *) directarg) != NIL)
 			{
 				workerAggregateList = lappend(workerAggregateList, directarg);
 			}
--- a/src/backend/distributed/planner/multi_router_planner.c
+++ b/src/backend/distributed/planner/multi_router_planner.c
@ -3558,19 +3558,9 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
 							 NULL, NULL);
 	}

-	if (contain_nextval_expression_walker((Node *) query->targetList, NULL))
-	{
-		/*
-		 * We let queries with nextval in the target list fall through to
-		 * the logical planner, which knows how to handle those queries.
-		 */
-		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-							 "Sequences cannot be used in router queries",
-							 NULL, NULL);
-	}
-
 	bool hasPostgresOrCitusLocalTable = false;
 	bool hasDistributedTable = false;
+	bool hasReferenceTable = false;

 	ExtractRangeTableRelationWalker((Node *) query, &rangeTableRelationList);
 	foreach(rangeTableRelationCell, rangeTableRelationList)
@ -3586,6 +3576,11 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
 				hasPostgresOrCitusLocalTable = true;
 				continue;
 			}
+			else if (IsCitusTableType(distributedTableId, REFERENCE_TABLE))
+			{
+				hasReferenceTable = true;
+				continue;
+			}
 			else if (IsCitusTableType(distributedTableId, CITUS_LOCAL_TABLE))
 			{
 				hasPostgresOrCitusLocalTable = true;
@ -3628,6 +3623,28 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
 		}
 	}

+	/*
+	 * We want to make sure nextval happens on the coordinator / the current
+	 * node, since the user may have certain expectations around the values
+	 * produced by the sequence. We therefore cannot push down the nextval
+	 * call as part of a router query.
+	 *
+	 * We let queries with nextval in the target list fall through to
+	 * the logical planner, which will ensure that the nextval is called
+	 * in the combine query on the coordinator.
+	 *
+	 * If there are no distributed or reference tables in the query,
+	 * then the query will anyway happen on the coordinator, so we can
+	 * allow nextval.
+	 */
+	if (contain_nextval_expression_walker((Node *) query->targetList, NULL) &&
+		(hasDistributedTable || hasReferenceTable))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "Sequences cannot be used in router queries",
+							 NULL, NULL);
+	}
+
 	/* local tables are not allowed if there are distributed tables */
 	if (hasPostgresOrCitusLocalTable && hasDistributedTable)
 	{
--- a/src/backend/distributed/sql/citus--10.2-1--10.2-2.sql
+++ b/src/backend/distributed/sql/citus--10.2-1--10.2-2.sql
@ -0,0 +1,5 @@
+-- citus--10.2-1--10.2-2
+
+-- bump version to 10.2-2
+
+#include "../../columnar/sql/columnar--10.2-1--10.2-2.sql"
--- a/src/backend/distributed/sql/citus--10.2-2--10.2-3.sql
+++ b/src/backend/distributed/sql/citus--10.2-2--10.2-3.sql
@ -0,0 +1,5 @@
+-- citus--10.2-2--10.2-3
+
+-- bump version to 10.2-3
+
+#include "../../columnar/sql/columnar--10.2-2--10.2-3.sql"
--- a/src/backend/distributed/sql/citus--10.2-3--10.2-4.sql
+++ b/src/backend/distributed/sql/citus--10.2-3--10.2-4.sql
@ -0,0 +1,10 @@
+-- citus--10.2-3--10.2-4
+
+-- bump version to 10.2-4
+
+#include "../../columnar/sql/columnar--10.2-3--10.2-4.sql"
+
+#include "udfs/fix_partition_shard_index_names/10.2-4.sql"
+#include "udfs/fix_all_partition_shard_index_names/10.2-4.sql"
+#include "udfs/worker_fix_partition_shard_index_names/10.2-4.sql"
+#include "udfs/citus_finish_pg_upgrade/10.2-4.sql"
--- a/src/backend/distributed/sql/citus--10.2-4--10.2-5.sql
+++ b/src/backend/distributed/sql/citus--10.2-4--10.2-5.sql
@ -0,0 +1 @@
+#include "udfs/citus_finish_pg_upgrade/10.2-5.sql"
--- a/src/backend/distributed/sql/citus--10.2-5--10.2-4.sql
+++ b/src/backend/distributed/sql/citus--10.2-5--10.2-4.sql
@ -0,0 +1 @@
+#include "udfs/citus_finish_pg_upgrade/10.2-4.sql"
--- a/src/backend/distributed/sql/downgrades/citus--10.2-2--10.2-1.sql
+++ b/src/backend/distributed/sql/downgrades/citus--10.2-2--10.2-1.sql
@ -0,0 +1,3 @@
+-- citus--10.2-2--10.2-1
+
+#include "../../../columnar/sql/downgrades/columnar--10.2-2--10.2-1.sql"
--- a/src/backend/distributed/sql/downgrades/citus--10.2-3--10.2-2.sql
+++ b/src/backend/distributed/sql/downgrades/citus--10.2-3--10.2-2.sql
@ -0,0 +1,3 @@
+-- citus--10.2-3--10.2-2
+
+#include "../../../columnar/sql/downgrades/columnar--10.2-3--10.2-2.sql"
--- a/src/backend/distributed/sql/downgrades/citus--10.2-4--10.2-3.sql
+++ b/src/backend/distributed/sql/downgrades/citus--10.2-4--10.2-3.sql
@ -0,0 +1,12 @@
+-- citus--10.2-4--10.2-3
+-- drop the partition shard index name UDFs whose 10.2-4 definitions are included by citus--10.2-3--10.2-4.sql
+DROP FUNCTION pg_catalog.fix_all_partition_shard_index_names();
+DROP FUNCTION pg_catalog.fix_partition_shard_index_names(regclass);
+DROP FUNCTION pg_catalog.worker_fix_partition_shard_index_names(regclass, text, text);
+-- switch citus_finish_pg_upgrade back to its 10.2-1 definition
+#include "../udfs/citus_finish_pg_upgrade/10.2-1.sql"
+
+-- This needs to be done after downgrading citus_finish_pg_upgrade. This is
+-- because citus_finish_pg_upgrade/10.2-4 depends on columnar_ensure_am_depends_catalog,
+-- which is dropped by columnar--10.2-4--10.2-3.sql
+#include "../../../columnar/sql/downgrades/columnar--10.2-4--10.2-3.sql"
--- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.2-4.sql
+++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.2-4.sql
@ -0,0 +1,144 @@
+CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade()
+    RETURNS void
+    LANGUAGE plpgsql
+    SET search_path = pg_catalog
+    AS $cppu$
+DECLARE
+    table_name regclass;
+    command text;
+    trigger_name text;
+BEGIN
+
+
+    IF substring(current_Setting('server_version'), '\d+')::int >= 14 THEN
+    EXECUTE $cmd$
+        CREATE AGGREGATE array_cat_agg(anycompatiblearray) (SFUNC = array_cat, STYPE = anycompatiblearray);
+        COMMENT ON AGGREGATE array_cat_agg(anycompatiblearray)
+        IS 'concatenate input arrays into a single array';
+    $cmd$;
+    ELSE
+    EXECUTE $cmd$
+        CREATE AGGREGATE array_cat_agg(anyarray) (SFUNC = array_cat, STYPE = anyarray);
+        COMMENT ON AGGREGATE array_cat_agg(anyarray)
+        IS 'concatenate input arrays into a single array';
+    $cmd$;
+    END IF;
+
+    --
+    -- Citus creates the array_cat_agg but because of a compatibility
+    -- issue between pg13-pg14, we drop and create it during upgrade
+    -- (the EXECUTE above is the create step). As Citus owns the aggregate,
+    -- it needs an extension ('e') dependency on the Citus extension, which
+    -- the INSERT below adds to pg_depend. We are not using:
+    --  ALTER EXTENSION citus DROP/ADD AGGREGATE array_cat_agg
+    -- because we don't have an easy way to check if the aggregate
+    -- exists with anyarray type or anycompatiblearray type.
+
+    INSERT INTO pg_depend
+    SELECT
+        'pg_proc'::regclass::oid as classid,
+        (SELECT oid FROM pg_proc WHERE proname = 'array_cat_agg') as objid,
+        0 as objsubid,
+        'pg_extension'::regclass::oid as refclassid,
+        (select oid from pg_extension where extname = 'citus') as refobjid,
+        0 as refobjsubid ,
+        'e' as deptype;
+
+    --
+    -- restore citus catalog tables from the backup copies in the public schema
+    --
+    INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition;
+    INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard;
+    INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement;
+    INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata;
+    INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node;
+    INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group;
+    INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction;
+    INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation;
+    -- enterprise catalog tables
+    INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
+    INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;
+
+    INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
+        name,
+        default_strategy,
+        shard_cost_function::regprocedure::regproc,
+        node_capacity_function::regprocedure::regproc,
+        shard_allowed_on_node_function::regprocedure::regproc,
+        default_threshold,
+        minimum_threshold,
+        improvement_threshold
+    FROM public.pg_dist_rebalance_strategy;
+
+    --
+    -- drop backup tables, their rows have been copied to pg_catalog above
+    --
+    DROP TABLE public.pg_dist_authinfo;
+    DROP TABLE public.pg_dist_colocation;
+    DROP TABLE public.pg_dist_local_group;
+    DROP TABLE public.pg_dist_node;
+    DROP TABLE public.pg_dist_node_metadata;
+    DROP TABLE public.pg_dist_partition;
+    DROP TABLE public.pg_dist_placement;
+    DROP TABLE public.pg_dist_poolinfo;
+    DROP TABLE public.pg_dist_shard;
+    DROP TABLE public.pg_dist_transaction;
+    DROP TABLE public.pg_dist_rebalance_strategy;
+
+    --
+    -- reset sequences so that the next value is one past the highest restored id
+    --
+    PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false);
+    PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false);
+    PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false);
+    PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false);
+    PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false);
+
+    --
+    -- register a truncate trigger on each distributed table, flagged as internal
+    --
+    FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition
+    LOOP
+        trigger_name := 'truncate_trigger_' || table_name::oid;
+        command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
+        EXECUTE command;
+        command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name);
+        EXECUTE command;
+    END LOOP;
+
+    --
+    -- set dependencies: a normal ('n') dependency of each distributed table on citus
+    --
+    INSERT INTO pg_depend
+    SELECT
+        'pg_class'::regclass::oid as classid,
+        p.logicalrelid::regclass::oid as objid,
+        0 as objsubid,
+        'pg_extension'::regclass::oid as refclassid,
+        (select oid from pg_extension where extname = 'citus') as refobjid,
+        0 as refobjsubid ,
+        'n' as deptype
+    FROM pg_catalog.pg_dist_partition p;
+
+    -- set dependencies for columnar table access method
+    PERFORM citus_internal.columnar_ensure_am_depends_catalog();
+
+    -- restore pg_dist_object by resolving the stable identifiers back to object addresses
+    TRUNCATE citus.pg_dist_object;
+    INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
+    SELECT
+        address.classid,
+        address.objid,
+        address.objsubid,
+        naming.distribution_argument_index,
+        naming.colocationid
+    FROM
+        public.pg_dist_object naming,
+        pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;
+
+    DROP TABLE public.pg_dist_object;
+END;
+$cppu$;
+
+COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade()
+    IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade';
--- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.2-5.sql
+++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.2-5.sql
@ -0,0 +1,144 @@
+CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade()
+    RETURNS void
+    LANGUAGE plpgsql
+    SET search_path = pg_catalog
+    AS $cppu$
+DECLARE
+    table_name regclass;
+    command text;
+    trigger_name text;
+BEGIN
+
+
+    IF substring(current_Setting('server_version'), '\d+')::int >= 14 THEN
+    EXECUTE $cmd$
+        CREATE AGGREGATE array_cat_agg(anycompatiblearray) (SFUNC = array_cat, STYPE = anycompatiblearray);
+        COMMENT ON AGGREGATE array_cat_agg(anycompatiblearray)
+        IS 'concatenate input arrays into a single array';
+    $cmd$;
+    ELSE
+    EXECUTE $cmd$
+        CREATE AGGREGATE array_cat_agg(anyarray) (SFUNC = array_cat, STYPE = anyarray);
+        COMMENT ON AGGREGATE array_cat_agg(anyarray)
+        IS 'concatenate input arrays into a single array';
+    $cmd$;
+    END IF;
+
+    --
+    -- Citus creates the array_cat_agg but because of a compatibility
+    -- issue between pg13-pg14, we drop and create it during upgrade
+    -- (the EXECUTE above is the create step). As Citus owns the aggregate,
+    -- it needs an extension ('e') dependency on the Citus extension, which
+    -- the INSERT below adds to pg_depend. We are not using:
+    --  ALTER EXTENSION citus DROP/ADD AGGREGATE array_cat_agg
+    -- because we don't have an easy way to check if the aggregate
+    -- exists with anyarray type or anycompatiblearray type.
+
+    INSERT INTO pg_depend
+    SELECT
+        'pg_proc'::regclass::oid as classid,
+        (SELECT oid FROM pg_proc WHERE proname = 'array_cat_agg') as objid,
+        0 as objsubid,
+        'pg_extension'::regclass::oid as refclassid,
+        (select oid from pg_extension where extname = 'citus') as refobjid,
+        0 as refobjsubid ,
+        'e' as deptype;
+
+    --
+    -- restore citus catalog tables from the backup copies in the public schema
+    --
+    INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition;
+    INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard;
+    INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement;
+    INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata;
+    INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node;
+    INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group;
+    INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction;
+    INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation;
+    -- enterprise catalog tables
+    INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
+    INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;
+
+    INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
+        name,
+        default_strategy,
+        shard_cost_function::regprocedure::regproc,
+        node_capacity_function::regprocedure::regproc,
+        shard_allowed_on_node_function::regprocedure::regproc,
+        default_threshold,
+        minimum_threshold,
+        improvement_threshold
+    FROM public.pg_dist_rebalance_strategy;
+
+    --
+    -- drop backup tables, their rows have been copied to pg_catalog above
+    --
+    DROP TABLE public.pg_dist_authinfo;
+    DROP TABLE public.pg_dist_colocation;
+    DROP TABLE public.pg_dist_local_group;
+    DROP TABLE public.pg_dist_node;
+    DROP TABLE public.pg_dist_node_metadata;
+    DROP TABLE public.pg_dist_partition;
+    DROP TABLE public.pg_dist_placement;
+    DROP TABLE public.pg_dist_poolinfo;
+    DROP TABLE public.pg_dist_shard;
+    DROP TABLE public.pg_dist_transaction;
+    DROP TABLE public.pg_dist_rebalance_strategy;
+
+    --
+    -- reset sequences so that the next value is one past the highest restored id
+    --
+    PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false);
+    PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false);
+    PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false);
+    PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false);
+    PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false);
+
+    --
+    -- register a truncate trigger on each distributed table, flagged as internal;
+    -- foreign tables (relkind 'f') are skipped
+    FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition JOIN pg_class ON (logicalrelid = oid) WHERE relkind <> 'f'
+    LOOP
+        trigger_name := 'truncate_trigger_' || table_name::oid;
+        command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
+        EXECUTE command;
+        command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name);
+        EXECUTE command;
+    END LOOP;
+
+    --
+    -- set dependencies: a normal ('n') dependency of each distributed table on citus
+    --
+    INSERT INTO pg_depend
+    SELECT
+        'pg_class'::regclass::oid as classid,
+        p.logicalrelid::regclass::oid as objid,
+        0 as objsubid,
+        'pg_extension'::regclass::oid as refclassid,
+        (select oid from pg_extension where extname = 'citus') as refobjid,
+        0 as refobjsubid ,
+        'n' as deptype
+    FROM pg_catalog.pg_dist_partition p;
+
+    -- set dependencies for columnar table access method
+    PERFORM citus_internal.columnar_ensure_am_depends_catalog();
+
+    -- restore pg_dist_object by resolving the stable identifiers back to object addresses
+    TRUNCATE citus.pg_dist_object;
+    INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
+    SELECT
+        address.classid,
+        address.objid,
+        address.objsubid,
+        naming.distribution_argument_index,
+        naming.colocationid
+    FROM
+        public.pg_dist_object naming,
+        pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;
+
+    DROP TABLE public.pg_dist_object;
+END;
+$cppu$;
+
+COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade()
+    IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade';
--- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql
+++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql
@ -97,7 +97,7 @@ BEGIN
    --
    -- register triggers
    --
-    FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition
+    FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition JOIN pg_class ON (logicalrelid = oid) WHERE relkind <> 'f'
    LOOP
        trigger_name := 'truncate_trigger_' || table_name::oid;
        command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
@ -120,6 +120,9 @@ BEGIN
        'n' as deptype
    FROM pg_catalog.pg_dist_partition p;

+    -- set dependencies for columnar table access method
+    PERFORM citus_internal.columnar_ensure_am_depends_catalog();
+
    -- restore pg_dist_object from the stable identifiers
    TRUNCATE citus.pg_dist_object;
    INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
--- a/src/backend/distributed/sql/udfs/fix_all_partition_shard_index_names/10.2-4.sql
+++ b/src/backend/distributed/sql/udfs/fix_all_partition_shard_index_names/10.2-4.sql
@ -0,0 +1,21 @@
+CREATE OR REPLACE FUNCTION pg_catalog.fix_all_partition_shard_index_names()
+    RETURNS SETOF regclass
+    LANGUAGE plpgsql
+    AS $$
+DECLARE
+    partitioned_table regclass;
+BEGIN
+    FOR partitioned_table IN SELECT p.logicalrelid
+        FROM pg_dist_partition AS p
+        INNER JOIN pg_class AS c ON c.oid = p.logicalrelid
+        WHERE c.relkind = 'p'  -- distributed tables that are partitioned tables
+        ORDER BY c.relname, c.oid
+    LOOP
+        EXECUTE 'SELECT fix_partition_shard_index_names( ' || quote_literal(partitioned_table) || ' )';
+        RETURN NEXT partitioned_table;  -- emit each table once its call has returned
+    END LOOP;
+    RETURN;
+END;
+$$;
+COMMENT ON FUNCTION pg_catalog.fix_all_partition_shard_index_names()
+    IS 'fix index names on partition shards of all tables';
--- a/src/backend/distributed/sql/udfs/fix_all_partition_shard_index_names/latest.sql
+++ b/src/backend/distributed/sql/udfs/fix_all_partition_shard_index_names/latest.sql
@ -0,0 +1,21 @@
+CREATE OR REPLACE FUNCTION pg_catalog.fix_all_partition_shard_index_names()
+    RETURNS SETOF regclass
+    LANGUAGE plpgsql
+    AS $$
+DECLARE
+    partitioned_table regclass;
+BEGIN
+    FOR partitioned_table IN SELECT p.logicalrelid
+        FROM pg_dist_partition AS p
+        INNER JOIN pg_class AS c ON c.oid = p.logicalrelid
+        WHERE c.relkind = 'p'  -- distributed tables that are partitioned tables
+        ORDER BY c.relname, c.oid
+    LOOP
+        EXECUTE 'SELECT fix_partition_shard_index_names( ' || quote_literal(partitioned_table) || ' )';
+        RETURN NEXT partitioned_table;  -- emit each table once its call has returned
+    END LOOP;
+    RETURN;
+END;
+$$;
+COMMENT ON FUNCTION pg_catalog.fix_all_partition_shard_index_names()
+    IS 'fix index names on partition shards of all tables';
--- a/src/backend/distributed/sql/udfs/fix_partition_shard_index_names/10.2-4.sql
+++ b/src/backend/distributed/sql/udfs/fix_partition_shard_index_names/10.2-4.sql
@ -0,0 +1,6 @@
+CREATE FUNCTION pg_catalog.fix_partition_shard_index_names(table_name regclass)
+    RETURNS void
+    AS 'MODULE_PATHNAME', $$fix_partition_shard_index_names$$
+    LANGUAGE C STRICT;  -- STRICT: a NULL argument returns NULL without calling the C function
+COMMENT ON FUNCTION pg_catalog.fix_partition_shard_index_names(regclass)
+    IS 'fix index names on partition shards of given table';
--- a/src/backend/distributed/sql/udfs/fix_partition_shard_index_names/latest.sql
+++ b/src/backend/distributed/sql/udfs/fix_partition_shard_index_names/latest.sql
@ -0,0 +1,6 @@
+CREATE FUNCTION pg_catalog.fix_partition_shard_index_names(table_name regclass)
+    RETURNS void
+    AS 'MODULE_PATHNAME', $$fix_partition_shard_index_names$$
+    LANGUAGE C STRICT;  -- STRICT: a NULL argument returns NULL without calling the C function
+COMMENT ON FUNCTION pg_catalog.fix_partition_shard_index_names(regclass)
+    IS 'fix index names on partition shards of given table';
--- a/src/backend/distributed/sql/udfs/worker_fix_partition_shard_index_names/10.2-4.sql
+++ b/src/backend/distributed/sql/udfs/worker_fix_partition_shard_index_names/10.2-4.sql
@ -0,0 +1,10 @@
+CREATE FUNCTION pg_catalog.worker_fix_partition_shard_index_names(
+    parent_shard_index regclass,          -- index on the parent shard
+    partition_shard text,                 -- name of the partition shard, may be schema-qualified
+    new_partition_shard_index_name text)  -- name to give to the index on that partition shard
+    RETURNS void
+    AS 'MODULE_PATHNAME', $$worker_fix_partition_shard_index_names$$
+    LANGUAGE C
+    STRICT;
+COMMENT ON FUNCTION pg_catalog.worker_fix_partition_shard_index_names(regclass, text, text)
+    IS 'fix the name of the index on given partition shard that is child of given parent_index';
--- a/src/backend/distributed/sql/udfs/worker_fix_partition_shard_index_names/latest.sql
+++ b/src/backend/distributed/sql/udfs/worker_fix_partition_shard_index_names/latest.sql
@ -0,0 +1,10 @@
+CREATE FUNCTION pg_catalog.worker_fix_partition_shard_index_names(
+    parent_shard_index regclass,          -- index on the parent shard
+    partition_shard text,                 -- name of the partition shard, may be schema-qualified
+    new_partition_shard_index_name text)  -- name to give to the index on that partition shard
+    RETURNS void
+    AS 'MODULE_PATHNAME', $$worker_fix_partition_shard_index_names$$
+    LANGUAGE C
+    STRICT;
+COMMENT ON FUNCTION pg_catalog.worker_fix_partition_shard_index_names(regclass, text, text)
+    IS 'fix the name of the index on given partition shard that is child of given parent_index';
--- a/src/backend/distributed/transaction/transaction_management.c
+++ b/src/backend/distributed/transaction/transaction_management.c
@ -320,7 +320,7 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
 			/* stop propagating notices from workers, we know the query is failed */
 			DisableWorkerMessagePropagation();

-			RemoveIntermediateResultsDirectory();
+			RemoveIntermediateResultsDirectories();

 			ResetShardPlacementTransactionState();

@ -408,7 +408,7 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
 			 * existing folders that are associated with distributed transaction
 			 * ids on the worker nodes.
 			 */
-			RemoveIntermediateResultsDirectory();
+			RemoveIntermediateResultsDirectories();

 			UnSetDistributedTransactionId();
 			break;
@ -420,10 +420,10 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
 			 * If the distributed query involves 2PC, we already removed
 			 * the intermediate result directory on XACT_EVENT_PREPARE. However,
 			 * if not, we should remove it here on the COMMIT. Since
-			 * RemoveIntermediateResultsDirectory() is idempotent, we're safe
+			 * RemoveIntermediateResultsDirectories() is idempotent, we're safe
 			 * to call it here again even if the transaction involves 2PC.
 			 */
-			RemoveIntermediateResultsDirectory();
+			RemoveIntermediateResultsDirectories();

 			/* nothing further to do if there's no managed remote xacts */
 			if (CurrentCoordinatedTransactionState == COORD_TRANS_NONE)
--- a/src/backend/distributed/utils/colocation_utils.c
+++ b/src/backend/distributed/utils/colocation_utils.c
@ -1235,10 +1235,17 @@ DeleteColocationGroup(uint32 colocationId)
 	{
 		/*
 		 * simple_heap_delete() expects that the caller has at least an
-		 * AccessShareLock on replica identity index.
+		 * AccessShareLock on primary key index.
+		 *
+		 * XXX: This does not seem required, do we really need to acquire this lock?
+		 * Postgres doesn't acquire such locks on indexes before deleting catalog tuples.
+		 * Linking here the reasons we added this lock acquirement:
+		 * https://github.com/citusdata/citus/pull/2851#discussion_r306569462
+		 * https://github.com/citusdata/citus/pull/2855#discussion_r313628554
+		 * https://github.com/citusdata/citus/issues/1890
 		 */
 		Relation replicaIndex =
-			index_open(RelationGetReplicaIndex(pgDistColocation),
+			index_open(RelationGetPrimaryKeyIndex(pgDistColocation),
 					   AccessShareLock);
 		simple_heap_delete(pgDistColocation, &(heapTuple->t_self));

--- a/src/backend/distributed/utils/enable_ssl.c
+++ b/src/backend/distributed/utils/enable_ssl.c
@ -8,6 +8,19 @@
 *-------------------------------------------------------------------------
 */

+
+/*
+ * Declare that we target the OpenSSL 1.0.1 compatible API so that functions
+ * deprecated in OpenSSL 3.0 don't trigger warnings. PG14 already defines this
+ * itself, so only do it for older versions to avoid a redefinition warning.
+ * NOTE(review): confirm PG_VERSION_NUM/PG_VERSION_14 are defined before this #if.
+ */
+#if PG_VERSION_NUM < PG_VERSION_14
+#ifndef OPENSSL_API_COMPAT
+#define OPENSSL_API_COMPAT 0x1000100L
+#endif
+#endif
+
 #include "postgres.h"

 #include "distributed/connection_management.h"
--- a/src/backend/distributed/utils/multi_partitioning_utils.c
+++ b/src/backend/distributed/utils/multi_partitioning_utils.c
@ -11,11 +11,13 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "catalog/index.h"
 #include "catalog/indexing.h"
 #include "catalog/partition.h"
 #include "catalog/pg_class.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_inherits.h"
+#include "commands/tablecmds.h"
 #include "common/string.h"
 #include "distributed/citus_nodes.h"
 #include "distributed/adaptive_executor.h"
@ -26,13 +28,16 @@
 #include "distributed/deparse_shard_query.h"
 #include "distributed/listutils.h"
 #include "distributed/metadata_utility.h"
+#include "distributed/multi_executor.h"
 #include "distributed/multi_partitioning_utils.h"
 #include "distributed/multi_physical_planner.h"
 #include "distributed/relay_utility.h"
 #include "distributed/resource_lock.h"
 #include "distributed/shardinterval_utils.h"
 #include "distributed/version_compat.h"
+#include "distributed/worker_protocol.h"
 #include "lib/stringinfo.h"
+#include "nodes/makefuncs.h"
 #include "nodes/pg_list.h"
 #include "pgstat.h"
 #include "partitioning/partdesc.h"
@ -41,12 +46,22 @@
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
 #include "utils/syscache.h"
+#include "utils/varlena.h"

 static char * PartitionBound(Oid partitionId);
 static Relation try_relation_open_nolock(Oid relationId);
 static List * CreateFixPartitionConstraintsTaskList(Oid relationId);
 static List * WorkerFixPartitionConstraintCommandList(Oid relationId, uint64 shardId,
 													  List *checkConstraintList);
+static List * CreateFixPartitionShardIndexNamesTaskList(Oid parentRelationId);
+static List * WorkerFixPartitionShardIndexNamesCommandList(uint64 parentShardId,
+														   List *indexIdList);
+static List * WorkerFixPartitionShardIndexNamesCommandListForParentShardIndex(
+	char *qualifiedParentShardIndexName, Oid parentIndexId);
+static List * WorkerFixPartitionShardIndexNamesCommandListForPartitionIndex(Oid
+																			partitionIndexId,
+																			char *
+																			qualifiedParentShardIndexName);
 static List * CheckConstraintNameListForRelation(Oid relationId);
 static bool RelationHasConstraint(Oid relationId, char *constraintName);
 static char * RenameConstraintCommand(Oid relationId, char *constraintName,
@ -55,6 +70,8 @@ static char * RenameConstraintCommand(Oid relationId, char *constraintName,

 PG_FUNCTION_INFO_V1(fix_pre_citus10_partitioned_table_constraint_names);
 PG_FUNCTION_INFO_V1(worker_fix_pre_citus10_partitioned_table_constraint_names);
+PG_FUNCTION_INFO_V1(fix_partition_shard_index_names);
+PG_FUNCTION_INFO_V1(worker_fix_partition_shard_index_names);


 /*
@ -130,6 +147,167 @@ worker_fix_pre_citus10_partitioned_table_constraint_names(PG_FUNCTION_ARGS)
 }


+/*
+ * fix_partition_shard_index_names fixes the index names of shards of partitions of
+ * partitioned tables on workers.
+ *
+ * When running CREATE INDEX on parent_table, we didn't explicitly create the index on
+ * each partition as well. Postgres created indexes for partitions in the coordinator,
+ * and also in the workers. Actually, Postgres auto-generates index names when auto-creating
+ * indexes on each partition shard of the parent shards. If index name is too long, it
+ * truncates the name and adds _idx postfix to it. However, when truncating the name, the
+ * shardId of the partition shard can be lost. This may result in the same index name used for
+ * the partition shell table and one of the partition shards.
+ * For more details, check issue #4962 https://github.com/citusdata/citus/issues/4962
+ *
+ * fix_partition_shard_index_names renames indexes of shards of partition tables to include
+ * the shardId at the end of the name, regardless of whether index name was long or short
+ * As a result there will be no index name ending in _idx, rather all will end in _{shardid}
+ * Algorithm is:
+ * foreach parentShard in shardListOfParentTableId:
+ *  foreach parentIndex on parent:
+ *      generate qualifiedParentShardIndexName -> parentShardIndex
+ *      foreach inheritedPartitionIndex on parentIndex:
+ *          get table relation of inheritedPartitionIndex -> partitionId
+ *          foreach partitionShard in shardListOfPartitionid:
+ *              generate qualifiedPartitionShardName -> partitionShard
+ *              generate newPartitionShardIndexName
+ *              (the following happens in the worker node)
+ *              foreach inheritedPartitionShardIndex on parentShardIndex:
+ *                  if table relation of inheritedPartitionShardIndex is partitionShard:
+ *                      if inheritedPartitionShardIndex does not have proper name:
+ *                          Rename(inheritedPartitionShardIndex, newPartitionShardIndexName)
+ *                      break
+ */
+Datum
+fix_partition_shard_index_names(PG_FUNCTION_ARGS)
+{
+	CheckCitusVersion(ERROR);
+	EnsureCoordinator();
+
+	Oid relationId = PG_GETARG_OID(0);
+
+	Relation relation = try_relation_open(relationId, AccessExclusiveLock);
+	if (relation == NULL)
+	{
+		ereport(NOTICE, (errmsg("relation with OID %u does not exist, skipping",
+								relationId)));
+		PG_RETURN_VOID();
+	}
+
+	if (relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+	{
+		/* copy the name first: the relcache entry must not be read once it is closed */
+		char *relationName = pstrdup(RelationGetRelationName(relation));
+		relation_close(relation, NoLock);
+		ereport(ERROR, (errmsg(
+							"Fixing shard index names is only applicable to partitioned"
+							" tables, and \"%s\" is not a partitioned table", relationName)));
+	}
+
+	if (!IsCitusTable(relationId))
+	{
+		relation_close(relation, NoLock);
+		ereport(ERROR, (errmsg("fix_partition_shard_index_names can "
+							   "only be called for distributed partitioned tables")));
+	}
+
+	EnsureTableOwner(relationId);
+
+	List *taskList = CreateFixPartitionShardIndexNamesTaskList(relationId);
+
+	/* do not do anything if there are no index names to fix */
+	if (taskList != NIL)
+	{
+		bool localExecutionSupported = true;
+		RowModifyLevel modLevel = ROW_MODIFY_NONE;
+		ExecutionParams *execParams = CreateBasicExecutionParams(modLevel, taskList,
+																 MaxAdaptiveExecutorPoolSize,
+																 localExecutionSupported);
+		ExecuteTaskListExtended(execParams);
+	}
+
+	relation_close(relation, NoLock);
+
+	PG_RETURN_VOID();
+}
+
+
+/*
+ * worker_fix_partition_shard_index_names fixes the index name of the index on given
+ * partition shard that has parent the given parent index.
+ * The parent index should be the index of a shard of a distributed partitioned table.
+ */
+Datum
+worker_fix_partition_shard_index_names(PG_FUNCTION_ARGS)
+{
+	Oid parentIndexOid = PG_GETARG_OID(0);
+	text *shardNameText = PG_GETARG_TEXT_P(1);
+
+	/*
+	 * Resolve the (possibly schema-qualified) partition shard name without taking
+	 * a lock; a shard that cannot be found turns this call into a no-op.
+	 */
+	bool missingOk = true;
+	RangeVar *shardRangeVar =
+		makeRangeVarFromNameList(textToQualifiedNameList(shardNameText));
+	Oid shardRelationOid = RangeVarGetRelid(shardRangeVar, NoLock, missingOk);
+	if (!OidIsValid(shardRelationOid))
+	{
+		PG_RETURN_VOID();
+	}
+
+	CheckCitusVersion(ERROR);
+	EnsureTableOwner(shardRelationOid);
+
+	char *desiredIndexName = text_to_cstring(PG_GETARG_TEXT_P(2));
+
+	if (!has_subclass(parentIndexOid))
+	{
+		ereport(ERROR, (errmsg("could not fix child index names: "
+							   "index is not partitioned")));
+	}
+
+	/* list the children of the parent shard index, asking for a lock on each */
+	List *childIndexOidList = find_inheritance_children(parentIndexOid,
+														ShareRowExclusiveLock);
+	Oid childIndexOid = InvalidOid;
+	foreach_oid(childIndexOid, childIndexOidList)
+	{
+		/* skip child indexes that sit on some other partition shard */
+		if (IndexGetRelation(childIndexOid, false) != shardRelationOid)
+		{
+			continue;
+		}
+
+		char *currentIndexName = get_rel_name(childIndexOid);
+		bool endsWithShardId =
+			ExtractShardIdFromTableName(currentIndexName, missingOk) != INVALID_SHARD_ID;
+		if (!endsWithShardId)
+		{
+			/*
+			 * INVALID_SHARD_ID means the current name does not end in _shardid.
+			 * Rename the index to the requested name, which does, so that it can
+			 * be reached through the conventional Citus naming again.
+			 */
+			char *indexSchemaName = get_namespace_name(get_rel_namespace(childIndexOid));
+
+			RenameStmt *renameStmt = makeNode(RenameStmt);
+			renameStmt->renameType = OBJECT_INDEX;
+			renameStmt->missing_ok = false;
+			renameStmt->relation = makeRangeVar(indexSchemaName, currentIndexName, -1);
+			renameStmt->newname = desiredIndexName;
+			RenameRelation(renameStmt);
+		}
+
+		/* only the first matching child index is handled */
+		break;
+	}
+
+	PG_RETURN_VOID();
+}
+
+
 /*
 * CreateFixPartitionConstraintsTaskList goes over all the partitions of a distributed
 * partitioned table, and creates the list of tasks to execute
@ -257,6 +435,199 @@ WorkerFixPartitionConstraintCommandList(Oid relationId, uint64 shardId,
 }


+/*
+ * CreateFixPartitionShardIndexNamesTaskList goes over all the indexes of a distributed
+ * partitioned table, and creates the list of tasks to execute
+ * worker_fix_partition_shard_index_names UDF on worker nodes.
+ *
+ * We create parent_table_shard_count tasks,
+ * each task with parent_indexes_count x parent_partitions_count query strings.
+ *
+ * NOTE(review): the per-partition-index helper in this file emits one command
+ * for every shard of the partition, so the number of query strings per task
+ * looks larger than stated above -- confirm and adjust this comment if so.
+ *
+ * Returns NIL when the parent relation has no indexes or no partitions. The
+ * relation opened here to read the index list is closed on every return path.
+ * Each task is a DDL_TASK anchored at a parent shard and placed on the active
+ * placements of that parent shard.
+ */
+static List *
+CreateFixPartitionShardIndexNamesTaskList(Oid parentRelationId)
+{
+	List *taskList = NIL;
+
+	/* enumerate the tasks when putting them to the taskList */
+	int taskId = 1;
+
+	Relation parentRelation = RelationIdGetRelation(parentRelationId);
+
+	List *parentIndexIdList = RelationGetIndexList(parentRelation);
+
+	/* early exit if the parent relation does not have any indexes */
+	if (parentIndexIdList == NIL)
+	{
+		RelationClose(parentRelation);
+		return NIL;
+	}
+
+	List *partitionList = PartitionList(parentRelationId);
+
+	/* early exit if the parent relation does not have any partitions */
+	if (partitionList == NIL)
+	{
+		RelationClose(parentRelation);
+		return NIL;
+	}
+
+	List *parentShardIntervalList = LoadShardIntervalList(parentRelationId);
+
+	/*
+	 * lock metadata before getting placement lists: first the shards of the
+	 * parent, then the shards of every partition, all in ShareLock mode
+	 */
+	LockShardListMetadata(parentShardIntervalList, ShareLock);
+	Oid partitionId = InvalidOid;
+	foreach_oid(partitionId, partitionList)
+	{
+		List *partitionShardIntervalList = LoadShardIntervalList(partitionId);
+		LockShardListMetadata(partitionShardIntervalList, ShareLock);
+	}
+
+	ShardInterval *parentShardInterval = NULL;
+	foreach_ptr(parentShardInterval, parentShardIntervalList)
+	{
+		uint64 parentShardId = parentShardInterval->shardId;
+
+		List *queryStringList = WorkerFixPartitionShardIndexNamesCommandList(
+			parentShardId, parentIndexIdList);
+
+		Task *task = CitusMakeNode(Task);
+		task->jobId = INVALID_JOB_ID;
+		task->taskId = taskId++;
+
+		task->taskType = DDL_TASK;
+		SetTaskQueryStringList(task, queryStringList);
+		task->dependentTaskList = NULL;
+		task->replicationModel = REPLICATION_MODEL_INVALID;
+		task->anchorShardId = parentShardId;
+		task->taskPlacementList = ActiveShardPlacementList(parentShardId);
+
+		taskList = lappend(taskList, task);
+	}
+
+	RelationClose(parentRelation);
+
+	return taskList;
+}
+
+
+/*
+ * WorkerFixPartitionShardIndexNamesCommandList creates a list of queries that will fix
+ * all child index names of parent indexes on given shard of parent partitioned table.
+ *
+ * parentShardId is the shard of the parent table the commands are built for and
+ * parentIndexIdList holds the oids of the indexes on the parent table. Parent
+ * indexes for which has_subclass() is false are skipped. The returned list is the
+ * concatenation of the per-index command lists and is NIL when every index is
+ * skipped or yields no commands.
+ */
+static List *
+WorkerFixPartitionShardIndexNamesCommandList(uint64 parentShardId,
+											 List *parentIndexIdList)
+{
+	List *commandList = NIL;
+	Oid parentIndexId = InvalidOid;
+	foreach_oid(parentIndexId, parentIndexIdList)
+	{
+		if (!has_subclass(parentIndexId))
+		{
+			continue;
+		}
+
+		/*
+		 * Get the qualified name of the corresponding index of given parent index
+		 * in the parent shard with given parentShardId: the shard id is appended
+		 * to a copy of the parent index name, which is then qualified with the
+		 * schema of the parent index
+		 */
+		char *parentIndexName = get_rel_name(parentIndexId);
+		char *parentShardIndexName = pstrdup(parentIndexName);
+		AppendShardIdToName(&parentShardIndexName, parentShardId);
+		Oid schemaId = get_rel_namespace(parentIndexId);
+		char *schemaName = get_namespace_name(schemaId);
+		char *qualifiedParentShardIndexName = quote_qualified_identifier(schemaName,
+																		 parentShardIndexName);
+		List *commands = WorkerFixPartitionShardIndexNamesCommandListForParentShardIndex(
+			qualifiedParentShardIndexName, parentIndexId);
+		commandList = list_concat(commandList, commands);
+	}
+
+	return commandList;
+}
+
+
+/*
+ * WorkerFixPartitionShardIndexNamesCommandListForParentShardIndex creates a list of queries that will fix
+ * all child index names of given index on shard of parent partitioned table.
+ *
+ * qualifiedParentShardIndexName is the schema-qualified name of the index on the
+ * parent shard and is forwarded unchanged to the per-partition-index helper;
+ * parentIndexId is the oid of the index on the parent table whose children are
+ * enumerated. Returns the concatenation of the command lists built for each
+ * child index, or NIL when there are no children.
+ */
+static List *
+WorkerFixPartitionShardIndexNamesCommandListForParentShardIndex(
+	char *qualifiedParentShardIndexName, Oid parentIndexId)
+{
+	List *commandList = NIL;
+
+	/*
+	 * Get the list of all partition indexes that are children of current
+	 * index on parent; ShareRowExclusiveLock is passed as the lock mode
+	 * to find_inheritance_children
+	 */
+	List *partitionIndexIds = find_inheritance_children(parentIndexId,
+														ShareRowExclusiveLock);
+	Oid partitionIndexId = InvalidOid;
+	foreach_oid(partitionIndexId, partitionIndexIds)
+	{
+		List *commands = WorkerFixPartitionShardIndexNamesCommandListForPartitionIndex(
+			partitionIndexId, qualifiedParentShardIndexName);
+		commandList = list_concat(commandList, commands);
+	}
+	return commandList;
+}
+
+
+/*
+ * WorkerFixPartitionShardIndexNamesCommandListForPartitionIndex creates a list of queries that will fix
+ * all child index names of given index on shard of parent partitioned table, whose table relation is a shard
+ * of the partition that is the table relation of given partitionIndexId
+ *
+ * partitionIndexId is the oid of an index on a partition and
+ * qualifiedParentShardIndexName is the schema-qualified name of the matching
+ * index on a parent shard. One
+ * "SELECT worker_fix_partition_shard_index_names(...)" command is generated for
+ * every shard of the partition, each carrying the same parent shard index name,
+ * the qualified partition shard name and the desired index name (partition
+ * index name with the partition shard id appended). All three arguments are
+ * passed as quoted literals.
+ *
+ * NOTE(review): commands are generated for all shards of the partition, not
+ * only for a shard related to the given parent shard index; presumably the
+ * worker UDF treats the non-matching combinations as no-ops -- confirm.
+ */
+static List *
+WorkerFixPartitionShardIndexNamesCommandListForPartitionIndex(Oid partitionIndexId,
+															  char *
+															  qualifiedParentShardIndexName)
+{
+	List *commandList = NIL;
+
+	/* get info for the partition relation this index belongs to */
+	char *partitionIndexName = get_rel_name(partitionIndexId);
+	Oid partitionId = IndexGetRelation(partitionIndexId, false);
+	char *partitionName = get_rel_name(partitionId);
+	char *partitionSchemaName = get_namespace_name(get_rel_namespace(partitionId));
+	List *partitionShardIntervalList = LoadShardIntervalList(partitionId);
+
+	ShardInterval *partitionShardInterval = NULL;
+	foreach_ptr(partitionShardInterval, partitionShardIntervalList)
+	{
+		/*
+		 * Prepare commands for each shard of current partition
+		 * to fix the index name that corresponds to the
+		 * current parent index name
+		 */
+		uint64 partitionShardId = partitionShardInterval->shardId;
+
+		/* get qualified partition shard name */
+		char *partitionShardName = pstrdup(partitionName);
+		AppendShardIdToName(&partitionShardName, partitionShardId);
+		char *qualifiedPartitionShardName = quote_qualified_identifier(
+			partitionSchemaName,
+			partitionShardName);
+
+		/* generate the new correct index name */
+		char *newPartitionShardIndexName = pstrdup(partitionIndexName);
+		AppendShardIdToName(&newPartitionShardIndexName, partitionShardId);
+
+		/* create worker_fix_partition_shard_index_names command */
+		StringInfo shardQueryString = makeStringInfo();
+		appendStringInfo(shardQueryString,
+						 "SELECT worker_fix_partition_shard_index_names(%s::regclass, %s, %s)",
+						 quote_literal_cstr(qualifiedParentShardIndexName),
+						 quote_literal_cstr(qualifiedPartitionShardName),
+						 quote_literal_cstr(newPartitionShardIndexName));
+		commandList = lappend(commandList, shardQueryString->data);
+	}
+
+	return commandList;
+}
+
+
 /*
 * RelationHasConstraint checks if a relation has a constraint with a given name.
 */
--- a/src/backend/distributed/utils/reference_table_utils.c
+++ b/src/backend/distributed/utils/reference_table_utils.c
@ -60,8 +60,16 @@ PG_FUNCTION_INFO_V1(replicate_reference_tables);
 Datum
 replicate_reference_tables(PG_FUNCTION_ARGS)
 {
+	/* to prevent concurrent node additions while copying reference tables */
+	LockRelationOid(DistNodeRelationId(), ShareLock);
 	EnsureReferenceTablesExistOnAllNodes();

+	/*
+	 * Given the copying of reference tables and updating metadata have been done via a
+	 * loopback connection we do not have to retain the lock on pg_dist_node anymore.
+	 */
+	UnlockRelationOid(DistNodeRelationId(), ShareLock);
+
 	PG_RETURN_VOID();
 }

@ -91,33 +99,54 @@ EnsureReferenceTablesExistOnAllNodes(void)
 void
 EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
 {
-	/*
-	 * Prevent this function from running concurrently with itself.
-	 *
-	 * It also prevents concurrent DROP TABLE or DROP SCHEMA. We need this
-	 * because through-out this function we assume values in referenceTableIdList
-	 * are still valid.
-	 *
-	 * We don't need to handle other kinds of reference table DML/DDL here, since
-	 * master_copy_shard_placement gets enough locks for that.
-	 *
-	 * We also don't need special handling for concurrent create_refernece_table.
-	 * Since that will trigger a call to this function from another backend,
-	 * which will block until our call is finished.
-	 */
+	List *referenceTableIdList = NIL;
+	uint64 shardId = INVALID_SHARD_ID;
+	List *newWorkersList = NIL;
+	const char *referenceTableName = NULL;
 	int colocationId = CreateReferenceTableColocationId();
-	LockColocationId(colocationId, ExclusiveLock);

-	List *referenceTableIdList = CitusTableTypeIdList(REFERENCE_TABLE);
+	/*
+	 * Most of the time this function should result in a conclusion where we do not need
+	 * to copy any reference tables. To prevent excessive locking the majority of the time
+	 * we run our precondition checks first with a lower lock. If, after checking with the
+	 * lower lock, we find that we might need to copy reference tables, we check with a more
+	 * aggressive and self conflicting lock. It is important to be self conflicting in the
+	 * second run to make sure that two concurrent calls to this routine will actually not
+	 * run concurrently after the initial check.
+	 *
+	 * If after two iterations of precondition checks we still find the need for copying
+	 * reference tables we exit the loop with all locks held. This will prevent concurrent
+	 * DROP TABLE and create_reference_table calls so that the list of reference tables we
+	 * operate on are stable.
+	 *
+	 * Since the changes to the reference table placements are made via loopback
+	 * connections we release the locks held at the end of this function. Due to Citus
+	 * only running transactions in READ COMMITTED mode we can be sure that other
+	 * transactions correctly find the metadata entries.
+	 */
+	LOCKMODE lockmodes[] = { AccessShareLock, ExclusiveLock };
+	for (int lockmodeIndex = 0; lockmodeIndex < lengthof(lockmodes); lockmodeIndex++)
+	{
+		LockColocationId(colocationId, lockmodes[lockmodeIndex]);
+
+		referenceTableIdList = CitusTableTypeIdList(REFERENCE_TABLE);
 		if (referenceTableIdList == NIL)
 		{
-		/* no reference tables exist */
-		UnlockColocationId(colocationId, ExclusiveLock);
+			/*
+			 * No reference tables exist, make sure that any locks obtained earlier are
+			 * released. It will probably not matter, but we release the locks in the
+			 * reverse order we obtained them in.
+			 */
+			for (int releaseLockmodeIndex = lockmodeIndex; releaseLockmodeIndex >= 0;
+				 releaseLockmodeIndex--)
+			{
+				UnlockColocationId(colocationId, lockmodes[releaseLockmodeIndex]);
+			}
 			return;
 		}

 		Oid referenceTableId = linitial_oid(referenceTableIdList);
-	const char *referenceTableName = get_rel_name(referenceTableId);
+		referenceTableName = get_rel_name(referenceTableId);
 		List *shardIntervalList = LoadShardIntervalList(referenceTableId);
 		if (list_length(shardIntervalList) == 0)
 		{
@ -127,21 +156,29 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
 		}

 		ShardInterval *shardInterval = (ShardInterval *) linitial(shardIntervalList);
-	uint64 shardId = shardInterval->shardId;
+		shardId = shardInterval->shardId;

 		/*
 		 * We only take an access share lock, otherwise we'll hold up citus_add_node.
 		 * In case of create_reference_table() where we don't want concurrent writes
 		 * to pg_dist_node, we have already acquired ShareLock on pg_dist_node.
 		 */
-	List *newWorkersList = WorkersWithoutReferenceTablePlacement(shardId,
-																 AccessShareLock);
+		newWorkersList = WorkersWithoutReferenceTablePlacement(shardId, AccessShareLock);
 		if (list_length(newWorkersList) == 0)
 		{
-		/* nothing to do, no need for lock */
-		UnlockColocationId(colocationId, ExclusiveLock);
+			/*
+			 * All workers already have a copy of the reference tables, make sure that
+			 * any locks obtained earlier are released. It will probably not matter, but
+			 * we release the locks in the reverse order we obtained them in.
+			 */
+			for (int releaseLockmodeIndex = lockmodeIndex; releaseLockmodeIndex >= 0;
+				 releaseLockmodeIndex--)
+			{
+				UnlockColocationId(colocationId, lockmodes[releaseLockmodeIndex]);
+			}
 			return;
 		}
+	}

 	/*
 	 * master_copy_shard_placement triggers metadata sync-up, which tries to
@ -221,10 +258,17 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
 	}

 	/*
-	 * Unblock other backends, they will probably observe that there are no
-	 * more worker nodes without placements, unless nodes were added concurrently
+	 * Since reference tables have been copied via a loopback connection we do not have to
+	 * retain our locks. Since Citus only runs well in READ COMMITTED mode we can be sure
+	 * that other transactions will find the reference tables copied.
+	 * We have obtained and held multiple locks, here we unlock them all in the reverse
+	 * order we have obtained them in.
 	 */
-	UnlockColocationId(colocationId, ExclusiveLock);
+	for (int releaseLockmodeIndex = lengthof(lockmodes) - 1; releaseLockmodeIndex >= 0;
+		 releaseLockmodeIndex--)
+	{
+		UnlockColocationId(colocationId, lockmodes[releaseLockmodeIndex]);
+	}
 }


@ -425,6 +469,28 @@ CreateReferenceTableColocationId()
 }


+/*
+ * GetReferenceTableColocationId returns the result of ColocationId() for the
+ * parameters used for reference tables here: a shard count of 1, a replication
+ * factor of -1 and an invalid distribution column type and collation.
+ *
+ * The caller in this file compares the result against INVALID_COLOCATION_ID
+ * to detect that there are no reference tables in the system.
+ */
+uint32
+GetReferenceTableColocationId()
+{
+	int shardCount = 1;
+	Oid distributionColumnType = InvalidOid;
+	Oid distributionColumnCollation = InvalidOid;
+
+	/*
+	 * We don't maintain replication factor of reference tables anymore and
+	 * just use -1 instead. We don't use this value in any places.
+	 */
+	int replicationFactor = -1;
+
+	/* check for existing colocations */
+	uint32 colocationId =
+		ColocationId(shardCount, replicationFactor, distributionColumnType,
+					 distributionColumnCollation);
+
+	return colocationId;
+}
+
+
 /*
 * DeleteAllReferenceTablePlacementsFromNodeGroup function iterates over list of reference
 * tables and deletes all reference table placements from pg_dist_placement table
@ -504,19 +570,6 @@ CompareOids(const void *leftElement, const void *rightElement)
 }


-/*
- * ReferenceTableReplicationFactor returns the replication factor for
- * reference tables.
- */
-int
-ReferenceTableReplicationFactor(void)
-{
-	List *nodeList = ReferenceTablePlacementNodeList(NoLock);
-	int replicationFactor = list_length(nodeList);
-	return replicationFactor;
-}
-
-
 /*
 * ReplicateAllReferenceTablesToNode function finds all reference tables and
 * replicates them to the given worker node. It also modifies pg_dist_colocation
@ -527,6 +580,16 @@ ReferenceTableReplicationFactor(void)
 void
 ReplicateAllReferenceTablesToNode(char *nodeName, int nodePort)
 {
+	int colocationId = GetReferenceTableColocationId();
+	if (colocationId == INVALID_COLOCATION_ID)
+	{
+		/* no reference tables in system */
+		return;
+	}
+
+	/* prevent changes in table set while replicating reference tables */
+	LockColocationId(colocationId, RowExclusiveLock);
+
 	List *referenceTableList = CitusTableTypeIdList(REFERENCE_TABLE);

 	/* if there is no reference table, we do not need to replicate anything */
--- a/src/backend/distributed/utils/resource_lock.c
+++ b/src/backend/distributed/utils/resource_lock.c
@ -183,20 +183,49 @@ lock_shard_resources(PG_FUNCTION_ARGS)
 	int shardIdCount = ArrayObjectCount(shardIdArrayObject);
 	Datum *shardIdArrayDatum = DeconstructArrayObject(shardIdArrayObject);

+	/*
+	 * The executor calls this UDF for modification queries. So, any user
+	 * who has the rights to modify this table is actually able
+	 * to call the UDF.
+	 *
+	 * So, at this point, we make sure that a malicious user who doesn't
+	 * have modification privileges cannot use this UDF to acquire locks.
+	 *
+	 * Update/Delete/Truncate commands already acquire ExclusiveLock
+	 * on the executor. However, for INSERTs, the user might have only
+	 * INSERTs granted, so add a special case for it.
+	 */
+	AclMode aclMask = ACL_UPDATE | ACL_DELETE | ACL_TRUNCATE;
+	if (lockMode == RowExclusiveLock)
+	{
+		aclMask |= ACL_INSERT;
+	}
+
 	for (int shardIdIndex = 0; shardIdIndex < shardIdCount; shardIdIndex++)
 	{
 		int64 shardId = DatumGetInt64(shardIdArrayDatum[shardIdIndex]);

 		/*
-		 * We don't want random users to block writes. The callers of this
-		 * function either operates on all the colocated placements, such
-		 * as shard moves, or requires superuser such as adding node.
-		 * In other words, the coordinator initiated operations has already
-		 * ensured table owner, we are preventing any malicious attempt to
-		 * use this function.
+		 * We don't want random users to block writes. If the current user
+		 * has privileges to modify the shard, then the user can already
+		 * acquire the lock. So, we allow.
 		 */
 		bool missingOk = true;
-		EnsureShardOwner(shardId, missingOk);
+		Oid relationId = LookupShardRelationFromCatalog(shardId, missingOk);
+
+		if (!OidIsValid(relationId) && missingOk)
+		{
+			/*
+			 * This could happen in two ways. First, a malicious user is trying
+			 * to acquire locks on non-existing shards. Second, the metadata has
+			 * not been synced (or not yet visible) to this node. In the second
+			 * case, there is no point in locking the shards because no other
+			 * transaction can be accessing the table.
+			 */
+			continue;
+		}
+
+		EnsureTablePermissions(relationId, aclMask);

 		LockShardResource(shardId, lockMode);
 	}
--- a/src/backend/distributed/utils/shardinterval_utils.c
+++ b/src/backend/distributed/utils/shardinterval_utils.c
@ -297,7 +297,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry)
 	ShardInterval **shardIntervalCache = cacheEntry->sortedShardIntervalArray;
 	int shardCount = cacheEntry->shardIntervalArrayLength;
 	FmgrInfo *compareFunction = cacheEntry->shardIntervalCompareFunction;
-	bool useBinarySearch = (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
+	bool useBinarySearch = (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
 							!cacheEntry->hasUniformHashDistribution);
 	int shardIndex = INVALID_SHARD_INDEX;

--- a/src/backend/distributed/worker/worker_create_or_replace.c
+++ b/src/backend/distributed/worker/worker_create_or_replace.c
@ -61,7 +61,7 @@ WrapCreateOrReplace(const char *sql)
 * have this functionality or where their implementation is not sufficient.
 *
 * Besides checking if an object of said name exists it tries to compare the object to be
- * created with the one in the local catalog. If there is a difference the on in the local
+ * created with the one in the local catalog. If there is a difference the one in the local
 * catalog will be renamed after which the statement can be executed on this worker to
 * create the object.
 *
--- a/src/include/citus_config.h.in
+++ b/src/include/citus_config.h.in
@ -38,7 +38,7 @@
 #undef HAVE_LIBCURL

 /* Define to 1 if you have the `lz4' library (-llz4). */
-#undef HAVE_LIBLZ4
+#undef HAVE_CITUS_LIBLZ4

 /* Define to 1 if you have the `zstd' library (-lzstd). */
 #undef HAVE_LIBZSTD
--- a/src/include/citus_version.h.in
+++ b/src/include/citus_version.h.in
@ -25,7 +25,7 @@
 #undef HAVE_LIBCURL

 /* Define to 1 if you have the `liblz4' library (-llz4). */
-#undef HAVE_LIBLZ4
+#undef HAVE_CITUS_LIBLZ4

 /* Define to 1 if you have the `libzstd' library (-lzstd). */
 #undef HAVE_LIBZSTD
--- a/src/include/columnar/columnar.h
+++ b/src/include/columnar/columnar.h
@ -309,6 +309,7 @@ extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS);
 /* write_state_management.c */
 extern ColumnarWriteState * columnar_init_write_state(Relation relation, TupleDesc
 													  tupdesc,
+													  Oid tupSlotRelationId,
 													  SubTransactionId currentSubXid);
 extern void FlushWriteStateForRelfilenode(Oid relfilenode, SubTransactionId
 										  currentSubXid);
--- a/src/include/distributed/connection_management.h
+++ b/src/include/distributed/connection_management.h
@ -16,6 +16,7 @@
 #include "distributed/transaction_management.h"
 #include "distributed/remote_transaction.h"
 #include "lib/ilist.h"
+#include "pg_config.h"
 #include "portability/instr_time.h"
 #include "utils/guc.h"
 #include "utils/hsearch.h"
@ -264,5 +265,7 @@ extern void MarkConnectionConnected(MultiConnection *connection);
 extern double MillisecondsPassedSince(instr_time moment);
 extern long MillisecondsToTimeout(instr_time start, long msAfterStart);

+#if PG_VERSION_NUM < 140000
 extern void WarmUpConnParamsHash(void);
+#endif
 #endif /* CONNECTION_MANAGMENT_H */
--- a/src/include/distributed/deparse_shard_query.h
+++ b/src/include/distributed/deparse_shard_query.h
@ -29,6 +29,7 @@ extern void SetTaskQueryStringList(Task *task, List *queryStringList);
 extern char * TaskQueryString(Task *task);
 extern char * TaskQueryStringAtIndex(Task *task, int index);
 extern int GetTaskQueryType(Task *task);
+extern void AddInsertAliasIfNeeded(Query *query);


 #endif /* DEPARSE_SHARD_QUERY_H */
--- a/src/include/distributed/insert_select_executor.h
+++ b/src/include/distributed/insert_select_executor.h
@ -19,7 +19,6 @@
 extern bool EnableRepartitionedInsertSelect;

 extern TupleTableSlot * NonPushableInsertSelectExecScan(CustomScanState *node);
-extern Query * BuildSelectForInsertSelect(Query *insertSelectQuery);
 extern bool IsSupportedRedistributionTarget(Oid targetRelationId);
 extern bool IsRedistributablePlan(Plan *selectPlan);

--- a/src/include/distributed/intermediate_results.h
+++ b/src/include/distributed/intermediate_results.h
@ -57,7 +57,7 @@ extern void WriteToLocalFile(StringInfo copyData, FileCompat *fileCompat);
 extern uint64 RemoteFileDestReceiverBytesSent(DestReceiver *destReceiver);
 extern void SendQueryResultViaCopy(const char *resultId);
 extern void ReceiveQueryResultViaCopy(const char *resultId);
-extern void RemoveIntermediateResultsDirectory(void);
+extern void RemoveIntermediateResultsDirectories(void);
 extern int64 IntermediateResultSize(const char *resultId);
 extern char * QueryResultFileName(const char *resultId);
 extern char * CreateIntermediateResultsDirectory(void);
--- a/src/include/distributed/metadata/dependency.h
+++ b/src/include/distributed/metadata/dependency.h
@ -15,6 +15,8 @@
 #include "postgres.h"

 #include "catalog/objectaddress.h"
+#include "catalog/pg_depend.h"
+#include "distributed/errormessage.h"
 #include "nodes/pg_list.h"

 extern List * GetUniqueDependenciesList(List *objectAddressesList);
@ -24,5 +26,6 @@ extern bool SupportedDependencyByCitus(const ObjectAddress *address);
 extern List * GetPgDependTuplesForDependingObjects(Oid targetObjectClassId,
 												   Oid targetObjectId);
 extern List * GetDependingViews(Oid relationId);
+extern Oid GetDependingView(Form_pg_depend pg_depend);

 #endif /* CITUS_DEPENDENCY_H */
--- a/src/include/distributed/metadata_utility.h
+++ b/src/include/distributed/metadata_utility.h
@ -291,7 +291,7 @@ extern bool GetNodeDiskSpaceStatsForConnection(MultiConnection *connection,
 											   uint64 *availableBytes,
 											   uint64 *totalBytes);
 extern void ExecuteQueryViaSPI(char *query, int SPIOK);
-extern void EnsureSequenceTypeSupported(Oid seqOid, Oid seqTypId);
+extern void EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId);
 extern void AlterSequenceType(Oid seqOid, Oid typeOid);
 extern void MarkSequenceListDistributedAndPropagateDependencies(List *sequenceList);
 extern void MarkSequenceDistributedAndPropagateDependencies(Oid sequenceOid);
--- a/src/include/distributed/reference_table_utils.h
+++ b/src/include/distributed/reference_table_utils.h
@ -21,6 +21,7 @@
 extern void EnsureReferenceTablesExistOnAllNodes(void);
 extern void EnsureReferenceTablesExistOnAllNodesExtended(char transferMode);
 extern uint32 CreateReferenceTableColocationId(void);
+extern uint32 GetReferenceTableColocationId(void);
 extern void DeleteAllReferenceTablePlacementsFromNodeGroup(int32 groupId);
 extern int CompareOids(const void *leftElement, const void *rightElement);
 extern int ReferenceTableReplicationFactor(void);
--- a/src/test/columnar_freezing/Makefile
+++ b/src/test/columnar_freezing/Makefile
@ -0,0 +1,37 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/test/columnar_freezing
+#
+# Test that columnar freezing works.
+#
+#-------------------------------------------------------------------------
+
+subdir = src/test/columnar_freezing
+top_builddir = ../../..
+include $(top_builddir)/Makefile.global
+
+# copied from pgxs/Makefile.global to use postgres' abs build dir for pg_regress
+ifeq ($(enable_tap_tests),yes)
+
+define citus_prove_installcheck
+rm -rf '$(CURDIR)'/tmp_check
+$(MKDIR_P) '$(CURDIR)'/tmp_check
+cd $(srcdir) && \
+TESTDIR='$(CURDIR)' \
+PATH="$(bindir):$$PATH" \
+PGPORT='6$(DEF_PGPORT)' \
+top_builddir='$(CURDIR)/$(top_builddir)' \
+PG_REGRESS='$(pgxsdir)/src/test/regress/pg_regress' \
+TEMP_CONFIG='$(CURDIR)'/postgresql.conf \
+$(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
+endef
+
+else
+citus_prove_installcheck = @echo "TAP tests not enabled when postgres was compiled"
+endif
+
+installcheck:
+	$(citus_prove_installcheck)
+
+clean distclean maintainer-clean:
+	rm -rf tmp_check
--- a/src/test/columnar_freezing/postgresql.conf
+++ b/src/test/columnar_freezing/postgresql.conf
@ -0,0 +1,7 @@
+shared_preload_libraries=citus
+shared_preload_libraries='citus'
+vacuum_freeze_min_age = 50000
+vacuum_freeze_table_age = 50000
+synchronous_commit = off
+fsync = off
+
--- a/src/test/columnar_freezing/t/001_columnar_freezing.pl
+++ b/src/test/columnar_freezing/t/001_columnar_freezing.pl
@ -0,0 +1,52 @@
+# Minimal test verifying that VACUUM FREEZE advances relfrozenxid of a columnar table
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 2;
+
+# Initialize single node
+my $node_one = get_new_node('node_one');
+$node_one->init();
+$node_one->start;
+
+# initialize the citus extension
+$node_one->safe_psql('postgres', "CREATE EXTENSION citus;");
+
+# create a plain row table and a columnar table (autovacuum disabled) with one row each
+$node_one->safe_psql('postgres', "
+CREATE TABLE test_row(i int);
+INSERT INTO test_row VALUES (1);
+CREATE TABLE test_columnar_freeze(i int) USING columnar WITH(autovacuum_enabled=false);
+INSERT INTO test_columnar_freeze VALUES (1);
+");
+
+my $ten_thousand_updates = "";
+
+foreach (1..10000) {
+    $ten_thousand_updates .= "UPDATE test_row SET i = i + 1;\n";
+}
+
+# 70K updates on the row table (7 batches of 10K), presumably to age the columnar table's relfrozenxid past 70000
+foreach (1..7) {
+    $node_one->safe_psql('postgres', $ten_thousand_updates);
+}
+
+my $result = $node_one->safe_psql('postgres', "
+select age(relfrozenxid) < 70000 as was_frozen
+  from pg_class where relname='test_columnar_freeze';
+");
+print "node one count: $result\n";
+is($result, qq(f), 'columnar table was not frozen');
+
+$node_one->safe_psql('postgres', 'VACUUM FREEZE test_columnar_freeze;');
+
+$result = $node_one->safe_psql('postgres', "
+select age(relfrozenxid) < 70000 as was_frozen
+  from pg_class where relname='test_columnar_freeze';
+");
+print "node one count: $result\n";
+is($result, qq(t), 'columnar table was frozen');
+
+$node_one->stop('fast');
+
--- a/src/test/regress/after_pg_upgrade_schedule
+++ b/src/test/regress/after_pg_upgrade_schedule
@ -1 +1,5 @@
-test: upgrade_basic_after upgrade_columnar_after upgrade_type_after upgrade_ref2ref_after upgrade_distributed_function_after upgrade_rebalance_strategy_after upgrade_list_citus_objects
+test: upgrade_basic_after upgrade_type_after upgrade_ref2ref_after upgrade_distributed_function_after upgrade_rebalance_strategy_after upgrade_list_citus_objects
+
+# This attempts dropping citus extension (and rollbacks), so please do
+# not run in parallel with any other tests.
+test: upgrade_columnar_after
--- a/src/test/regress/before_pg_upgrade_schedule
+++ b/src/test/regress/before_pg_upgrade_schedule
@ -2,7 +2,10 @@
 test: multi_test_helpers multi_test_helpers_superuser
 test: multi_test_catalog_views
 test: upgrade_basic_before
-test: upgrade_columnar_before
 test: upgrade_ref2ref_before
 test: upgrade_type_before
 test: upgrade_distributed_function_before upgrade_rebalance_strategy_before
+
+# upgrade_columnar_before renames public schema to citus_schema, so let's
+# run this test as the last one.
+test: upgrade_columnar_before
--- a/src/test/regress/bin/normalize.sed
+++ b/src/test/regress/bin/normalize.sed
@ -246,3 +246,6 @@ s/TRIM\(BOTH FROM value\)/btrim\(value\)/g
 s/pg14\.idx.*/pg14\.xxxxx/g

 s/CREATE TABLESPACE test_tablespace LOCATION.*/CREATE TABLESPACE test_tablespace LOCATION XXXX/g
+
+# columnar log for var correlation: mask the numeric correlation value (0.x or 1.x) with X.YZ
+s/(.*absolute correlation \()([01]\.[0-9]+)(\) of var attribute [0-9]+ is smaller than.*)/\1X\.YZ\3/g
--- a/src/test/regress/expected/aggregate_support.out
+++ b/src/test/regress/expected/aggregate_support.out
@ -712,6 +712,19 @@ select array_agg(val order by valf) from aggdata;
 {0,NULL,2,3,5,2,4,NULL,NULL,8,NULL}
 (1 row)

+-- test by using some other node types as arguments to agg
+select key, percentile_cont((key - (key > 4)::int) / 10.0) within group(order by val) from aggdata group by key;
+ key | percentile_cont
+---------------------------------------------------------------------
+   1 |               2
+   2 |             2.4
+   3 |               4
+   5 |
+   6 |
+   7 |               8
+   9 |               0
+(7 rows)
+
 -- Test TransformSubqueryNode
 select * FROM (
    SELECT key, mode() within group (order by floor(agg1.val/2)) m from aggdata agg1
@ -932,5 +945,100 @@ SELECT square_func(5), a, count(a) FROM t1 GROUP BY a;
 ERROR:  function aggregate_support.square_func(integer) does not exist
 HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
 CONTEXT:  while executing command on localhost:xxxxx
+-- Test the cases where the worker agg exec. returns no tuples.
+CREATE TABLE dist_table (dist_col int, agg_col numeric);
+SELECT create_distributed_table('dist_table', 'dist_col');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE TABLE ref_table (int_col int);
+SELECT create_reference_table('ref_table');
+ create_reference_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT PERCENTILE_DISC(.25) WITHIN GROUP (ORDER BY agg_col)
+FROM dist_table
+LEFT JOIN ref_table ON TRUE;
+ percentile_disc
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT PERCENTILE_DISC(.25) WITHIN GROUP (ORDER BY agg_col)
+FROM (SELECT *, random() FROM dist_table) a;
+ percentile_disc
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT PERCENTILE_DISC((2 > random())::int::numeric / 10) WITHIN GROUP (ORDER BY agg_col)
+FROM dist_table
+LEFT JOIN ref_table ON TRUE;
+ percentile_disc
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT SUM(COALESCE(agg_col, 3))
+FROM dist_table
+LEFT JOIN ref_table ON TRUE;
+ sum
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT AVG(COALESCE(agg_col, 10))
+FROM dist_table
+LEFT JOIN ref_table ON TRUE;
+ avg
+---------------------------------------------------------------------
+
+(1 row)
+
+insert into dist_table values (2, 11.2), (3, NULL), (6, 3.22), (3, 4.23), (5, 5.25), (4, 63.4), (75, NULL), (80, NULL), (96, NULL), (8, 1078), (0, 1.19);
+-- run the same queries after loading some data
+SELECT PERCENTILE_DISC(.25) WITHIN GROUP (ORDER BY agg_col)
+FROM dist_table
+LEFT JOIN ref_table ON TRUE;
+ percentile_disc
+---------------------------------------------------------------------
+            3.22
+(1 row)
+
+SELECT PERCENTILE_DISC(.25) WITHIN GROUP (ORDER BY agg_col)
+FROM (SELECT *, random() FROM dist_table) a;
+ percentile_disc
+---------------------------------------------------------------------
+            3.22
+(1 row)
+
+SELECT PERCENTILE_DISC((2 > random())::int::numeric / 10) WITHIN GROUP (ORDER BY agg_col)
+FROM dist_table
+LEFT JOIN ref_table ON TRUE;
+ percentile_disc
+---------------------------------------------------------------------
+            1.19
+(1 row)
+
+SELECT floor(SUM(COALESCE(agg_col, 3)))
+FROM dist_table
+LEFT JOIN ref_table ON TRUE;
+ floor
+---------------------------------------------------------------------
+  1178
+(1 row)
+
+SELECT floor(AVG(COALESCE(agg_col, 10)))
+FROM dist_table
+LEFT JOIN ref_table ON TRUE;
+ floor
+---------------------------------------------------------------------
+   109
+(1 row)
+
 set client_min_messages to error;
 drop schema aggregate_support cascade;
--- a/src/test/regress/expected/columnar_alter_set_type.out
+++ b/src/test/regress/expected/columnar_alter_set_type.out
@ -50,3 +50,33 @@ SELECT * FROM test_alter_table ORDER BY a;
 (4 rows)

 DROP TABLE test_alter_table;
+--  Make sure that the correct table options are used when rewriting the table.
+-- This is reflected by the VACUUM VERBOSE output right after a rewrite showing
+-- that all chunks are compressed with the configured compression algorithm
+-- https://github.com/citusdata/citus/issues/5927
+CREATE TABLE test(i int) USING columnar;
+SELECT alter_columnar_table_set('test', compression => 'lz4');
+ alter_columnar_table_set
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO test VALUES(1);
+VACUUM VERBOSE test;
+INFO:  statistics for "test":
+storage id: xxxxx
+total file size: 24576, total data size: 6
+compression rate: 0.83x
+total row count: 1, stripe count: 1, average rows per stripe: 1
+chunk count: 1, containing data for dropped columns: 0, lz4 compressed: 1
+
+ALTER TABLE test ALTER COLUMN i TYPE int8;
+VACUUM VERBOSE test;
+INFO:  statistics for "test":
+storage id: xxxxx
+total file size: 24576, total data size: 10
+compression rate: 0.90x
+total row count: 1, stripe count: 1, average rows per stripe: 1
+chunk count: 1, containing data for dropped columns: 0, lz4 compressed: 1
+
+DROP TABLE test;
--- a/src/test/regress/expected/columnar_chunk_filtering.out
+++ b/src/test/regress/expected/columnar_chunk_filtering.out
@ -645,7 +645,7 @@ alter table coltest add column x5 int default (random()*20000)::int;
 analyze coltest;
 -- test that expressions on whole-row references are not pushed down
 select * from coltest where coltest = (1,1,1,1);
-NOTICE:  columnar planner: cannot push down clause: var is whole-row reference
+NOTICE:  columnar planner: cannot push down clause: var is whole-row reference or system column
 NOTICE:  columnar planner: adding CustomScan path for coltest
 DETAIL:  unparameterized; 0 clauses pushed down
 id | x1 | x2 | x3 | x5
@ -655,7 +655,7 @@ DETAIL:  unparameterized; 0 clauses pushed down
 -- test that expressions on uncorrelated attributes are not pushed down
 set columnar.qual_pushdown_correlation to default;
 select * from coltest where x5 = 23484;
-NOTICE:  columnar planner: cannot push down clause: var attribute 5 is uncorrelated
+NOTICE:  columnar planner: cannot push down clause: absolute correlation (X.YZ) of var attribute 5 is smaller than the value configured in "columnar.qual_pushdown_correlation_threshold" (0.900)
 NOTICE:  columnar planner: adding CustomScan path for coltest
 DETAIL:  unparameterized; 0 clauses pushed down
 id | x1 | x2 | x3 | x5
@ -819,3 +819,250 @@ select * from numrange_test natural join numrange_test2 order by nr;

 DROP TABLE atest1, atest2, t1, t2, t3, numrange_test, numrange_test2;
 set default_table_access_method to default;
+set columnar.planner_debug_level to notice;
+BEGIN;
+  SET LOCAL columnar.stripe_row_limit = 2000;
+  SET LOCAL columnar.chunk_group_row_limit = 1000;
+  create table pushdown_test (a int, b int) using columnar;
+  insert into pushdown_test values (generate_series(1, 200000));
+COMMIT;
+SET columnar.max_custom_scan_paths TO 50;
+SET columnar.qual_pushdown_correlation_threshold TO 0.0;
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556;
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=2 loops=1)
+         Filter: ((a = 204356) OR (a = 104356) OR (a = 76556))
+         Rows Removed by Filter: 1998
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: ((a = 204356) OR (a = 104356) OR (a = 76556))
+         Columnar Chunk Groups Removed by Filter: 198
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556;
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+  sum
+---------------------------------------------------------------------
+ 180912
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556;
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1)
+         Filter: ((a = 194356) OR (a = 104356) OR (a = 76556))
+         Rows Removed by Filter: 2997
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: ((a = 194356) OR (a = 104356) OR (a = 76556))
+         Columnar Chunk Groups Removed by Filter: 197
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556;
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+  sum
+---------------------------------------------------------------------
+ 375268
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a > a*-1 + b;
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+                                QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=0 loops=1)
+         Filter: ((a = 204356) OR (a > ((a * '-1'::integer) + b)))
+         Rows Removed by Filter: 200000
+         Columnar Projected Columns: a, b
+(5 rows)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000);
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                              QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=38998 loops=1)
+         Filter: (((a > 1000) AND (a < 10000)) OR ((a > 20000) AND (a < 50000)))
+         Rows Removed by Filter: 2
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: (((a > 1000) AND (a < 10000)) OR ((a > 20000) AND (a < 50000)))
+         Columnar Chunk Groups Removed by Filter: 161
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000);
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+    sum
+---------------------------------------------------------------------
+ 1099459500
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100);
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: none of the arguments were pushdownable, due to the reason(s) given above
+NOTICE:  columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+                                      QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=200000 loops=1)
+         Filter: ((((a)::double precision > random()) AND (a < (2 * a))) OR (a > 100))
+         Columnar Projected Columns: a
+(4 rows)
+
+SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100);
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: none of the arguments were pushdownable, due to the reason(s) given above
+NOTICE:  columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+     sum
+---------------------------------------------------------------------
+ 20000100000
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010);
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                       QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=3010 loops=1)
+         Filter: ((((a)::double precision > random()) AND (a <= 2000)) OR (a > 198990))
+         Rows Removed by Filter: 990
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: ((a <= 2000) OR (a > 198990))
+         Columnar Chunk Groups Removed by Filter: 196
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010);
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+    sum
+---------------------------------------------------------------------
+ 203491455
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where
+(
+  a > random()
+  and
+  (
+    (a < 200 and a not in (select a from pushdown_test)) or
+    (a > 1000 and a < 2000)
+  )
+)
+or
+(a > 200000-2010);
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must not contain a subplan
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                                                  QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=3009 loops=1)
+         Filter: ((((a)::double precision > random()) AND (((a < 200) AND (NOT (SubPlan 1))) OR ((a > 1000) AND (a < 2000)))) OR (a > 197990))
+         Rows Removed by Filter: 1991
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: (((a < 200) OR ((a > 1000) AND (a < 2000))) OR (a > 197990))
+         Columnar Chunk Groups Removed by Filter: 195
+         SubPlan 1
+           ->  Materialize (actual rows=100 loops=199)
+                 ->  Custom Scan (ColumnarScan) on pushdown_test pushdown_test_1 (actual rows=199 loops=1)
+                       Columnar Projected Columns: a
+(11 rows)
+
+SELECT sum(a) FROM pushdown_test where
+(
+  a > random()
+  and
+  (
+    (a < 200 and a not in (select a from pushdown_test)) or
+    (a > 1000 and a < 2000)
+  )
+)
+or
+(a > 200000-2010);
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must not contain a subplan
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+    sum
+---------------------------------------------------------------------
+ 401479455
+(1 row)
+
+create function stable_1(arg int) returns int language plpgsql STRICT IMMUTABLE as
+$$ BEGIN RETURN 1+arg; END; $$;
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000));
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                        QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=0 loops=1)
+         Filter: ((a < 6001) AND ((a)::double precision = random()) AND (a < stable_1(a)))
+         Rows Removed by Filter: 6000
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: (a < 6001)
+         Columnar Chunk Groups Removed by Filter: 194
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000));
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+ sum
+---------------------------------------------------------------------
+
+(1 row)
+
+RESET columnar.max_custom_scan_paths;
+RESET columnar.qual_pushdown_correlation_threshold;
+RESET columnar.planner_debug_level;
+DROP TABLE pushdown_test;
--- a/src/test/regress/expected/columnar_chunk_filtering_0.out
+++ b/src/test/regress/expected/columnar_chunk_filtering_0.out
@ -645,7 +645,7 @@ alter table coltest add column x5 int default (random()*20000)::int;
 analyze coltest;
 -- test that expressions on whole-row references are not pushed down
 select * from coltest where coltest = (1,1,1,1);
-NOTICE:  columnar planner: cannot push down clause: var is whole-row reference
+NOTICE:  columnar planner: cannot push down clause: var is whole-row reference or system column
 NOTICE:  columnar planner: adding CustomScan path for coltest
 DETAIL:  unparameterized; 0 clauses pushed down
 id | x1 | x2 | x3 | x5
@ -655,7 +655,7 @@ DETAIL:  unparameterized; 0 clauses pushed down
 -- test that expressions on uncorrelated attributes are not pushed down
 set columnar.qual_pushdown_correlation to default;
 select * from coltest where x5 = 23484;
-NOTICE:  columnar planner: cannot push down clause: var attribute 5 is uncorrelated
+NOTICE:  columnar planner: cannot push down clause: absolute correlation (X.YZ) of var attribute 5 is smaller than the value configured in "columnar.qual_pushdown_correlation_threshold" (0.900)
 NOTICE:  columnar planner: adding CustomScan path for coltest
 DETAIL:  unparameterized; 0 clauses pushed down
 id | x1 | x2 | x3 | x5
@ -819,3 +819,250 @@ select * from numrange_test natural join numrange_test2 order by nr;

 DROP TABLE atest1, atest2, t1, t2, t3, numrange_test, numrange_test2;
 set default_table_access_method to default;
+set columnar.planner_debug_level to notice;
+BEGIN;
+  SET LOCAL columnar.stripe_row_limit = 2000;
+  SET LOCAL columnar.chunk_group_row_limit = 1000;
+  create table pushdown_test (a int, b int) using columnar;
+  insert into pushdown_test values (generate_series(1, 200000));
+COMMIT;
+SET columnar.max_custom_scan_paths TO 50;
+SET columnar.qual_pushdown_correlation_threshold TO 0.0;
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556;
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=2 loops=1)
+         Filter: ((a = 204356) OR (a = 104356) OR (a = 76556))
+         Rows Removed by Filter: 1998
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: ((a = 204356) OR (a = 104356) OR (a = 76556))
+         Columnar Chunk Groups Removed by Filter: 198
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556;
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+  sum
+---------------------------------------------------------------------
+ 180912
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556;
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1)
+         Filter: ((a = 194356) OR (a = 104356) OR (a = 76556))
+         Rows Removed by Filter: 2997
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: ((a = 194356) OR (a = 104356) OR (a = 76556))
+         Columnar Chunk Groups Removed by Filter: 197
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556;
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+  sum
+---------------------------------------------------------------------
+ 375268
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a > a*-1 + b;
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+                                QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=0 loops=1)
+         Filter: ((a = 204356) OR (a > ((a * '-1'::integer) + b)))
+         Rows Removed by Filter: 200000
+         Columnar Projected Columns: a, b
+(5 rows)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000);
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                              QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=38998 loops=1)
+         Filter: (((a > 1000) AND (a < 10000)) OR ((a > 20000) AND (a < 50000)))
+         Rows Removed by Filter: 2
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: (((a > 1000) AND (a < 10000)) OR ((a > 20000) AND (a < 50000)))
+         Columnar Chunk Groups Removed by Filter: 161
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000);
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+    sum
+---------------------------------------------------------------------
+ 1099459500
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100);
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: none of the arguments were pushdownable, due to the reason(s) given above
+NOTICE:  columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+                                      QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=200000 loops=1)
+         Filter: ((((a)::double precision > random()) AND (a < (2 * a))) OR (a > 100))
+         Columnar Projected Columns: a
+(4 rows)
+
+SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100);
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: none of the arguments were pushdownable, due to the reason(s) given above
+NOTICE:  columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+     sum
+---------------------------------------------------------------------
+ 20000100000
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010);
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                       QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=3010 loops=1)
+         Filter: ((((a)::double precision > random()) AND (a <= 2000)) OR (a > 198990))
+         Rows Removed by Filter: 990
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: ((a <= 2000) OR (a > 198990))
+         Columnar Chunk Groups Removed by Filter: 196
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010);
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+    sum
+---------------------------------------------------------------------
+ 203491455
+(1 row)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where
+(
+  a > random()
+  and
+  (
+    (a < 200 and a not in (select a from pushdown_test)) or
+    (a > 1000 and a < 2000)
+  )
+)
+or
+(a > 200000-2010);
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must not contain a subplan
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                                                  QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=3009 loops=1)
+         Filter: ((((a)::double precision > random()) AND (((a < 200) AND (NOT (SubPlan 1))) OR ((a > 1000) AND (a < 2000)))) OR (a > 197990))
+         Rows Removed by Filter: 1991
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: (((a < 200) OR ((a > 1000) AND (a < 2000))) OR (a > 197990))
+         Columnar Chunk Groups Removed by Filter: 195
+         SubPlan 1
+           ->  Materialize (actual rows=100 loops=199)
+                 ->  Custom Scan (ColumnarScan) on pushdown_test pushdown_test_1 (actual rows=199 loops=1)
+                       Columnar Projected Columns: a
+(11 rows)
+
+SELECT sum(a) FROM pushdown_test where
+(
+  a > random()
+  and
+  (
+    (a < 200 and a not in (select a from pushdown_test)) or
+    (a > 1000 and a < 2000)
+  )
+)
+or
+(a > 200000-2010);
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 0 clauses pushed down
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must not contain a subplan
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+    sum
+---------------------------------------------------------------------
+ 401479455
+(1 row)
+
+create function stable_1(arg int) returns int language plpgsql STRICT IMMUTABLE as
+$$ BEGIN RETURN 1+arg; END; $$;
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000));
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+                                        QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+   ->  Custom Scan (ColumnarScan) on pushdown_test (actual rows=0 loops=1)
+         Filter: ((a < 6001) AND ((a)::double precision = random()) AND (a < stable_1(a)))
+         Rows Removed by Filter: 6000
+         Columnar Projected Columns: a
+         Columnar Chunk Group Filters: (a < 6001)
+         Columnar Chunk Groups Removed by Filter: 194
+(7 rows)
+
+SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000));
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
+HINT:  Var must only reference this rel, and Expr must not reference this rel
+NOTICE:  columnar planner: adding CustomScan path for pushdown_test
+DETAIL:  unparameterized; 1 clauses pushed down
+ sum
+---------------------------------------------------------------------
+
+(1 row)
+
+RESET columnar.max_custom_scan_paths;
+RESET columnar.qual_pushdown_correlation_threshold;
+RESET columnar.planner_debug_level;
+DROP TABLE pushdown_test;
--- a/src/test/regress/expected/columnar_create.out
+++ b/src/test/regress/expected/columnar_create.out
@ -60,6 +60,89 @@ SELECT columnar_test_helpers.columnar_metadata_has_storage_id(:columnar_table_1_
 t
 (1 row)

+BEGIN;
+  INSERT INTO columnar_table_1 VALUES (2);
+ROLLBACK;
+INSERT INTO columnar_table_1 VALUES (3),(4);
+INSERT INTO columnar_table_1 VALUES (5),(6);
+INSERT INTO columnar_table_1 VALUES (7),(8);
+-- Test whether columnar metadata accessors are still fine even
+-- when the metadata indexes are not available to them.
+BEGIN;
+  ALTER INDEX columnar.stripe_first_row_number_idx RENAME TO new_index_name;
+  ALTER INDEX columnar.chunk_pkey RENAME TO new_index_name_1;
+  ALTER INDEX columnar.stripe_pkey RENAME TO new_index_name_2;
+  ALTER INDEX columnar.chunk_group_pkey RENAME TO new_index_name_3;
+  CREATE INDEX columnar_table_1_idx ON columnar_table_1(a);
+WARNING:  Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+WARNING:  Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+WARNING:  Metadata index chunk_pkey is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+WARNING:  Metadata index chunk_group_pkey is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+  -- make sure that we test index scan
+  SET LOCAL columnar.enable_custom_scan TO 'off';
+  SET LOCAL enable_seqscan TO off;
+  SET LOCAL seq_page_cost TO 10000000;
+  SELECT * FROM columnar_table_1 WHERE a = 6;
+WARNING:  Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+ a
+---------------------------------------------------------------------
+ 6
+(1 row)
+
+  SELECT * FROM columnar_table_1 WHERE a = 5;
+ a
+---------------------------------------------------------------------
+ 5
+(1 row)
+
+  SELECT * FROM columnar_table_1 WHERE a = 7;
+ a
+---------------------------------------------------------------------
+ 7
+(1 row)
+
+  SELECT * FROM columnar_table_1 WHERE a = 3;
+ a
+---------------------------------------------------------------------
+ 3
+(1 row)
+
+  DROP INDEX columnar_table_1_idx;
+  -- Re-shuffle some metadata records to test whether we can
+  -- rely on sequential metadata scan when the metadata records
+  -- are not ordered by their "first_row_number"s.
+  WITH cte AS (
+      DELETE FROM columnar.stripe
+      WHERE storage_id = :columnar_table_1_storage_id
+      RETURNING *
+  )
+  INSERT INTO columnar.stripe SELECT * FROM cte ORDER BY first_row_number DESC;
+  SELECT SUM(a) FROM columnar_table_1;
+ sum
+---------------------------------------------------------------------
+  34
+(1 row)
+
+  SELECT * FROM columnar_table_1 WHERE a = 6;
+ a
+---------------------------------------------------------------------
+ 6
+(1 row)
+
+  -- Run a SELECT query after the INSERT command to force flushing the
+  -- data within the xact block.
+  INSERT INTO columnar_table_1 VALUES (20);
+  SELECT COUNT(*) FROM columnar_table_1;
+WARNING:  Metadata index stripe_pkey is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+ count
+---------------------------------------------------------------------
+     8
+(1 row)
+
+  DROP TABLE columnar_table_1 CASCADE;
+NOTICE:  drop cascades to materialized view columnar_table_1_mv
+WARNING:  Metadata index on a columnar metadata table is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+ROLLBACK;
 -- test dropping columnar table
 DROP TABLE columnar_table_1 CASCADE;
 NOTICE:  drop cascades to materialized view columnar_table_1_mv
--- a/src/test/regress/expected/columnar_indexes.out
+++ b/src/test/regress/expected/columnar_indexes.out
@ -257,6 +257,32 @@ SELECT SUM(a)=48000 FROM columnar_table WHERE a = 16000 OR a = 32000;
 t
 (1 row)

+BEGIN;
+  ALTER INDEX columnar.stripe_first_row_number_idx RENAME TO new_index_name;
+  ALTER INDEX columnar.chunk_pkey RENAME TO new_index_name_1;
+  -- same queries but this time some metadata indexes are not available
+  SELECT SUM(a)=312487500 FROM columnar_table WHERE a < 25000;
+WARNING:  Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+WARNING:  Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+WARNING:  Metadata index chunk_pkey is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise.
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+  SELECT SUM(a)=167000 FROM columnar_table WHERE a = 16000 OR a = 151000;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+  SELECT SUM(a)=48000 FROM columnar_table WHERE a = 16000 OR a = 32000;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
 TRUNCATE columnar_table;
 ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey;
 -- hash --
@ -704,5 +730,90 @@ begin;
  insert into uniq select generate_series(1,100);
 ERROR:  cannot read from index when there is unflushed data in upper transactions
 rollback;
+-- Show that we nicely ignore index deletion requests made to columnarAM.
+--
+-- An INSERT command might trigger index deletion if index already had dead
+-- entries for the key we are about to insert.
+-- There are two ways of index deletion:
+--   a) simple deletion
+--   b) bottom-up deletion (>= pg14)
+--
+-- Since columnar_index_fetch_tuple never sets all_dead to true, columnarAM
+-- doesn't expect to receive simple deletion as we don't mark any index
+-- entries as dead.
+-- Otherwise, columnarAM would throw an error for all of below six test cases.
+--
+-- However, since columnarAM doesn't delete any dead entries via simple
+-- deletion, postgres might ask for a more comprehensive deletion (bottom-up)
+-- at some point when pg >= 14.
+-- For this reason, all following six test cases would certainly trigger
+-- bottom-up deletion. Show that we gracefully ignore such requests.
+CREATE TABLE index_tuple_delete (a int UNIQUE) USING COLUMNAR;
+ALTER TABLE index_tuple_delete SET (autovacuum_enabled = false);
+BEGIN;
+  -- i) rollback before flushing
+	INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+ROLLBACK;
+-- index deletion test-1
+BEGIN;
+  INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+ROLLBACK;
+COPY index_tuple_delete FROM PROGRAM 'seq 10000';
+TRUNCATE index_tuple_delete;
+BEGIN;
+  -- ii) rollback after flushing
+	INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+  SELECT SUM(a) > 0 FROM index_tuple_delete;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+-- index deletion test-2
+BEGIN;
+  INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+ROLLBACK;
+COPY index_tuple_delete FROM PROGRAM 'seq 10000';
+TRUNCATE index_tuple_delete;
+BEGIN;
+  -- iii) rollback before flushing, use savepoint
+  SAVEPOINT sp1;
+	  INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+  ROLLBACK TO sp1;
+  -- index deletion test-3
+  SAVEPOINT sp2;
+    INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+  ROLLBACK TO sp2;
+  COPY index_tuple_delete FROM PROGRAM 'seq 10000';
+ROLLBACK;
+-- index deletion test-4
+BEGIN;
+  INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+ROLLBACK;
+COPY index_tuple_delete FROM PROGRAM 'seq 10000';
+TRUNCATE index_tuple_delete;
+BEGIN;
+  -- iv) rollback after flushing, use savepoint
+  SAVEPOINT sp1;
+	  INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+    SELECT SUM(a) > 0 FROM index_tuple_delete;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+  ROLLBACK TO sp1;
+  -- index deletion test-5
+  SAVEPOINT sp2;
+    INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+  ROLLBACK TO sp2;
+  COPY index_tuple_delete FROM PROGRAM 'seq 10000';
+ROLLBACK;
+-- index deletion test-6
+BEGIN;
+  INSERT INTO index_tuple_delete SELECT i FROM generate_series(0,10000)i;
+ROLLBACK;
+COPY index_tuple_delete FROM PROGRAM 'seq 10000';
 SET client_min_messages TO WARNING;
 DROP SCHEMA columnar_indexes CASCADE;
--- a/src/test/regress/expected/columnar_insert.out
+++ b/src/test/regress/expected/columnar_insert.out
@ -291,6 +291,20 @@ BEGIN;
 (1 row)

 ROLLBACK;
+CREATE OR REPLACE FUNCTION test_columnar_storage_write_new_page(relation regclass) RETURNS void
+STRICT LANGUAGE c AS 'citus', 'test_columnar_storage_write_new_page';
+CREATE TABLE aborted_write (a int, b int) USING columnar;
+SELECT test_columnar_storage_write_new_page('aborted_write');
+ test_columnar_storage_write_new_page
+---------------------------------------------------------------------
+
+(1 row)
+
+SET client_min_messages TO DEBUG4;
+INSERT INTO aborted_write VALUES (5);
+DEBUG:  Flushing Stripe of size 1
+DEBUG:  overwriting page 2
+DETAIL:  This can happen after a roll-back.
 RESET search_path;
 SET client_min_messages TO WARNING;
 DROP SCHEMA columnar_insert CASCADE;
--- a/src/test/regress/expected/coordinator_shouldhaveshards.out
+++ b/src/test/regress/expected/coordinator_shouldhaveshards.out
@ -847,8 +847,8 @@ HAVING (max(table_2.value) >= (SELECT value FROM a));
 DEBUG:  Group by list without distribution column is not allowed  in distributed INSERT ... SELECT queries
 DEBUG:  generating subplan XXX_1 for CTE a: SELECT key, value FROM coordinator_shouldhaveshards.table_1 ORDER BY key, value DESC LIMIT 1
 DEBUG:  push down of limit count: 1
-DEBUG:  generating subplan XXX_2 for subquery SELECT count(*) AS count, a.key FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
-DEBUG:  Plan XXX query after replacing subqueries and CTEs: SELECT int4(count) AS key, (key)::text AS value FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) citus_insert_select_subquery
+DEBUG:  generating subplan XXX_2 for subquery SELECT int4(count(*)) AS auto_coerced_by_citus_0, (a.key)::text AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2 USING (key)) GROUP BY a.key HAVING (max(table_2.value) OPERATOR(pg_catalog.>=) (SELECT a_1.value FROM (SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a_1))
+DEBUG:  Plan XXX query after replacing subqueries and CTEs: SELECT auto_coerced_by_citus_0 AS key, auto_coerced_by_citus_1 AS value FROM (SELECT intermediate_result.auto_coerced_by_citus_0, intermediate_result.auto_coerced_by_citus_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(auto_coerced_by_citus_0 integer, auto_coerced_by_citus_1 text)) citus_insert_select_subquery
 DEBUG:  Collecting INSERT ... SELECT results on coordinator
 DEBUG:  Subplan XXX_1 will be written to local file
 DEBUG:  Subplan XXX_1 will be sent to localhost:xxxxx
@ -856,9 +856,9 @@ DEBUG:  Subplan XXX_1 will be sent to localhost:xxxxx
 NOTICE:  executing the command locally: SELECT key, value FROM coordinator_shouldhaveshards.table_1_1503102 table_1 WHERE true ORDER BY key, value DESC LIMIT '1'::bigint
 NOTICE:  executing the command locally: SELECT key, value FROM coordinator_shouldhaveshards.table_1_1503105 table_1 WHERE true ORDER BY key, value DESC LIMIT '1'::bigint
 DEBUG:  Subplan XXX_2 will be written to local file
-NOTICE:  executing the command locally: SELECT count(*) AS count, worker_column_1 AS key, max(worker_column_2) AS worker_column_3 FROM (SELECT a.key AS worker_column_1, table_2.value AS worker_column_2 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2_1503106 table_2(key, value) USING (key))) worker_subquery GROUP BY worker_column_1
-NOTICE:  executing the command locally: SELECT count(*) AS count, worker_column_1 AS key, max(worker_column_2) AS worker_column_3 FROM (SELECT a.key AS worker_column_1, table_2.value AS worker_column_2 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2_1503109 table_2(key, value) USING (key))) worker_subquery GROUP BY worker_column_1
-NOTICE:  executing the command locally: SELECT int4(count) AS key, (key)::text AS value FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) citus_insert_select_subquery
+NOTICE:  executing the command locally: SELECT count(*) AS auto_coerced_by_citus_0, (worker_column_1)::text AS auto_coerced_by_citus_1, worker_column_1 AS discarded_target_item_1, max(worker_column_2) AS worker_column_4 FROM (SELECT a.key AS worker_column_1, table_2.value AS worker_column_2 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2_1503106 table_2(key, value) USING (key))) worker_subquery GROUP BY worker_column_1
+NOTICE:  executing the command locally: SELECT count(*) AS auto_coerced_by_citus_0, (worker_column_1)::text AS auto_coerced_by_citus_1, worker_column_1 AS discarded_target_item_1, max(worker_column_2) AS worker_column_4 FROM (SELECT a.key AS worker_column_1, table_2.value AS worker_column_2 FROM ((SELECT intermediate_result.key, intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text)) a JOIN coordinator_shouldhaveshards.table_2_1503109 table_2(key, value) USING (key))) worker_subquery GROUP BY worker_column_1
+NOTICE:  executing the command locally: SELECT auto_coerced_by_citus_0 AS key, auto_coerced_by_citus_1 AS value FROM (SELECT intermediate_result.auto_coerced_by_citus_0, intermediate_result.auto_coerced_by_citus_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(auto_coerced_by_citus_0 integer, auto_coerced_by_citus_1 text)) citus_insert_select_subquery
 NOTICE:  executing the copy locally for shard xxxxx
 WITH stats AS (
  SELECT count(key) m FROM table_1
--- a/src/test/regress/expected/distributed_functions.out
+++ b/src/test/regress/expected/distributed_functions.out
@ -832,6 +832,266 @@ SELECT * FROM test ORDER BY id;
 (2 rows)

 DROP TABLE test;
+-- verify that recreating distributed functions with TABLE params gets propagated to workers
+CREATE OR REPLACE FUNCTION func_with_return_table(int)
+RETURNS  TABLE (date date)
+LANGUAGE plpgsql AS $$
+BEGIN
+    RETURN query SELECT '2011-01-01'::date;
+END;
+$$;
+SELECT create_distributed_function('func_with_return_table(int)');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE OR REPLACE FUNCTION func_with_return_table(int)
+RETURNS  TABLE (date date)
+LANGUAGE plpgsql AS $$
+BEGIN
+    RETURN query SELECT '2011-01-02'::date;
+END;
+$$;
+SELECT count(*) FROM
+  (SELECT result FROM
+    run_command_on_workers($$select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc) from pg_proc where proname = 'func_with_return_table';$$)
+    UNION  select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc)::text from pg_proc where proname = 'func_with_return_table')
+  as test;
+ count
+---------------------------------------------------------------------
+     1
+(1 row)
+
+-- verify that recreating distributed functions with OUT params gets propagated to workers
+CREATE OR REPLACE FUNCTION func_with_out_param(a int, out b int)
+  RETURNS int
+LANGUAGE sql AS $$ select 1; $$;
+SELECT create_distributed_function('func_with_out_param(int)');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+SET client_min_messages TO ERROR;
+CREATE ROLE r1;
+SELECT 1 FROM run_command_on_workers($$CREATE ROLE r1;$$);
+ ?column?
+---------------------------------------------------------------------
+        1
+        1
+(2 rows)
+
+GRANT EXECUTE ON FUNCTION func_with_out_param TO r1;
+SELECT 1 FROM run_command_on_workers($$GRANT EXECUTE ON FUNCTION func_with_out_param TO r1;$$);
+ ?column?
+---------------------------------------------------------------------
+        1
+        1
+(2 rows)
+
+RESET client_min_messages;
+CREATE OR REPLACE FUNCTION func_with_out_param(a int, out b int)
+  RETURNS int
+LANGUAGE sql AS $$ select 2; $$;
+SELECT count(*) FROM
+  (SELECT result FROM
+    run_command_on_workers($$select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc, pg_proc.proowner) from pg_proc where proname = 'func_with_out_param';$$)
+    UNION  select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc, pg_proc.proowner)::text from pg_proc where proname = 'func_with_out_param')
+  as test;
+ count
+---------------------------------------------------------------------
+     1
+(1 row)
+
+-- verify that recreating distributed functions with INOUT params gets propagated to workers
+CREATE OR REPLACE FUNCTION func_with_inout_param(a int, inout b int)
+  RETURNS int
+LANGUAGE sql AS $$ select 1; $$;
+-- this should error out
+SELECT create_distributed_function('func_with_inout_param(int)');
+ERROR:  function "func_with_inout_param(int)" does not exist
+-- this should work
+SELECT create_distributed_function('func_with_inout_param(int,int)');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE OR REPLACE FUNCTION func_with_inout_param(a int, inout b int)
+  RETURNS int
+LANGUAGE sql AS $$ select 2; $$;
+SELECT count(*) FROM
+  (SELECT result FROM
+    run_command_on_workers($$select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc) from pg_proc where proname = 'func_with_inout_param';$$)
+    UNION  select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc)::text from pg_proc where proname = 'func_with_inout_param')
+  as test;
+ count
+---------------------------------------------------------------------
+     1
+(1 row)
+
+-- verify that recreating distributed functions with VARIADIC params gets propagated to workers
+CREATE OR REPLACE FUNCTION func_with_variadic_param(a int, variadic b int[])
+  RETURNS int
+LANGUAGE sql AS $$ select 1; $$;
+-- this should work
+SELECT create_distributed_function('func_with_variadic_param(int,int[])');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE OR REPLACE FUNCTION func_with_variadic_param(a int, variadic b int[])
+  RETURNS int
+LANGUAGE sql AS $$ select 2; $$;
+SELECT count(*) FROM
+  (SELECT result FROM
+    run_command_on_workers($$select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc) from pg_proc where proname = 'func_with_variadic_param';$$)
+    UNION  select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc)::text from pg_proc where proname = 'func_with_variadic_param')
+  as test;
+ count
+---------------------------------------------------------------------
+     1
+(1 row)
+
+-- verify that recreating distributed functions returning setof records gets propagated to workers
+CREATE OR REPLACE FUNCTION func_returning_setof_int(IN parm1 date, IN parm2 interval)
+  RETURNS SETOF integer AS
+$BODY$
+BEGIN
+    RETURN QUERY
+    SELECT 1;
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE
+  COST 100;
+SELECT create_distributed_function('func_returning_setof_int(date,interval)');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE OR REPLACE FUNCTION func_returning_setof_int(IN parm1 date, IN parm2 interval)
+  RETURNS SETOF integer AS
+$BODY$
+BEGIN
+    RETURN QUERY
+    SELECT 2;
+
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE
+  COST 100;
+SELECT count(*) FROM
+  (SELECT result FROM
+    run_command_on_workers($$select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc) from pg_proc where proname = 'func_returning_setof_int';$$)
+    UNION  select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc)::text from pg_proc where proname = 'func_returning_setof_int')
+  as test;
+ count
+---------------------------------------------------------------------
+     1
+(1 row)
+
+-- verify that recreating distributed functions with variadic param returning setof records gets propagated to workers
+CREATE OR REPLACE FUNCTION func_returning_setof_int_with_variadic_param(IN parm1 date, VARIADIC parm2 int[])
+  RETURNS SETOF integer AS
+$BODY$
+BEGIN
+    RETURN QUERY
+    SELECT 1;
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE
+  COST 100;
+SELECT create_distributed_function('func_returning_setof_int_with_variadic_param(date,int[])');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE OR REPLACE FUNCTION func_returning_setof_int_with_variadic_param(IN parm1 date, VARIADIC parm2 int[])
+  RETURNS SETOF integer AS
+$BODY$
+BEGIN
+    RETURN QUERY
+    SELECT 2;
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE
+  COST 100;
+SELECT count(*) FROM
+  (SELECT result FROM
+    run_command_on_workers($$select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc) from pg_proc where proname = 'func_returning_setof_int_with_variadic_param';$$)
+    UNION  select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc)::text from pg_proc where proname = 'func_returning_setof_int_with_variadic_param')
+  as test;
+ count
+---------------------------------------------------------------------
+     1
+(1 row)
+
+-- verify that recreating distributed procedures with out params gets propagated to workers
+CREATE OR REPLACE PROCEDURE proc_with_variadic_param(IN parm1 date, VARIADIC parm2 int[])
+  LANGUAGE SQL
+AS $$
+    SELECT 1;
+$$;
+-- this should error out
+SELECT create_distributed_function('proc_with_variadic_param(date)');
+ERROR:  function "proc_with_variadic_param(date)" does not exist
+-- this should work
+SELECT create_distributed_function('proc_with_variadic_param(date,int[])');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE OR REPLACE PROCEDURE proc_with_variadic_param(IN parm1 date, VARIADIC parm2 int[])
+  LANGUAGE SQL
+AS $$
+    SELECT 2;
+$$;
+SELECT count(*) FROM
+  (SELECT result FROM
+    run_command_on_workers($$select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc) from pg_proc where proname = 'proc_with_variadic_param';$$)
+    UNION  select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc)::text from pg_proc where proname = 'proc_with_variadic_param')
+  as test;
+ count
+---------------------------------------------------------------------
+     1
+(1 row)
+
+-- verify that recreating distributed procedures with INOUT param gets propagated to workers
+CREATE OR REPLACE PROCEDURE proc_with_inout_param(IN parm1 date, INOUT parm2 int)
+  LANGUAGE SQL
+AS $$
+    SELECT 1;
+$$;
+-- this should error out
+SELECT create_distributed_function('proc_with_inout_param(date)');
+ERROR:  function "proc_with_inout_param(date)" does not exist
+-- this should work
+SELECT create_distributed_function('proc_with_inout_param(date,int)');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE OR REPLACE PROCEDURE proc_with_inout_param(IN parm1 date, INOUT parm2 int)
+  LANGUAGE SQL
+AS $$
+    SELECT 2;
+$$;
+SELECT count(*) FROM
+  (SELECT result FROM
+    run_command_on_workers($$select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc) from pg_proc where proname = 'proc_with_inout_param';$$)
+    UNION  select row(pg_proc.pronargs, pg_proc.proargtypes, pg_proc.prosrc)::text from pg_proc where proname = 'proc_with_inout_param')
+  as test;
+ count
+---------------------------------------------------------------------
+     1
+(1 row)
+
 SET client_min_messages TO error; -- suppress cascading objects dropping
 DROP SCHEMA function_tests CASCADE;
 DROP SCHEMA function_tests2 CASCADE;
--- a/src/test/regress/expected/distributed_functions_conflict.out
+++ b/src/test/regress/expected/distributed_functions_conflict.out
@ -139,6 +139,27 @@ SELECT worker_create_or_replace_object('CREATE AGGREGATE proc_conflict.existing_
 f
 (1 row)

+-- test worker_create_or_replace_object with a function that returns table
+CREATE OR REPLACE FUNCTION func_with_return_table(int)
+RETURNS TABLE (date date)
+LANGUAGE plpgsql AS $$
+BEGIN
+    RETURN query SELECT '2011-01-01'::date;
+END;
+$$;
+SELECT worker_create_or_replace_object('CREATE OR REPLACE FUNCTION func_with_return_table(int) RETURNS TABLE (date date) LANGUAGE plpgsql AS $$ BEGIN RETURN query SELECT ''2011-01-01''::date; END; $$;');
+ worker_create_or_replace_object
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- verify that a backup function is created
+SELECT COUNT(*)=2 FROM pg_proc WHERE proname LIKE 'func_with_return_table%';
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
 -- hide cascades
 SET client_min_messages TO error;
 DROP SCHEMA proc_conflict CASCADE;
--- a/src/test/regress/expected/insert_select_into_local_table.out
+++ b/src/test/regress/expected/insert_select_into_local_table.out
@ -149,6 +149,67 @@ SELECT * FROM non_dist_unique ORDER BY 1;
 5 |  8
 (5 rows)

+INSERT INTO non_dist_unique
+SELECT a+1, b FROM dist_table
+UNION ALL
+SELECT a+100, b FROM dist_table
+ON CONFLICT (a) DO NOTHING;
+SELECT * FROM non_dist_unique ORDER BY 1;
+  a  | b
+---------------------------------------------------------------------
+   1 |  6
+   2 |  7
+   3 | 14
+   4 | 15
+   5 |  8
+ 101 |  6
+ 102 |  7
+ 103 |  8
+(8 rows)
+
+INSERT INTO non_dist_unique
+SELECT a+1, b FROM dist_table
+UNION ALL
+SELECT a+100, b FROM dist_table
+ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b + 1;
+SELECT * FROM non_dist_unique ORDER BY 1;
+  a  | b
+---------------------------------------------------------------------
+   1 | 6
+   2 | 7
+   3 | 8
+   4 | 9
+   5 | 8
+ 101 | 7
+ 102 | 8
+ 103 | 9
+(8 rows)
+
+WITH cte1 AS (SELECT s FROM generate_series(1,10) s)
+INSERT INTO non_dist_unique
+WITH cte2 AS (SELECT s FROM generate_series(1,10) s)
+SELECT a+1, b FROM dist_table WHERE b IN (SELECT s FROM cte1)
+UNION ALL
+SELECT s, s FROM cte1
+ON CONFLICT (a) DO NOTHING;
+SELECT * FROM non_dist_unique ORDER BY 1;
+  a  | b
+---------------------------------------------------------------------
+   1 |  6
+   2 |  7
+   3 |  8
+   4 |  9
+   5 |  8
+   6 |  6
+   7 |  7
+   8 |  8
+   9 |  9
+  10 | 10
+ 101 |  7
+ 102 |  8
+ 103 |  9
+(13 rows)
+
 DROP TABLE non_dist_unique;
 -- test INSERT INTO a table with DEFAULT
 CREATE TABLE non_dist_default (a INT, c TEXT DEFAULT 'def');
@ -168,6 +229,16 @@ SELECT * FROM non_dist_default ORDER BY 1, 2;
 3 | def
 (3 rows)

+SELECT alter_table_set_access_method('non_dist_default', 'columnar');
+NOTICE:  creating a new table for insert_select_into_local_table.non_dist_default
+NOTICE:  moving the data of insert_select_into_local_table.non_dist_default
+NOTICE:  dropping the old insert_select_into_local_table.non_dist_default
+NOTICE:  renaming the new table to insert_select_into_local_table.non_dist_default
+ alter_table_set_access_method
+---------------------------------------------------------------------
+
+(1 row)
+
 INSERT INTO non_dist_default SELECT a, c FROM dist_table WHERE a = 1;
 SELECT * FROM non_dist_default ORDER BY 1, 2;
 a |  c
@ -354,6 +425,691 @@ SELECT * FROM non_dist_2 ORDER BY 1, 2;
 (3 rows)

 TRUNCATE non_dist_2;
+-- check issue https://github.com/citusdata/citus/issues/5858
+CREATE TABLE local_dest_table(
+  col_1 integer,
+  col_2 integer,
+  col_3 text,
+  col_4 text,
+  drop_col text,
+  col_5 bigint,
+  col_6 text,
+  col_7 text default 'col_7',
+  col_8 varchar
+);
+ALTER TABLE local_dest_table DROP COLUMN drop_col;
+CREATE TABLE dist_source_table_1(
+  int_col integer,
+  drop_col text,
+  text_col_1 text,
+  dist_col integer,
+  text_col_2 text
+);
+SELECT create_distributed_table('dist_source_table_1', 'dist_col');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+ALTER TABLE dist_source_table_1 DROP COLUMN drop_col;
+INSERT INTO dist_source_table_1 VALUES (1, 'value', 1, 'value');
+INSERT INTO dist_source_table_1 VALUES (2, 'value2', 1, 'value');
+INSERT INTO dist_source_table_1 VALUES (3, 'value', 3, 'value3');
+CREATE TABLE dist_source_table_2(
+  dist_col integer,
+  int_col integer
+);
+SELECT create_distributed_table('dist_source_table_2', 'dist_col');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO dist_source_table_2 VALUES (1, 1);
+INSERT INTO dist_source_table_2 VALUES (2, 2);
+INSERT INTO dist_source_table_2 VALUES (4, 4);
+CREATE TABLE local_source_table_1 AS SELECT * FROM dist_source_table_1;
+CREATE TABLE local_source_table_2 AS SELECT * FROM dist_source_table_2;
+/*
+ * query_results_equal compares the effect of two queries on local_dest_table.
+ * We use this to ensure that INSERT INTO local_dest_table SELECT behaves
+ * the same when selecting from a regular table (postgres handles it) and
+ * a distributed table (Citus handles it).
+ *
+ * The queries are generated by calling format() on query_template twice,
+ * once for each source_table argument.
+ */
+CREATE OR REPLACE FUNCTION query_results_equal(query_template text, source_table_1 text, source_table_2 text)
+RETURNS bool
+AS $$
+DECLARE
+	/* contents of local_dest_table after each run, aggregated in a fixed order */
+	l1 local_dest_table[];
+	l2 local_dest_table[];
+BEGIN
+	/* get the results using source_table_1 as source */
+	TRUNCATE local_dest_table;
+	EXECUTE format(query_template, source_table_1);
+	SELECT array_agg(l) INTO l1
+	FROM (SELECT * FROM local_dest_table ORDER BY 1, 2, 3, 4, 5, 6, 7, 8) l;
+
+	/* get the results using source_table_2 as source */
+	TRUNCATE local_dest_table;
+	EXECUTE format(query_template, source_table_2);
+	SELECT array_agg(l) INTO l2
+	FROM (SELECT * FROM local_dest_table ORDER BY 1, 2, 3, 4, 5, 6, 7, 8) l;
+
+	/*
+	 * Both notices carry the 'l2=' label although the first one prints l1.
+	 * The message text is kept as is because the recorded NOTICE lines of
+	 * every caller contain it verbatim.
+	 */
+	RAISE NOTICE 'l2=%', l1;
+	RAISE NOTICE 'l2=%', l2;
+
+	/*
+	 * array_agg() yields NULL when a run inserted no rows, and NULL = NULL
+	 * evaluates to NULL. IS NOT DISTINCT FROM makes two empty results
+	 * compare as equal (true) instead of returning NULL.
+	 */
+	RETURN l1 IS NOT DISTINCT FROM l2;
+END;
+$$ LANGUAGE plpgsql;
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table
+  SELECT
+    t1.dist_col,
+    1,
+    'string1',
+    'string2',
+    2,
+    'string3',
+    t1.text_col_1,
+    t1.text_col_2
+  FROM %1$s_1 t1
+  WHERE t1.int_col IN (SELECT int_col FROM %1$s_2)
+$$, 'local_source_table', 'dist_source_table');
+NOTICE:  l2={"(1,1,string1,string2,2,string3,value,value)","(1,1,string1,string2,2,string3,value2,value)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,1,string1,string2,2,string3,value,value)","(1,1,string1,string2,2,string3,value2,value)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table
+  SELECT
+    t1.dist_col,
+    1,
+    'string1',
+    'string2',
+    2,
+    'string3',
+    t1.text_col_1,
+    t1.text_col_2
+  FROM %1$s t1
+  returning *
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,1,string1,string2,2,string3,value,value)","(1,1,string1,string2,2,string3,value2,value)","(3,1,string1,string2,2,string3,value,value3)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,1,string1,string2,2,string3,value,value)","(1,1,string1,string2,2,string3,value2,value)","(3,1,string1,string2,2,string3,value,value3)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_3, col_4) SELECT
+    'string1',
+    'string2'::text
+  FROM %1$s t1
+  returning *;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,string1,string2,,,col_7,)","(,,string1,string2,,,col_7,)","(,,string1,string2,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,string1,string2,,,col_7,)","(,,string1,string2,,,col_7,)","(,,string1,string2,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_7, col_4) SELECT
+    'string1',
+    'string2'::text
+  FROM %1$s t1
+  returning *;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,,string2,,,string1,)","(,,,string2,,,string1,)","(,,,string2,,,string1,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,,string2,,,string1,)","(,,,string2,,,string1,)","(,,,string2,,,string1,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_4, col_3) SELECT
+    'string1',
+    'string2'::text
+  FROM %1$s t1
+  WHERE dist_col = 1
+  returning *;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,string2,string1,,,col_7,)","(,,string2,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,string2,string1,,,col_7,)","(,,string2,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_4, col_1)
+  SELECT
+    'string1',
+     dist_col
+  FROM %1$s
+  UNION ALL
+  SELECT
+    'string',
+     int_col
+  FROM %1$s;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,,,string,,,col_7,)","(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(2,,,string,,,col_7,)","(3,,,string,,,col_7,)","(3,,,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,,,string,,,col_7,)","(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(2,,,string,,,col_7,)","(3,,,string,,,col_7,)","(3,,,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  WITH cte1 AS (SELECT s FROM generate_series(1,10) s)
+  INSERT INTO local_dest_table (col_4, col_1)
+  SELECT
+    'string1',
+     dist_col
+  FROM %1$s WHERE int_col IN (SELECT s FROM cte1)
+  UNION ALL
+  SELECT
+    'string',
+     int_col
+  FROM %1$s WHERE int_col IN (SELECT s + 1 FROM cte1)
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(2,,,string,,,col_7,)","(3,,,string,,,col_7,)","(3,,,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(2,,,string,,,col_7,)","(3,,,string,,,col_7,)","(3,,,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  WITH cte1 AS (SELECT 'stringcte', s FROM generate_series(1,10) s)
+  INSERT INTO local_dest_table (col_4, col_1)
+  SELECT
+    'string1',
+     dist_col
+  FROM %1$s WHERE int_col IN (SELECT s FROM cte1)
+  UNION ALL
+  SELECT
+    *
+  FROM cte1
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(1,,,stringcte,,,col_7,)","(2,,,stringcte,,,col_7,)","(3,,,string1,,,col_7,)","(3,,,stringcte,,,col_7,)","(4,,,stringcte,,,col_7,)","(5,,,stringcte,,,col_7,)","(6,,,stringcte,,,col_7,)","(7,,,stringcte,,,col_7,)","(8,,,stringcte,,,col_7,)","(9,,,stringcte,,,col_7,)","(10,,,stringcte,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(1,,,stringcte,,,col_7,)","(2,,,stringcte,,,col_7,)","(3,,,string1,,,col_7,)","(3,,,stringcte,,,col_7,)","(4,,,stringcte,,,col_7,)","(5,,,stringcte,,,col_7,)","(6,,,stringcte,,,col_7,)","(7,,,stringcte,,,col_7,)","(8,,,stringcte,,,col_7,)","(9,,,stringcte,,,col_7,)","(10,,,stringcte,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_3)
+  SELECT t1.text_col_1
+  FROM %1$s t1
+  GROUP BY t1.text_col_1;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,value,,,,col_7,)","(,,value2,,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,value,,,,col_7,)","(,,value2,,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_1, col_2, col_3, col_5, col_6, col_7, col_8)
+  SELECT
+    max(t1.dist_col),
+    3,
+    'string_3',
+    4,
+    44,
+    t1.text_col_1,
+    'string_1000'
+  FROM %1$s t1
+  GROUP BY t1.text_col_2, t1.text_col_1;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,3,string_3,,4,44,value,string_1000)","(1,3,string_3,,4,44,value2,string_1000)","(3,3,string_3,,4,44,value,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,3,string_3,,4,44,value,string_1000)","(1,3,string_3,,4,44,value2,string_1000)","(3,3,string_3,,4,44,value,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- read from the %1$s placeholder so the two runs really use different source tables
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_7, col_8)
+  SELECT
+    t1.text_col_1,
+    'string_1000'
+  FROM %1$s t1
+  GROUP BY t1.text_col_1;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,,,,,value,string_1000)","(,,,,,,value2,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,,,,,value,string_1000)","(,,,,,,value2,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_6, col_7, col_8)
+  SELECT
+    'string_4',
+    t1.text_col_1,
+    'string_1000'
+  FROM %1$s t1
+  GROUP BY t1.text_col_1;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,,,,string_4,value,string_1000)","(,,,,,string_4,value2,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,,,,string_4,value,string_1000)","(,,,,,string_4,value2,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_5, col_3)
+  SELECT 12, 'string_11' FROM %1$s t1
+  UNION
+  SELECT int_col, 'string' FROM %1$s;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,string,,1,,col_7,)","(,,string,,2,,col_7,)","(,,string,,3,,col_7,)","(,,string_11,,12,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,string,,1,,col_7,)","(,,string,,2,,col_7,)","(,,string,,3,,col_7,)","(,,string_11,,12,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table(col_3, col_2)
+  SELECT text_col_1, count(*) FROM %1$s GROUP BY 1
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,1,value2,,,,col_7,)","(,2,value,,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,1,value2,,,,col_7,)","(,2,value,,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table(col_3, col_5)
+  SELECT text_col_1, count(*)::int FROM %1$s GROUP BY 1
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,value,,2,,col_7,)","(,,value2,,1,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,value,,2,,col_7,)","(,,value2,,1,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- repeat above tests with Citus local table
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table
+  SELECT
+    t1.dist_col,
+    1,
+    'string1',
+    'string2',
+    2,
+    'string3',
+    t1.text_col_1,
+    t1.text_col_2
+  FROM %1$s_1 t1
+  WHERE t1.int_col IN (SELECT int_col FROM %1$s_2)
+$$, 'local_source_table', 'dist_source_table');
+NOTICE:  l2={"(1,1,string1,string2,2,string3,value,value)","(1,1,string1,string2,2,string3,value2,value)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,1,string1,string2,2,string3,value,value)","(1,1,string1,string2,2,string3,value2,value)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table
+  SELECT
+    t1.dist_col,
+    1,
+    'string1',
+    'string2',
+    2,
+    'string3',
+    t1.text_col_1,
+    t1.text_col_2
+  FROM %1$s t1
+  returning *
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,1,string1,string2,2,string3,value,value)","(1,1,string1,string2,2,string3,value2,value)","(3,1,string1,string2,2,string3,value,value3)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,1,string1,string2,2,string3,value,value)","(1,1,string1,string2,2,string3,value2,value)","(3,1,string1,string2,2,string3,value,value3)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_3, col_4) SELECT
+    'string1',
+    'string2'::text
+  FROM %1$s t1
+  returning *;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,string1,string2,,,col_7,)","(,,string1,string2,,,col_7,)","(,,string1,string2,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,string1,string2,,,col_7,)","(,,string1,string2,,,col_7,)","(,,string1,string2,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_7, col_4) SELECT
+    'string1',
+    'string2'::text
+  FROM %1$s t1
+  returning *;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,,string2,,,string1,)","(,,,string2,,,string1,)","(,,,string2,,,string1,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,,string2,,,string1,)","(,,,string2,,,string1,)","(,,,string2,,,string1,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_4, col_3) SELECT
+    'string1',
+    'string2'::text
+  FROM %1$s t1
+  WHERE dist_col = 1
+  returning *;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,string2,string1,,,col_7,)","(,,string2,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,string2,string1,,,col_7,)","(,,string2,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_4, col_1)
+  SELECT
+    'string1',
+     dist_col
+  FROM %1$s
+  UNION ALL
+  SELECT
+    'string',
+     int_col
+  FROM %1$s;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,,,string,,,col_7,)","(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(2,,,string,,,col_7,)","(3,,,string,,,col_7,)","(3,,,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,,,string,,,col_7,)","(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(2,,,string,,,col_7,)","(3,,,string,,,col_7,)","(3,,,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  WITH cte1 AS (SELECT s FROM generate_series(1,10) s)
+  INSERT INTO local_dest_table (col_4, col_1)
+  SELECT
+    'string1',
+     dist_col
+  FROM %1$s WHERE int_col IN (SELECT s FROM cte1)
+  UNION ALL
+  SELECT
+    'string',
+     int_col
+  FROM %1$s WHERE int_col IN (SELECT s + 1 FROM cte1)
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(2,,,string,,,col_7,)","(3,,,string,,,col_7,)","(3,,,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(2,,,string,,,col_7,)","(3,,,string,,,col_7,)","(3,,,string1,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  WITH cte1 AS (SELECT 'stringcte', s FROM generate_series(1,10) s)
+  INSERT INTO local_dest_table (col_4, col_1)
+  SELECT
+    'string1',
+     dist_col
+  FROM %1$s WHERE int_col IN (SELECT s FROM cte1)
+  UNION ALL
+  SELECT
+    *
+  FROM cte1
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(1,,,stringcte,,,col_7,)","(2,,,stringcte,,,col_7,)","(3,,,string1,,,col_7,)","(3,,,stringcte,,,col_7,)","(4,,,stringcte,,,col_7,)","(5,,,stringcte,,,col_7,)","(6,,,stringcte,,,col_7,)","(7,,,stringcte,,,col_7,)","(8,,,stringcte,,,col_7,)","(9,,,stringcte,,,col_7,)","(10,,,stringcte,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,,,string1,,,col_7,)","(1,,,string1,,,col_7,)","(1,,,stringcte,,,col_7,)","(2,,,stringcte,,,col_7,)","(3,,,string1,,,col_7,)","(3,,,stringcte,,,col_7,)","(4,,,stringcte,,,col_7,)","(5,,,stringcte,,,col_7,)","(6,,,stringcte,,,col_7,)","(7,,,stringcte,,,col_7,)","(8,,,stringcte,,,col_7,)","(9,,,stringcte,,,col_7,)","(10,,,stringcte,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_3)
+  SELECT t1.text_col_1
+  FROM %1$s t1
+  GROUP BY t1.text_col_1;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,value,,,,col_7,)","(,,value2,,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,value,,,,col_7,)","(,,value2,,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_1, col_2, col_3, col_5, col_6, col_7, col_8)
+  SELECT
+    max(t1.dist_col),
+    3,
+    'string_3',
+    4,
+    44,
+    t1.text_col_1,
+    'string_1000'
+  FROM %1$s t1
+  GROUP BY t1.text_col_2, t1.text_col_1;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(1,3,string_3,,4,44,value,string_1000)","(1,3,string_3,,4,44,value2,string_1000)","(3,3,string_3,,4,44,value,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(1,3,string_3,,4,44,value,string_1000)","(1,3,string_3,,4,44,value2,string_1000)","(3,3,string_3,,4,44,value,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- read from the %1$s placeholder so the two runs really use different source tables
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_7, col_8)
+  SELECT
+    t1.text_col_1,
+    'string_1000'
+  FROM %1$s t1
+  GROUP BY t1.text_col_1;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,,,,,value,string_1000)","(,,,,,,value2,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,,,,,value,string_1000)","(,,,,,,value2,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_6, col_7, col_8)
+  SELECT
+    'string_4',
+    t1.text_col_1,
+    'string_1000'
+  FROM %1$s t1
+  GROUP BY t1.text_col_1;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,,,,string_4,value,string_1000)","(,,,,,string_4,value2,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,,,,string_4,value,string_1000)","(,,,,,string_4,value2,string_1000)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table (col_5, col_3)
+  SELECT 12, 'string_11' FROM %1$s t1
+  UNION
+  SELECT int_col, 'string' FROM %1$s;
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,string,,1,,col_7,)","(,,string,,2,,col_7,)","(,,string,,3,,col_7,)","(,,string_11,,12,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,string,,1,,col_7,)","(,,string,,2,,col_7,)","(,,string,,3,,col_7,)","(,,string_11,,12,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table(col_3, col_2)
+  SELECT text_col_1, count(*) FROM %1$s GROUP BY 1
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,1,value2,,,,col_7,)","(,2,value,,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,1,value2,,,,col_7,)","(,2,value,,,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT * FROM query_results_equal($$
+  INSERT INTO local_dest_table(col_3, col_5)
+  SELECT text_col_1, count(*)::int FROM %1$s GROUP BY 1
+$$, 'local_source_table_1', 'dist_source_table_1');
+NOTICE:  l2={"(,,value,,2,,col_7,)","(,,value2,,1,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+NOTICE:  l2={"(,,value,,2,,col_7,)","(,,value2,,1,,col_7,)"}
+CONTEXT:  PL/pgSQL function query_results_equal(text,text,text) line XX at RAISE
+ query_results_equal
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- go back to proper local table for remaining tests
+TRUNCATE local_dest_table;
+SELECT undistribute_table('local_source_table_1');
+ERROR:  cannot undistribute table because the table is not distributed
+-- use a sequence (cannot use query_results_equal, since sequence values would not match)
+CREATE SEQUENCE seq;
+BEGIN;
+INSERT INTO local_dest_table (col_5, col_3)
+SELECT 12, 'string_11' FROM dist_source_table_1
+UNION
+SELECT nextval('seq'), 'string' FROM dist_source_table_1;
+SELECT * FROM local_dest_table ORDER BY 1,2,3,4,5,6,7,8;
+ col_1 | col_2 |   col_3   | col_4 | col_5 | col_6 | col_7 | col_8
+---------------------------------------------------------------------
+       |       | string    |       |     1 |       | col_7 |
+       |       | string    |       |     2 |       | col_7 |
+       |       | string    |       |     3 |       | col_7 |
+       |       | string_11 |       |    12 |       | col_7 |
+(4 rows)
+
+ROLLBACK;
+-- add a bigserial column
+ALTER TABLE local_dest_table ADD COLUMN col_9 bigserial;
+-- col_9 is not in the column list, so every inserted row takes its bigserial default
+INSERT INTO local_dest_table (col_5, col_3)
+SELECT 12, 'string_11' FROM dist_source_table_1
+UNION
+SELECT 11, 'string' FROM dist_source_table_1;
+SELECT * FROM local_dest_table ORDER BY 1,2,3,4,5,6,7,8;
+ col_1 | col_2 |   col_3   | col_4 | col_5 | col_6 | col_7 | col_8 | col_9
+---------------------------------------------------------------------
+       |       | string    |       |    11 |       | col_7 |       |     2
+       |       | string_11 |       |    12 |       | col_7 |       |     1
+(2 rows)
+
+BEGIN;
+INSERT INTO local_dest_table(col_3, col_2)
+SELECT text_col_1, count(*) FROM dist_source_table_1 GROUP BY 1;
+SELECT * FROM local_dest_table ORDER BY 1,2,3,4,5,6,7,8;
+ col_1 | col_2 |   col_3   | col_4 | col_5 | col_6 | col_7 | col_8 | col_9
+---------------------------------------------------------------------
+       |     1 | value2    |       |       |       | col_7 |       |     3
+       |     2 | value     |       |       |       | col_7 |       |     4
+       |       | string    |       |    11 |       | col_7 |       |     2
+       |       | string_11 |       |    12 |       | col_7 |       |     1
+(4 rows)
+
+ROLLBACK;
+BEGIN;
+INSERT INTO local_dest_table (col_4, col_3) SELECT
+  'string1',
+  'string2'::text
+FROM dist_source_table_1 t1
+WHERE dist_col = 1
+RETURNING *;
+ col_1 | col_2 |  col_3  |  col_4  | col_5 | col_6 | col_7 | col_8 | col_9
+---------------------------------------------------------------------
+       |       | string2 | string1 |       |       | col_7 |       |     5
+       |       | string2 | string1 |       |       | col_7 |       |     6
+(2 rows)
+
+ROLLBACK;
 \set VERBOSITY terse
 DROP SCHEMA insert_select_into_local_table CASCADE;
-NOTICE:  drop cascades to 5 other objects
+NOTICE:  drop cascades to 12 other objects
--- a/src/test/regress/expected/insert_select_repartition.out
+++ b/src/test/regress/expected/insert_select_repartition.out
@ -500,7 +500,7 @@ INSERT INTO target_table
 SELECT mapped_key, c FROM t NATURAL JOIN source_table;
 DEBUG:  volatile functions are not allowed in distributed INSERT ... SELECT queries
 DEBUG:  generating subplan XXX_1 for CTE t: SELECT mapped_key, a, c FROM insert_select_repartition.source_table WHERE ((a)::double precision OPERATOR(pg_catalog.>) floor(random()))
-DEBUG:  Plan XXX query after replacing subqueries and CTEs: SELECT mapped_key AS a, (c)::integer[] AS b FROM (SELECT t.mapped_key, t.c FROM ((SELECT intermediate_result.mapped_key, intermediate_result.a, intermediate_result.c FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(mapped_key integer, a integer, c double precision[])) t JOIN insert_select_repartition.source_table USING (mapped_key, a, c))) citus_insert_select_subquery
+DEBUG:  Plan XXX query after replacing subqueries and CTEs: SELECT mapped_key AS a, auto_coerced_by_citus_1 AS b FROM (SELECT t.mapped_key, (t.c)::integer[] AS auto_coerced_by_citus_1 FROM ((SELECT intermediate_result.mapped_key, intermediate_result.a, intermediate_result.c FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(mapped_key integer, a integer, c double precision[])) t JOIN insert_select_repartition.source_table USING (mapped_key, a, c))) citus_insert_select_subquery
 DEBUG:  performing repartitioned INSERT ... SELECT
 RESET client_min_messages;
 SELECT * FROM target_table ORDER BY a;
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`#include "udfs/citus_finish_pg_upgrade/10.2-5.sql"`