mirror of https://github.com/citusdata/citus.git
Compare commits
158 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 529b303b6d | |
| | 75460fa23e | |
| | b68023f91d | |
| | 31911d8297 | |
| | 3399d660f3 | |
| | 002046b87b | |
| | 79cabe7eca | |
| | e28591df08 | |
| | a39ce7942f | |
| | ae2eb65be0 | |
| | c600eabd82 | |
| | 662b7248db | |
| | c843cb2060 | |
| | 4e47293f9f | |
| | accd01fbf6 | |
| | cf533ebae9 | |
| | 8bba66f207 | |
| | f80fa1c83b | |
| | 4244bc8516 | |
| | b2356f1c85 | |
| | daa69bec8f | |
| | bc41e7b94f | |
| | b10aa02908 | |
| | b63572d72f | |
| | 7a7a0ba9c7 | |
| | 5a71f0d1ca | |
| | fa7ca79c6f | |
| | 94653c1f4e | |
| | be2fcda071 | |
| | 61b491f0f4 | |
| | 6251eab9b7 | |
| | e0570baad6 | |
| | 503a2aba73 | |
| | 86010de733 | |
| | 458299035b | |
| | 188c182be4 | |
| | dba9379ea5 | |
| | 785a87c659 | |
| | 95477e6d02 | |
| | 5fc4cea1ce | |
| | bf959de39e | |
| | 90f2ab6648 | |
| | 3ca66e1fcc | |
| | 5d71fca3b4 | |
| | 76f18624e5 | |
| | abd50a0bb8 | |
| | aa0ac0af60 | |
| | 432b69eb9d | |
| | f1dd976a14 | |
| | 351cb2044d | |
| | 287abea661 | |
| | f0014cf0df | |
| | d9652bf5f9 | |
| | 77d5807fd6 | |
| | 2a6414c727 | |
| | c5dde4b115 | |
| | 5a3648b2cb | |
| | d4dfdd765b | |
| | cec1848b13 | |
| | bb840e58a7 | |
| | 5eb1d93be1 | |
| | de045402f3 | |
| | 81776fe190 | |
| | 80945212ae | |
| | 83b25e1fb1 | |
| | b5e70f56ab | |
| | d2ea4043d4 | |
| | 10d62d50ea | |
| | b4cb1a94e9 | |
| | becc02b398 | |
| | 360fbe3b99 | |
| | b58af1c8d5 | |
| | 4012e5938a | |
| | 8bb8b2ce2d | |
| | b7bfe42f1a | |
| | 0c658b73fc | |
| | 2834fa26c9 | |
| | 8ece8acac7 | |
| | 0fd95d71e4 | |
| | d5f0ec5cd1 | |
| | 544b6c4716 | |
| | 2e1de77744 | |
| | bb6eeb17cc | |
| | 0a5cae19ed | |
| | 62e5fcfe09 | |
| | ce7ddc0d3d | |
| | aaa31376e0 | |
| | 439870f3a9 | |
| | 785287c58f | |
| | 86b5bc6a20 | |
| | f1f0b09f73 | |
| | 683ead9607 | |
| | eaa609f510 | |
| | bd0558fe39 | |
| | be6668e440 | |
| | f743b35fc2 | |
| | 2095679dc8 | |
| | e15cc5c63b | |
| | badaa21cb1 | |
| | 8d929d3bf8 | |
| | 87a1b631e8 | |
| | 649050c676 | |
| | cf9a4476e0 | |
| | f73da1ed40 | |
| | 41883cea38 | |
| | bfc6d1f440 | |
| | a6161f5a21 | |
| | 71d6328378 | |
| | 6b6d959fac | |
| | 26409f6400 | |
| | dbf0e647a9 | |
| | 3d8fd337e5 | |
| | f0789bd388 | |
| | c183634207 | |
| | 889aa92ac0 | |
| | f31bcb4219 | |
| | 6b9962c0c0 | |
| | 3e2b6f61fa | |
| | a2e3c797e8 | |
| | f1160b0892 | |
| | 9327df8446 | |
| | 9ccf758bb8 | |
| | 0c1b31cdb5 | |
| | 245a62df3e | |
| | 2ba566599f | |
| | c978de41b4 | |
| | 1a5db371f5 | |
| | 5fc3db3cda | |
| | 194e6bb0d0 | |
| | 3da9096d53 | |
| | 16f375ff7d | |
| | 9c7f138b3e | |
| | 261c97d151 | |
| | 743c9bbf87 | |
| | a8900b57e6 | |
| | 5d805eb10b | |
| | da24ede835 | |
| | 5005be31e6 | |
| | 9e42f3f2c4 | |
| | 7e4dd604d0 | |
| | d0644a9884 | |
| | 37ad5e6958 | |
| | 5deaf9a616 | |
| | c36072064a | |
| | c350c7be46 | |
| | 8587de850b | |
| | 4cd8bb1b67 | |
| | 4456913801 | |
| | 55a0d1f730 | |
| | 5e37fe0c46 | |
| | e8c3179b4d | |
| | 92dc7f36fc | |
| | 98d95a9b9d | |
| | c7f5e2b975 | |
| | 282523549e | |
| | c98341e4ed | |
| | 8d2fbca8ef | |
| | 088ba75057 | |

@@ -73,7 +73,7 @@ USER citus
 # build postgres versions separately for effective parrallelism and caching of already built versions when changing only certain versions
 FROM base AS pg15
-RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.13
+RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.14
 RUN rm .pgenv/src/*.tar*
 RUN make -C .pgenv/src/postgresql-*/ clean
 RUN make -C .pgenv/src/postgresql-*/src/include install

@@ -85,7 +85,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/
 RUN rm .pgenv-staging/config/default.conf

 FROM base AS pg16
-RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.9
+RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.10
 RUN rm .pgenv/src/*.tar*
 RUN make -C .pgenv/src/postgresql-*/ clean
 RUN make -C .pgenv/src/postgresql-*/src/include install

@@ -97,7 +97,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/
 RUN rm .pgenv-staging/config/default.conf

 FROM base AS pg17
-RUN MAKEFLAGS="-j $(nproc)" pgenv build 17.5
+RUN MAKEFLAGS="-j $(nproc)" pgenv build 17.6
 RUN rm .pgenv/src/*.tar*
 RUN make -C .pgenv/src/postgresql-*/ clean
 RUN make -C .pgenv/src/postgresql-*/src/include install

@@ -113,10 +113,10 @@ FROM base AS uncrustify-builder
 RUN sudo apt update && sudo apt install -y cmake tree

 WORKDIR /uncrustify
-RUN curl -L https://github.com/uncrustify/uncrustify/archive/uncrustify-0.68.1.tar.gz | tar xz
-WORKDIR /uncrustify/uncrustify-uncrustify-0.68.1/
+RUN curl -L https://github.com/uncrustify/uncrustify/archive/uncrustify-0.82.0.tar.gz | tar xz
+WORKDIR /uncrustify/uncrustify-uncrustify-0.82.0/
 RUN mkdir build
-WORKDIR /uncrustify/uncrustify-uncrustify-0.68.1/build/
+WORKDIR /uncrustify/uncrustify-uncrustify-0.82.0/build/
 RUN cmake ..
 RUN MAKEFLAGS="-j $(nproc)" make -s

@@ -216,7 +216,7 @@ COPY --chown=citus:citus .psqlrc .
 RUN sudo chown --from=root:root citus:citus -R ~

 # sets default pg version
-RUN pgenv switch 17.5
+RUN pgenv switch 17.6

 # make connecting to the coordinator easy
 ENV PGPORT=9700

@@ -2,6 +2,8 @@
     "image": "ghcr.io/citusdata/citus-devcontainer:main",
     "runArgs": [
         "--cap-add=SYS_PTRACE",
+        "--cap-add=SYS_NICE", // allow NUMA page inquiry
+        "--security-opt=seccomp=unconfined", // unblocks move_pages() in the container
         "--ulimit=core=-1",
     ],
     "forwardPorts": [

@@ -1,4 +1,4 @@
-black==23.11.0
+black==24.3.0
 click==8.1.7
 isort==5.12.0
 mypy-extensions==1.0.0

@@ -16,7 +16,7 @@ pytest-timeout = "*"
 pytest-xdist = "*"
 pytest-repeat = "*"
 pyyaml = "*"
-werkzeug = "==2.3.7"
+werkzeug = "==3.0.6"

 [dev-packages]
 black = "*"

@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "f8db86383082539f626f1402e720f5f2e3f9718b44a8f26110cf9f52e7ca46bc"
+            "sha256": "bdfddfee81a47cfb42e76936d229e94f5d3cee75f612b7beb2d3008b06d6427b"
         },
         "pipfile-spec": 6,
         "requires": {

@@ -119,69 +119,85 @@
         },
         "certifi": {
             "hashes": [
-                "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f",
-                "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"
+                "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b",
+                "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.6'",
-            "version": "==2024.2.2"
+            "version": "==2024.7.4"
         },
         "cffi": {
             "hashes": [
-                ...
+                ...
             ],
-            "markers": "platform_python_implementation != 'PyPy'",
-            "version": "==1.16.0"
+            "markers": "python_version >= '3.8'",
+            "version": "==1.17.1"
         },
         "click": {
             "hashes": [

@@ -202,42 +218,41 @@
         },
         "cryptography": {
             "hashes": [
-                ...
+                ...
             ],
             "index": "pypi",
-            "markers": "python_version >= '3.7'",
-            "version": "==42.0.3"
+            "markers": "python_version >= '3.7' and python_full_version not in '3.9.0, 3.9.1'",
+            "version": "==44.0.1"
         },
         "docopt": {
             "hashes": [

@@ -329,11 +344,12 @@
         },
         "jinja2": {
             "hashes": [
-                "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa",
-                "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"
+                "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d",
+                "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.7'",
-            "version": "==3.1.3"
+            "version": "==3.1.6"
         },
         "kaitaistruct": {
             "hashes": [

@@ -353,69 +369,70 @@
         },
         "markupsafe": {
             "hashes": [
-                ...
+                ...
             ],
-            "markers": "python_version >= '3.7'",
-            "version": "==2.1.5"
+            "markers": "python_version >= '3.9'",
+            "version": "==3.0.2"
         },
         "mitmproxy": {
             "editable": true,

@@ -561,10 +578,11 @@
         },
         "pycparser": {
             "hashes": [
-                "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9",
-                "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"
+                "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6",
+                "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"
             ],
-            "version": "==2.21"
+            "markers": "python_version >= '3.8'",
+            "version": "==2.22"
         },
         "pyopenssl": {
             "hashes": [

@@ -772,20 +790,22 @@
         },
         "tornado": {
             "hashes": [
-                ...
+                ...
             ],
-            "markers": "python_version >= '3.8'",
-            "version": "==6.4"
+            "index": "pypi",
+            "markers": "python_version >= '3.9'",
+            "version": "==6.5"
         },
         "typing-extensions": {
             "hashes": [

@@ -803,12 +823,12 @@
         },
         "werkzeug": {
             "hashes": [
-                "sha256:2b8c0e447b4b9dbcc85dd97b6eeb4dcbaf6c8b6c3be0bd654e25553e0a2157d8",
-                "sha256:effc12dba7f3bd72e605ce49807bbe692bd729c3bb122a3b91747a6ae77df528"
+                "sha256:1bc0c2310d2fbb07b1dd1105eba2f7af72f322e1e455f2f93c993bee8c8a5f17",
+                "sha256:a8dd59d4de28ca70471a34cba79bed5f7ef2e036a76b3ab0835474246eb41f8d"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.8'",
-            "version": "==2.3.7"
+            "version": "==3.0.6"
         },
         "wsproto": {
             "hashes": [

@@ -884,40 +904,40 @@
         },
         "black": {
             "hashes": [
-                ...
+                ...
             ],
             "index": "pypi",
             "markers": "python_version >= '3.8'",
-            "version": "==24.2.0"
+            "version": "==24.3.0"
         },
         "click": {
             "hashes": [
-                "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1",
-                "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"
+                "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2",
+                "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"
             ],
-            "markers": "python_version >= '3.6'",
-            "version": "==8.0.4"
+            "markers": "python_version >= '3.7'",
+            "version": "==8.1.8"
         },
         "flake8": {
             "hashes": [

@@ -956,19 +976,19 @@
         },
         "mypy-extensions": {
             "hashes": [
-                "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d",
-                "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"
+                "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505",
+                "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"
             ],
-            "markers": "python_version >= '3.5'",
-            "version": "==1.0.0"
+            "markers": "python_version >= '3.8'",
+            "version": "==1.1.0"
         },
         "packaging": {
             "hashes": [
-                "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5",
-                "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"
+                "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484",
+                "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"
             ],
-            "markers": "python_version >= '3.7'",
-            "version": "==23.2"
+            "markers": "python_version >= '3.8'",
+            "version": "==25.0"
         },
         "pathspec": {
             "hashes": [

@@ -980,11 +1000,11 @@
         },
         "platformdirs": {
             "hashes": [
-                "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068",
-                "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"
+                "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc",
+                "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"
             ],
-            "markers": "python_version >= '3.8'",
-            "version": "==4.2.0"
+            "markers": "python_version >= '3.9'",
+            "version": "==4.3.8"
         },
         "pycodestyle": {
             "hashes": [

@@ -1004,19 +1024,49 @@
         },
         "tomli": {
             "hashes": [
-                "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
-                "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
+                ...
             ],
-            "markers": "python_version < '3.11'",
-            "version": "==2.0.1"
+            "markers": "python_version >= '3.8'",
+            "version": "==2.2.1"
         },
         "typing-extensions": {
             "hashes": [
-                "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783",
-                "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"
+                "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36",
+                "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76"
             ],
-            "markers": "python_version >= '3.8'",
-            "version": "==4.9.0"
+            "markers": "python_version >= '3.9'",
+            "version": "==4.14.1"
         }
     }
 }

@@ -28,6 +28,7 @@ src/backend/distributed/utils/citus_outfuncs.c -citus-style
 src/backend/distributed/deparser/ruleutils_15.c -citus-style
 src/backend/distributed/deparser/ruleutils_16.c -citus-style
 src/backend/distributed/deparser/ruleutils_17.c -citus-style
+src/backend/distributed/deparser/ruleutils_18.c -citus-style
 src/backend/distributed/commands/index_pg_source.c -citus-style

 src/include/distributed/citus_nodes.h -citus-style

@@ -13,15 +13,3 @@ runs:
       token: ${{ inputs.codecov_token }}
       verbose: true
       gcov: true
-  - name: Create codeclimate coverage
-    run: |-
-      lcov --directory . --capture --output-file lcov.info
-      lcov --remove lcov.info -o lcov.info '/usr/*'
-      sed "s=^SF:$PWD/=SF:=g" -i lcov.info # relative pats are required by codeclimate
-      mkdir -p /tmp/codeclimate
-      cc-test-reporter format-coverage -t lcov -o /tmp/codeclimate/${{ inputs.flags }}.json lcov.info
-    shell: bash
-  - uses: actions/upload-artifact@v4.6.0
-    with:
-      path: "/tmp/codeclimate/*.json"
-      name: codeclimate-${{ inputs.flags }}

@@ -30,13 +30,13 @@ jobs:
       fail_test_image_name: "ghcr.io/citusdata/failtester"
       pgupgrade_image_name: "ghcr.io/citusdata/pgupgradetester"
       style_checker_image_name: "ghcr.io/citusdata/stylechecker"
-      style_checker_tools_version: "0.8.18"
-      sql_snapshot_pg_version: "17.5"
-      image_suffix: "-dev-d28f316"
-      pg15_version: '{ "major": "15", "full": "15.13" }'
-      pg16_version: '{ "major": "16", "full": "16.9" }'
-      pg17_version: '{ "major": "17", "full": "17.5" }'
-      upgrade_pg_versions: "15.13-16.9-17.5"
+      style_checker_tools_version: "0.8.33"
+      sql_snapshot_pg_version: "17.6"
+      image_suffix: "-ve4d3aa0"
+      pg15_version: '{ "major": "15", "full": "15.14" }'
+      pg16_version: '{ "major": "16", "full": "16.10" }'
+      pg17_version: '{ "major": "17", "full": "17.6" }'
+      upgrade_pg_versions: "15.14-16.10-17.6"
     steps:
       # Since GHA jobs need at least one step we use a noop step here.
       - name: Set up parameters

@@ -153,6 +153,7 @@ jobs:
       - check-isolation
       - check-operations
      - check-follower-cluster
+      - check-add-backup-node
       - check-columnar
       - check-columnar-isolation
       - check-enterprise

@@ -224,10 +225,16 @@ jobs:
     runs-on: ubuntu-latest
     container:
       image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ needs.params.outputs.image_suffix }}"
-      options: --user root --dns=8.8.8.8
+      options: >-
+        --user root
+        --dns=8.8.8.8
+        --cap-add=SYS_NICE
+        --security-opt seccomp=unconfined
     # Due to Github creates a default network for each job, we need to use
     # --dns= to have similar DNS settings as our other CI systems or local
     # machines. Otherwise, we may see different results.
+    # and grant caps so PG18's NUMA introspection (pg_shmem_allocations_numa -> move_pages)
+    # doesn't fail with EPERM in CI.
     needs:
       - params
       - build
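
The extra `--cap-add=SYS_NICE` and seccomp options serve the same purpose as the devcontainer `runArgs` change earlier in this diff: PostgreSQL 18's NUMA introspection goes through `move_pages()`, which needs `CAP_SYS_NICE` and is blocked by Docker's default seccomp profile. A minimal sketch of the kind of query those flags unblock — assuming a PG18 server inside the container; the view name comes from the comment above, and the column list is an assumption based on PG18, not on this diff:

```sql
-- On PG18, this view inspects shared-memory placement via move_pages();
-- without CAP_SYS_NICE and a permissive seccomp profile it fails with EPERM.
SELECT name, numa_node, size
FROM pg_shmem_allocations_numa
ORDER BY size DESC
LIMIT 5;
```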

@@ -331,7 +338,15 @@ jobs:
           make -C src/test/regress \
             check-pg-upgrade \
             old-bindir=/usr/lib/postgresql/${{ env.old_pg_major }}/bin \
-            new-bindir=/usr/lib/postgresql/${{ env.new_pg_major }}/bin
+            new-bindir=/usr/lib/postgresql/${{ env.new_pg_major }}/bin \
+            test-with-columnar=false
+
+          gosu circleci \
+            make -C src/test/regress \
+              check-pg-upgrade \
+              old-bindir=/usr/lib/postgresql/${{ env.old_pg_major }}/bin \
+              new-bindir=/usr/lib/postgresql/${{ env.new_pg_major }}/bin \
+              test-with-columnar=true
       - name: Copy pg_upgrade logs for newData dir
         run: |-
           mkdir -p /tmp/pg_upgrade_newData_logs

@@ -349,14 +364,20 @@ jobs:
           flags: ${{ env.old_pg_major }}_${{ env.new_pg_major }}_upgrade
           codecov_token: ${{ secrets.CODECOV_TOKEN }}
   test-citus-upgrade:
-    name: PG${{ fromJson(needs.params.outputs.pg15_version).major }} - check-citus-upgrade
+    name: PG${{ fromJson(matrix.pg_version).major }} - check-citus-upgrade
     runs-on: ubuntu-latest
     container:
-      image: "${{ needs.params.outputs.citusupgrade_image_name }}:${{ fromJson(needs.params.outputs.pg15_version).full }}${{ needs.params.outputs.image_suffix }}"
+      image: "${{ needs.params.outputs.citusupgrade_image_name }}:${{ fromJson(matrix.pg_version).full }}${{ needs.params.outputs.image_suffix }}"
       options: --user root
     needs:
       - params
       - build
+    strategy:
+      fail-fast: false
+      matrix:
+        pg_version:
+          - ${{ needs.params.outputs.pg15_version }}
+          - ${{ needs.params.outputs.pg16_version }}
     steps:
       - uses: actions/checkout@v4
       - uses: "./.github/actions/setup_extension"

@@ -365,7 +386,7 @@ jobs:
       - name: Install and test citus upgrade
         run: |-
           # run make check-citus-upgrade for all citus versions
-          # the image has ${CITUS_VERSIONS} set with all verions it contains the binaries of
+          # the image has ${CITUS_VERSIONS} set with all versions it contains the binaries of
           for citus_version in ${CITUS_VERSIONS}; do \
             gosu circleci \
               make -C src/test/regress \

@@ -376,7 +397,7 @@ jobs:
               citus-post-tar=${GITHUB_WORKSPACE}/install-$PG_MAJOR.tar; \
           done;
           # run make check-citus-upgrade-mixed for all citus versions
-          # the image has ${CITUS_VERSIONS} set with all verions it contains the binaries of
+          # the image has ${CITUS_VERSIONS} set with all versions it contains the binaries of
           for citus_version in ${CITUS_VERSIONS}; do \
             gosu circleci \
               make -C src/test/regress \

@@ -395,29 +416,6 @@ jobs:
         with:
           flags: ${{ env.PG_MAJOR }}_citus_upgrade
           codecov_token: ${{ secrets.CODECOV_TOKEN }}
-  upload-coverage:
-    if: always()
-    env:
-      CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
-    runs-on: ubuntu-latest
-    container:
-      image: ${{ needs.params.outputs.test_image_name }}:${{ fromJson(needs.params.outputs.pg17_version).full }}${{ needs.params.outputs.image_suffix }}
-    needs:
-      - params
-      - test-citus
-      - test-arbitrary-configs
-      - test-citus-upgrade
-      - test-pg-upgrade
-    steps:
-      - uses: actions/download-artifact@v4.1.8
-        with:
-          pattern: codeclimate*
-          path: codeclimate
-          merge-multiple: true
-      - name: Upload coverage results to Code Climate
-        run: |-
-          cc-test-reporter sum-coverage codeclimate/*.json -o total.json
-          cc-test-reporter upload-coverage -i total.json
   ch_benchmark:
     name: CH Benchmark
     if: startsWith(github.ref, 'refs/heads/ch_benchmark/')

@@ -485,10 +483,14 @@ jobs:
           tests=${detected_changes}

           # split the tests to be skipped --today we only skip upgrade tests
+          # and snapshot based node addition tests.
+          # snapshot based node addition tests are not flaky, as they promote
+          # the streaming replica (clone) to a PostgreSQL primary node that is one way
+          # operation
           skipped_tests=""
           not_skipped_tests=""
           for test in $tests; do
-            if [[ $test =~ ^src/test/regress/sql/upgrade_ ]]; then
+            if [[ $test =~ ^src/test/regress/sql/upgrade_ ]] || [[ $test =~ ^src/test/regress/sql/multi_add_node_from_backup ]]; then
               skipped_tests="$skipped_tests $test"
             else
               not_skipped_tests="$not_skipped_tests $test"

@@ -60,8 +60,7 @@ jobs:
             libzstd-dev \
             libzstd1 \
             lintian \
-            postgresql-server-dev-15 \
-            postgresql-server-dev-all \
+            postgresql-server-dev-17 \
             python3-pip \
             python3-setuptools \
             wget \

CHANGELOG.md (113 changed lines)
@ -1,4 +1,98 @@
### citus v13.1.0 (May 30th, 2025) ###
### citus v13.1.1 (Oct 1st, 2025) ###

* Adds support for latest PG minors: 14.19, 15.14, 16.10 (#8142)

* Fixes an assertion failure when an expression in the query references
  a CTE (#8106)

* Fixes a bug that causes an unexpected error when executing
  repartitioned MERGE (#8201)

* Fixes a bug that causes allowing UPDATE / MERGE queries that may
  change the distribution column value (#8214)

* Updates dynamic_library_path automatically when CDC is enabled (#8025)

### citus v13.0.5 (Oct 1st, 2025) ###

* Adds support for latest PG minors: 14.19, 15.14, 16.10 (#7986, #8142)

* Fixes a bug that causes an unexpected error when executing
  repartitioned MERGE (#8201)

* Fixes a bug that causes allowing UPDATE / MERGE queries that may
  change the distribution column value (#8214)

* Fixes a bug in redundant WHERE clause detection (#8162)

* Updates dynamic_library_path automatically when CDC is enabled (#8025)

### citus v12.1.10 (Oct 1, 2025) ###

* Adds support for latest PG minors: 14.19, 15.14, 16.10 (#7986, #8142)

* Fixes a bug that causes allowing UPDATE / MERGE queries that may
  change the distribution column value (#8214)

* Fixes an assertion failure that happens when querying a view that is
  defined on distributed tables (#8136)

### citus v12.1.9 (Sep 3, 2025) ###

* Adds a GUC for queries with outer joins and pseudoconstant quals (#8163)

* Updates dynamic_library_path automatically when CDC is enabled (#7715)

### citus v13.2.0 (August 18, 2025) ###

* Adds `citus_add_clone_node()`, `citus_add_clone_node_with_nodeid()`,
  `citus_remove_clone_node()` and `citus_remove_clone_node_with_nodeid()`
  UDFs to support snapshot-based node splits. This feature allows promoting
  a streaming replica (clone) to a primary node and rebalancing shards
  between the original and newly promoted node without requiring a full data
  copy. This greatly reduces rebalance times for scale-out operations when
  the new node already has the data via streaming replication (#8122)

* Improves performance of shard rebalancer by parallelizing moves and removing
  bottlenecks that blocked concurrent logical-replication transfers. This
  reduces rebalance windows especially for clusters with large reference
  tables and allows multiple shard transfers to run in parallel (#7983)

* Adds citus.enable_recurring_outer_join_pushdown GUC (enabled by default)
  to allow pushing down LEFT/RIGHT outer joins having a reference table on
  the outer side and a distributed table on the inner side (e.g.,
  \<reference table\> LEFT JOIN \<distributed table\>) (#7973)

* Adds citus.enable_local_fast_path_query_optimization (enabled by default)
  GUC to avoid unnecessary query deparsing to improve performance of
  fast-path queries targeting local shards (#8035)

* Adds `citus_stats()` UDF that can be used to retrieve distributed `pg_stats`
  for the provided Citus table (#8026)

* Avoids automatically creating citus_columnar when there are no relations
  using it (#8081)

* Makes sure to check if the distribution key is in the target list before
  pushing down a query with a union and an outer join (#8092)

* Fixes a bug in EXPLAIN ANALYZE to prevent unintended (duplicate) execution
  of the (sub)plans during the explain phase (#8017)

* Fixes potential memory corruptions that could happen when accessing
  various catalog tables after a Citus downgrade is followed by a Citus
  upgrade (#7950, #8120, #8124, #8121, #8114, #8146)

* Fixes UPDATE statements with indirection and array/jsonb subscripting with
  more than one field (#7675)

* Fixes an assertion failure that happens when an expression in the query
  references a CTE (#8106)

* Fixes an assertion failure that happens when querying a view that is
  defined on distributed tables (#8136)

### citus v13.1.0 (May 30th, 2025) ###

* Adds `citus_stat_counters` view that can be used to query
  stat counters that Citus collects while the feature is enabled, which is
@ -125,6 +219,23 @@

* Fixes potential `NULL` dereference in causal clocks (#7704)

### citus v13.0.4 (May 29th, 2025) ###

* Fixes an issue detected using address sanitizer (#7966)

* Errors out for queries with outer joins and pseudoconstant quals in versions
  prior to PG 17 (#7937)

### citus v12.1.8 (May 29, 2025) ###

* Fixes a crash in left outer joins that can happen when there is an
  aggregate on a column from the inner side of the join (#7904)

* Fixes an issue detected using address sanitizer (#7965)

* Fixes a crash when executing a prepared CALL, which is not pure SQL but
  available with some drivers like npgsql and JDBC (#7288)

### citus v13.0.3 (March 20th, 2025) ###

* Fixes a version bump issue in 13.0.2
@ -0,0 +1,78 @@
The table below was created with Citus 12.1.7 on PG16.

| Extension Name | Works as Expected | Notes |
|:-----------------------------|:--------------------|:--------|
| address_standardizer | Yes | |
| address_standardizer_data_us | Yes | |
| age | Partially | Works fine side by side, but graph data cannot be distributed. |
| amcheck | Yes | |
| anon | Partially | Cannot anonymize distributed tables. It is possible to anonymize local tables. |
| auto_explain | No | [Issue #6448](https://github.com/citusdata/citus/issues/6448) |
| azure | Yes | |
| azure_ai | Yes | |
| azure_storage | Yes | |
| bloom | Yes | |
| btree_gin | Yes | |
| btree_gist | Yes | |
| citext | Yes | |
| citus_columnar | Yes | |
| cube | Yes | |
| dblink | Yes | |
| dict_int | Yes | |
| dict_xsyn | Yes | |
| earthdistance | Yes | |
| fuzzystrmatch | Yes | |
| hll | Yes | |
| hstore | Yes | |
| hypopg | Partially | hypopg works on local tables and individual shards; however, when a hypothetical index is created on a distributed table, Citus does not propagate the index creation command to the worker nodes, so the hypothetical index is not used in EXPLAIN statements. |
| intagg | Yes | |
| intarray | Yes | |
| isn | Yes | |
| lo | Partially | Extension relies on triggers, but Citus does not support triggers on distributed tables. |
| login_hook | Yes | |
| ltree | Yes | |
| oracle_fdw | Yes | |
| orafce | Yes | |
| pageinspect | Yes | |
| pg_buffercache | Yes | |
| pg_cron | Yes | |
| pg_diskann | Yes | |
| pg_failover_slots | To be tested | |
| pg_freespacemap | Partially | Users can set citus.override_table_visibility='off' to get an accurate free space map calculation for shards. |
| pg_hint_plan | Partially | Works fine side by side, but hints are ignored for distributed queries. |
| pg_partman | Yes | |
| pg_prewarm | Partially | To prewarm distributed tables, set citus.override_table_visibility to 'off' and run prewarm for each shard. This needs to be done at each node. |
| pg_repack | Partially | Extension relies on triggers, but Citus does not support triggers on distributed tables. It works fine on local tables. |
| pg_squeeze | Partially | It can work on local tables, but it is not aware of distributed tables. Users can set citus.override_table_visibility='off' and then run pg_squeeze for each shard. This needs to be done at each node. |
| pg_stat_statements | Yes | |
| pg_trgm | Yes | |
| pg_visibility | Partially | To get the visibility map of a distributed table, run the functions on the individual shard tables. |
| pgaadauth | Yes | |
| pgaudit | Yes | |
| pgcrypto | Yes | |
| pglogical | No | |
| pgrowlocks | Partially | It works only with individual shards, not with distributed table names. |
| pgstattuple | Yes | |
| plpgsql | Yes | |
| plv8 | Yes | |
| postgis | Yes | |
| postgis_raster | Yes | |
| postgis_sfcgal | Yes | |
| postgis_tiger_geocoder | No | |
| postgis_topology | No | |
| postgres_fdw | Yes | |
| postgres_protobuf | Yes | |
| semver | Yes | |
| session_variable | No | |
| sslinfo | Yes | |
| tablefunc | Yes | |
| tdigest | Yes | |
| tds_fdw | Yes | |
| timescaledb | No | [Known to be incompatible with Citus](https://www.citusdata.com/blog/2021/10/22/how-to-scale-postgres-for-time-series-data-with-citus/#:~:text=Postgres%E2%80%99%20built-in%20partitioning) |
| topn | Yes | |
| tsm_system_rows | Yes | |
| tsm_system_time | Yes | |
| unaccent | Yes | |
| uuid-ossp | Yes | |
| vector (aka pg_vector) | Yes | |
| wal2json | To be tested | |
| xml2 | To be tested | |
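Several of the "Partially" entries above share the same workaround: disable Citus shard hiding via citus.override_table_visibility and operate on the shard tables directly. A minimal sketch for pg_prewarm follows; the table name `events` is a hypothetical example, and the pattern relies on Citus's `<table>_<shardid>` shard naming. Run it on each node.

```sql
-- A sketch, not a definitive recipe: prewarm every local shard of the
-- hypothetical distributed table "events" in schema "public".
SET citus.override_table_visibility TO off;

SELECT pg_prewarm(c.oid::regclass)
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'public'
  AND c.relname LIKE 'events\_%'   -- matches events_<shardid>
  AND c.relkind = 'r';
```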
@ -11,9 +11,9 @@ tool. This tool uses `uncrustify` under the hood.
```bash
# Uncrustify changes the way it formats code every release a bit. To make sure
# everyone formats consistently we use version 0.68.1:
curl -L https://github.com/uncrustify/uncrustify/archive/uncrustify-0.68.1.tar.gz | tar xz
cd uncrustify-uncrustify-0.68.1/
# everyone formats consistently we use version 0.82.0:
curl -L https://github.com/uncrustify/uncrustify/archive/uncrustify-0.82.0.tar.gz | tar xz
cd uncrustify-uncrustify-0.82.0/
mkdir build
cd build
cmake ..
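# The diff hunk ends here; the remaining steps below are the standard
# CMake build/install sequence assumed to follow, not part of this diff.
make -j$(nproc)
sudo make install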
@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for Citus 13.1.0.
# Generated by GNU Autoconf 2.69 for Citus 14.0devel.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.

@ -579,8 +579,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='Citus'
PACKAGE_TARNAME='citus'
PACKAGE_VERSION='13.1.0'
PACKAGE_STRING='Citus 13.1.0'
PACKAGE_VERSION='14.0devel'
PACKAGE_STRING='Citus 14.0devel'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

@ -1262,7 +1262,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures Citus 13.1.0 to adapt to many kinds of systems.
\`configure' configures Citus 14.0devel to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

@ -1324,7 +1324,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of Citus 13.1.0:";;
short | recursive ) echo "Configuration of Citus 14.0devel:";;
esac
cat <<\_ACEOF

@ -1429,7 +1429,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
Citus configure 13.1.0
Citus configure 14.0devel
generated by GNU Autoconf 2.69

Copyright (C) 2012 Free Software Foundation, Inc.

@ -1912,7 +1912,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by Citus $as_me 13.1.0, which was
It was created by Citus $as_me 14.0devel, which was
generated by GNU Autoconf 2.69. Invocation command line was

$ $0 $@

@ -5393,7 +5393,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by Citus $as_me 13.1.0, which was
This file was extended by Citus $as_me 14.0devel, which was
generated by GNU Autoconf 2.69. Invocation command line was

CONFIG_FILES = $CONFIG_FILES

@ -5455,7 +5455,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
Citus config.status 13.1.0
Citus config.status 14.0devel
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@ -5,7 +5,7 @@
# everyone needing autoconf installed, the resulting files are checked
# into the SCM.

AC_INIT([Citus], [13.1.0])
AC_INIT([Citus], [14.0devel])
AC_COPYRIGHT([Copyright (c) Citus Data, Inc.])

# we'll need sed and awk for some of the version commands
@ -1,6 +1,6 @@
# Columnar extension
comment = 'Citus Columnar extension'
default_version = '12.2-1'
default_version = '14.0-1'
module_pathname = '$libdir/citus_columnar'
relocatable = false
schema = pg_catalog
@ -21,6 +21,13 @@
#include "catalog/pg_am.h"
#include "catalog/pg_statistic.h"
#include "commands/defrem.h"

#include "columnar/columnar_version_compat.h"
#if PG_VERSION_NUM >= PG_VERSION_18
#include "commands/explain_format.h"
#endif
#include "executor/executor.h" /* for ExecInitExprWithParams(), ExecEvalExpr() */
#include "nodes/execnodes.h" /* for ExprState, ExprContext, etc. */
#include "nodes/extensible.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"

@ -1549,8 +1556,7 @@ ColumnarPerStripeScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRe
ereport(ERROR, (errmsg("could not open relation with OID %u", relationId)));
}

List *stripeList = StripesForRelfilelocator(RelationPhysicalIdentifier_compat(
relation));
List *stripeList = StripesForRelfilelocator(relation);
RelationClose(relation);

uint32 maxColumnCount = 0;

@ -1607,8 +1613,7 @@ ColumnarTableStripeCount(Oid relationId)
ereport(ERROR, (errmsg("could not open relation with OID %u", relationId)));
}

List *stripeList = StripesForRelfilelocator(RelationPhysicalIdentifier_compat(
relation));
List *stripeList = StripesForRelfilelocator(relation);
int stripeCount = list_length(stripeList);
RelationClose(relation);
@ -72,9 +72,9 @@

#define COLUMNAR_RELOPTION_NAMESPACE "columnar"
#define SLOW_METADATA_ACCESS_WARNING \
"Metadata index %s is not available, this might mean slower read/writes " \
"on columnar tables. This is expected during Postgres upgrades and not " \
"expected otherwise."
"Metadata index %s is not available, this might mean slower read/writes " \
"on columnar tables. This is expected during Postgres upgrades and not " \
"expected otherwise."

typedef struct
{
@ -106,7 +106,9 @@ static void GetHighestUsedAddressAndId(uint64 storageId,
uint64 *highestUsedAddress,
uint64 *highestUsedId);
static StripeMetadata * UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId,
bool *update, Datum *newValues);
uint64 fileOffset, uint64 dataLength,
uint64 rowCount, uint64 chunkCount);

static List * ReadDataFileStripeList(uint64 storageId, Snapshot snapshot);
static StripeMetadata * BuildStripeMetadata(Relation columnarStripes,
HeapTuple heapTuple);

@ -123,7 +125,7 @@ static Oid ColumnarChunkGroupRelationId(void);
static Oid ColumnarChunkIndexRelationId(void);
static Oid ColumnarChunkGroupIndexRelationId(void);
static Oid ColumnarNamespaceId(void);
static uint64 LookupStorageId(RelFileLocator relfilelocator);
static uint64 LookupStorageId(Oid relationId, RelFileLocator relfilelocator);
static uint64 GetHighestUsedRowNumber(uint64 storageId);
static void DeleteStorageFromColumnarMetadataTable(Oid metadataTableId,
AttrNumber storageIdAtrrNumber,

@ -183,6 +185,8 @@ typedef FormData_columnar_options *Form_columnar_options;
#define Anum_columnar_stripe_chunk_count 8
#define Anum_columnar_stripe_first_row_number 9

static int GetFirstRowNumberAttrIndexInColumnarStripe(TupleDesc tupleDesc);

/* constants for columnar.chunk_group */
#define Natts_columnar_chunkgroup 4
#define Anum_columnar_chunkgroup_storageid 1
@ -602,7 +606,7 @@ ReadColumnarOptions(Oid regclass, ColumnarOptions *options)
* of columnar.chunk.
*/
void
SaveStripeSkipList(RelFileLocator relfilelocator, uint64 stripe,
SaveStripeSkipList(Oid relid, RelFileLocator relfilelocator, uint64 stripe,
StripeSkipList *chunkList,
TupleDesc tupleDescriptor)
{

@ -610,11 +614,17 @@ SaveStripeSkipList(RelFileLocator relfilelocator, uint64 stripe,
uint32 chunkIndex = 0;
uint32 columnCount = chunkList->columnCount;

uint64 storageId = LookupStorageId(relfilelocator);
uint64 storageId = LookupStorageId(relid, relfilelocator);
Oid columnarChunkOid = ColumnarChunkRelationId();
Relation columnarChunk = table_open(columnarChunkOid, RowExclusiveLock);
ModifyState *modifyState = StartModifyRelation(columnarChunk);
bool pushed_snapshot = false;

if (!ActiveSnapshotSet())
{
PushActiveSnapshot(GetTransactionSnapshot());
pushed_snapshot = true;
}
for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
{
for (chunkIndex = 0; chunkIndex < chunkList->chunkCount; chunkIndex++)

@ -645,20 +655,25 @@ SaveStripeSkipList(RelFileLocator relfilelocator, uint64 stripe,
{
values[Anum_columnar_chunk_minimum_value - 1] =
PointerGetDatum(DatumToBytea(chunk->minimumValue,
&tupleDescriptor->attrs[columnIndex]));
TupleDescAttr(tupleDescriptor,
columnIndex)));
values[Anum_columnar_chunk_maximum_value - 1] =
PointerGetDatum(DatumToBytea(chunk->maximumValue,
&tupleDescriptor->attrs[columnIndex]));
TupleDescAttr(tupleDescriptor,
columnIndex)));
}
else
{
nulls[Anum_columnar_chunk_minimum_value - 1] = true;
nulls[Anum_columnar_chunk_maximum_value - 1] = true;
}

InsertTupleAndEnforceConstraints(modifyState, values, nulls);
}
}
if (pushed_snapshot)
{
PopActiveSnapshot();
}

FinishModifyRelation(modifyState);
table_close(columnarChunk, RowExclusiveLock);
@ -669,10 +684,10 @@ SaveStripeSkipList(RelFileLocator relfilelocator, uint64 stripe,
* SaveChunkGroups saves the metadata for given chunk groups in columnar.chunk_group.
*/
void
SaveChunkGroups(RelFileLocator relfilelocator, uint64 stripe,
SaveChunkGroups(Oid relid, RelFileLocator relfilelocator, uint64 stripe,
List *chunkGroupRowCounts)
{
uint64 storageId = LookupStorageId(relfilelocator);
uint64 storageId = LookupStorageId(relid, relfilelocator);
Oid columnarChunkGroupOid = ColumnarChunkGroupRelationId();
Relation columnarChunkGroup = table_open(columnarChunkGroupOid, RowExclusiveLock);
ModifyState *modifyState = StartModifyRelation(columnarChunkGroup);

@ -705,7 +720,7 @@ SaveChunkGroups(RelFileLocator relfilelocator, uint64 stripe,
* ReadStripeSkipList fetches chunk metadata for a given stripe.
*/
StripeSkipList *
ReadStripeSkipList(RelFileLocator relfilelocator, uint64 stripe,
ReadStripeSkipList(Relation rel, uint64 stripe,
TupleDesc tupleDescriptor,
uint32 chunkCount, Snapshot snapshot)
{

@ -714,7 +729,8 @@ ReadStripeSkipList(RelFileLocator relfilelocator, uint64 stripe,
uint32 columnCount = tupleDescriptor->natts;
ScanKeyData scanKey[2];

uint64 storageId = LookupStorageId(relfilelocator);
uint64 storageId = LookupStorageId(RelationPrecomputeOid(rel),
RelationPhysicalIdentifier_compat(rel));

Oid columnarChunkOid = ColumnarChunkRelationId();
Relation columnarChunk = table_open(columnarChunkOid, AccessShareLock);

@ -803,9 +819,9 @@ ReadStripeSkipList(RelFileLocator relfilelocator, uint64 stripe,
datumArray[Anum_columnar_chunk_maximum_value - 1]);

chunk->minimumValue =
ByteaToDatum(minValue, &tupleDescriptor->attrs[columnIndex]);
ByteaToDatum(minValue, TupleDescAttr(tupleDescriptor, columnIndex));
chunk->maximumValue =
ByteaToDatum(maxValue, &tupleDescriptor->attrs[columnIndex]);
ByteaToDatum(maxValue, TupleDescAttr(tupleDescriptor, columnIndex));

chunk->hasMinMax = true;
}
@ -942,10 +958,12 @@ StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snap
strategyNumber = BTGreaterStrategyNumber;
procedure = F_INT8GT;
}
ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number,
strategyNumber, procedure, Int64GetDatum(rowNumber));

Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock);
TupleDesc tupleDesc = RelationGetDescr(columnarStripes);

ScanKeyInit(&scanKey[1], GetFirstRowNumberAttrIndexInColumnarStripe(tupleDesc) + 1,
strategyNumber, procedure, Int64GetDatum(rowNumber));

Oid indexId = ColumnarStripeFirstRowNumberIndexRelationId();
bool indexOk = OidIsValid(indexId);

@ -1210,9 +1228,13 @@ static void
InsertEmptyStripeMetadataRow(uint64 storageId, uint64 stripeId, uint32 columnCount,
uint32 chunkGroupRowCount, uint64 firstRowNumber)
{
bool nulls[Natts_columnar_stripe] = { false };
Oid columnarStripesOid = ColumnarStripeRelationId();
Relation columnarStripes = table_open(columnarStripesOid, RowExclusiveLock);
TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes);

Datum *values = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *nulls = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

Datum values[Natts_columnar_stripe] = { 0 };
values[Anum_columnar_stripe_storageid - 1] =
UInt64GetDatum(storageId);
values[Anum_columnar_stripe_stripe - 1] =

@ -1221,7 +1243,7 @@ InsertEmptyStripeMetadataRow(uint64 storageId, uint64 stripeId, uint32 columnCou
UInt32GetDatum(columnCount);
values[Anum_columnar_stripe_chunk_row_count - 1] =
UInt32GetDatum(chunkGroupRowCount);
values[Anum_columnar_stripe_first_row_number - 1] =
values[GetFirstRowNumberAttrIndexInColumnarStripe(tupleDescriptor)] =
UInt64GetDatum(firstRowNumber);

/* stripe has no rows yet, so initialize rest of the columns accordingly */

@ -1234,9 +1256,6 @@ InsertEmptyStripeMetadataRow(uint64 storageId, uint64 stripeId, uint32 columnCou
values[Anum_columnar_stripe_chunk_count - 1] =
UInt32GetDatum(0);

Oid columnarStripesOid = ColumnarStripeRelationId();
Relation columnarStripes = table_open(columnarStripesOid, RowExclusiveLock);

ModifyState *modifyState = StartModifyRelation(columnarStripes);

InsertTupleAndEnforceConstraints(modifyState, values, nulls);

@ -1244,6 +1263,9 @@ InsertEmptyStripeMetadataRow(uint64 storageId, uint64 stripeId, uint32 columnCou
FinishModifyRelation(modifyState);

table_close(columnarStripes, RowExclusiveLock);

pfree(values);
pfree(nulls);
}
@ -1252,11 +1274,26 @@ InsertEmptyStripeMetadataRow(uint64 storageId, uint64 stripeId, uint32 columnCou
* of the given relfilenode.
*/
List *
StripesForRelfilelocator(RelFileLocator relfilelocator)
StripesForRelfilelocator(Relation rel)
{
uint64 storageId = LookupStorageId(relfilelocator);
uint64 storageId = LookupStorageId(RelationPrecomputeOid(rel),
RelationPhysicalIdentifier_compat(rel));

return ReadDataFileStripeList(storageId, GetTransactionSnapshot());
/*
* PG18 requires the snapshot to be active or registered before it's used.
* Without this, we hit
* Assert(snapshot->regd_count > 0 || snapshot->active_count > 0);
* when reading columnar stripes.
* Relevant PG18 commit:
* 8076c00592e40e8dbd1fce7a98b20d4bf075e4ba
*/
Snapshot snapshot = RegisterSnapshot(GetTransactionSnapshot());

List *readDataFileStripeList = ReadDataFileStripeList(storageId, snapshot);

UnregisterSnapshot(snapshot);

return readDataFileStripeList;
}

@ -1269,9 +1306,10 @@ StripesForRelfilelocator(RelFileLocator relfilelocator)
* returns 0.
*/
uint64
GetHighestUsedAddress(RelFileLocator relfilelocator)
GetHighestUsedAddress(Relation rel)
{
uint64 storageId = LookupStorageId(relfilelocator);
uint64 storageId = LookupStorageId(RelationPrecomputeOid(rel),
RelationPhysicalIdentifier_compat(rel));

uint64 highestUsedAddress = 0;
uint64 highestUsedId = 0;

@ -1281,6 +1319,24 @@ GetHighestUsedAddress(relfilelocator)
}


/*
* In case relid hasn't been defined yet, we should use RelidByRelfilenumber
* to get the correct relid value.
*
* Currently this mostly matters for temp rels: since PG18 (it was backpatched
* through PG13), RelidByRelfilenumber skips temp relations, so we need an
* alternative way to get the relid value for temp objects.
*/
Oid
ColumnarRelationId(Oid relid, RelFileLocator relfilelocator)
{
return OidIsValid(relid) ? relid : RelidByRelfilenumber(RelationTablespace_compat
(relfilelocator),
RelationPhysicalIdentifierNumber_compat
(relfilelocator));
}


/*
* GetHighestUsedAddressAndId returns the highest used address and id for
* the given relfilenode across all active and inactive transactions.
@ -1354,19 +1410,8 @@ CompleteStripeReservation(Relation rel, uint64 stripeId, uint64 sizeBytes,
uint64 resLogicalStart = ColumnarStorageReserveData(rel, sizeBytes);
uint64 storageId = ColumnarStorageGetStorageId(rel, false);

bool update[Natts_columnar_stripe] = { false };
update[Anum_columnar_stripe_file_offset - 1] = true;
update[Anum_columnar_stripe_data_length - 1] = true;
update[Anum_columnar_stripe_row_count - 1] = true;
update[Anum_columnar_stripe_chunk_count - 1] = true;

Datum newValues[Natts_columnar_stripe] = { 0 };
newValues[Anum_columnar_stripe_file_offset - 1] = Int64GetDatum(resLogicalStart);
newValues[Anum_columnar_stripe_data_length - 1] = Int64GetDatum(sizeBytes);
newValues[Anum_columnar_stripe_row_count - 1] = UInt64GetDatum(rowCount);
newValues[Anum_columnar_stripe_chunk_count - 1] = Int32GetDatum(chunkCount);

return UpdateStripeMetadataRow(storageId, stripeId, update, newValues);
return UpdateStripeMetadataRow(storageId, stripeId, resLogicalStart,
sizeBytes, rowCount, chunkCount);
}

@ -1377,12 +1422,9 @@ CompleteStripeReservation(Relation rel, uint64 stripeId, uint64 sizeBytes,
* of stripe metadata should be updated according to modifications done.
*/
static StripeMetadata *
UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update,
Datum *newValues)
UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, uint64 fileOffset,
uint64 dataLength, uint64 rowCount, uint64 chunkCount)
{
SnapshotData dirtySnapshot;
InitDirtySnapshot(dirtySnapshot);

ScanKeyData scanKey[2];
ScanKeyInit(&scanKey[0], Anum_columnar_stripe_storageid,
BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(storageId));

@ -1392,11 +1434,15 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update,
Oid columnarStripesOid = ColumnarStripeRelationId();

Relation columnarStripes = table_open(columnarStripesOid, AccessShareLock);
TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes);

Oid indexId = ColumnarStripePKeyIndexRelationId();
bool indexOk = OidIsValid(indexId);
SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId, indexOk,
&dirtySnapshot, 2, scanKey);

void *state;
HeapTuple tuple;
systable_inplace_update_begin(columnarStripes, indexId, indexOk, NULL,
2, scanKey, &tuple, &state);

static bool loggedSlowMetadataAccessWarning = false;
if (!indexOk && !loggedSlowMetadataAccessWarning)

@ -1405,8 +1451,7 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update,
loggedSlowMetadataAccessWarning = true;
}

HeapTuple oldTuple = systable_getnext(scanDescriptor);
if (!HeapTupleIsValid(oldTuple))
if (!HeapTupleIsValid(tuple))
{
ereport(ERROR, (errmsg("attempted to modify an unexpected stripe, "
"columnar storage with id=" UINT64_FORMAT

@ -1415,34 +1460,44 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update,
}

/*
* heap_inplace_update already doesn't allow changing size of the original
* systable_inplace_update_finish already doesn't allow changing size of the original
* tuple, so we don't allow setting any Datum's to NULL values.
*/
bool newNulls[Natts_columnar_stripe] = { false };
TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes);
HeapTuple modifiedTuple = heap_modify_tuple(oldTuple, tupleDescriptor,
newValues, newNulls, update);

heap_inplace_update(columnarStripes, modifiedTuple);
Datum *newValues = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *newNulls = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));
bool *update = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

/*
* Existing tuple now contains modifications, because we used
* heap_inplace_update().
*/
HeapTuple newTuple = oldTuple;
update[Anum_columnar_stripe_file_offset - 1] = true;
update[Anum_columnar_stripe_data_length - 1] = true;
update[Anum_columnar_stripe_row_count - 1] = true;
update[Anum_columnar_stripe_chunk_count - 1] = true;

newValues[Anum_columnar_stripe_file_offset - 1] = Int64GetDatum(fileOffset);
newValues[Anum_columnar_stripe_data_length - 1] = Int64GetDatum(dataLength);
newValues[Anum_columnar_stripe_row_count - 1] = UInt64GetDatum(rowCount);
newValues[Anum_columnar_stripe_chunk_count - 1] = Int32GetDatum(chunkCount);

tuple = heap_modify_tuple(tuple,
tupleDescriptor,
newValues,
newNulls,
update);

systable_inplace_update_finish(state, tuple);

/*
* Must not pass modifiedTuple, because BuildStripeMetadata expects a real
* heap tuple with MVCC fields.
*/
StripeMetadata *modifiedStripeMetadata = BuildStripeMetadata(columnarStripes,
newTuple);
tuple);

CommandCounterIncrement();

systable_endscan(scanDescriptor);
heap_freetuple(tuple);
table_close(columnarStripes, AccessShareLock);

pfree(newValues);
pfree(newNulls);
pfree(update);

/* return StripeMetadata object built from modified tuple */
return modifiedStripeMetadata;
}
@ -1506,10 +1561,12 @@ BuildStripeMetadata(Relation columnarStripes, HeapTuple heapTuple)
{
Assert(RelationGetRelid(columnarStripes) == ColumnarStripeRelationId());

Datum datumArray[Natts_columnar_stripe];
bool isNullArray[Natts_columnar_stripe];
heap_deform_tuple(heapTuple, RelationGetDescr(columnarStripes),
datumArray, isNullArray);
TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes);

Datum *datumArray = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *isNullArray = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));

heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray);

StripeMetadata *stripeMetadata = palloc0(sizeof(StripeMetadata));
stripeMetadata->id = DatumGetInt64(datumArray[Anum_columnar_stripe_stripe - 1]);

@ -1526,7 +1583,10 @@ BuildStripeMetadata(Relation columnarStripes, HeapTuple heapTuple)
stripeMetadata->rowCount = DatumGetInt64(
datumArray[Anum_columnar_stripe_row_count - 1]);
stripeMetadata->firstRowNumber = DatumGetUInt64(
datumArray[Anum_columnar_stripe_first_row_number - 1]);
datumArray[GetFirstRowNumberAttrIndexInColumnarStripe(tupleDescriptor)]);

pfree(datumArray);
pfree(isNullArray);

/*
* If there is unflushed data in a parent transaction, then we would

@ -1552,7 +1612,7 @@ BuildStripeMetadata(Relation columnarStripes, HeapTuple heapTuple)
* metadata tables.
*/
void
DeleteMetadataRows(RelFileLocator relfilelocator)
DeleteMetadataRows(Relation rel)
{
/*
* During a restore for binary upgrade, metadata tables and indexes may or

@ -1563,7 +1623,8 @@ DeleteMetadataRows(relfilelocator)
return;
}

uint64 storageId = LookupStorageId(relfilelocator);
uint64 storageId = LookupStorageId(RelationPrecomputeOid(rel),
RelationPhysicalIdentifier_compat(rel));

DeleteStorageFromColumnarMetadataTable(ColumnarStripeRelationId(),
Anum_columnar_stripe_storageid,
@ -1727,12 +1788,37 @@ create_estate_for_relation(Relation rel)
rte->relkind = rel->rd_rel->relkind;
rte->rellockmode = AccessShareLock;

/* Prepare permission info on PG 16+ */
#if PG_VERSION_NUM >= PG_VERSION_16
List *perminfos = NIL;
addRTEPermissionInfo(&perminfos, rte);
ExecInitRangeTable(estate, list_make1(rte), perminfos);
#endif

/* Initialize the range table, with the right signature for each PG version */
#if PG_VERSION_NUM >= PG_VERSION_18

/* PG 18+ needs four arguments (unpruned_relids) */
ExecInitRangeTable(
estate,
list_make1(rte),
perminfos,
NULL /* unpruned_relids: not used by columnar */
);
#elif PG_VERSION_NUM >= PG_VERSION_16

/* PG 16-17: three-arg signature (permInfos) */
ExecInitRangeTable(
estate,
list_make1(rte),
perminfos
);
#else
ExecInitRangeTable(estate, list_make1(rte));

/* PG 15: two-arg signature */
ExecInitRangeTable(
estate,
list_make1(rte)
);
#endif

estate->es_output_cid = GetCurrentCommandId(true);
@ -1937,13 +2023,11 @@ ColumnarNamespaceId(void)
* false if the relation doesn't have a meta page yet.
*/
static uint64
LookupStorageId(RelFileLocator relfilelocator)
LookupStorageId(Oid relid, RelFileLocator relfilelocator)
{
Oid relationId = RelidByRelfilenumber(RelationTablespace_compat(relfilelocator),
RelationPhysicalIdentifierNumber_compat(
relfilelocator));
relid = ColumnarRelationId(relid, relfilelocator);

Relation relation = relation_open(relationId, AccessShareLock);
Relation relation = relation_open(relid, AccessShareLock);
uint64 storageId = ColumnarStorageGetStorageId(relation, false);
table_close(relation, AccessShareLock);

@ -2049,3 +2133,23 @@ GetHighestUsedRowNumber(uint64 storageId)

return highestRowNumber;
}


/*
* GetFirstRowNumberAttrIndexInColumnarStripe returns the attr index for the
* first_row_number attr.
*
* The first_row_number attr was added to table columnar.stripe via an ALTER
* operation after the version where Citus started supporting downgrades, and
* it's the only column that we've introduced to columnar.stripe since then.
*
* In case of a downgrade + upgrade, tupleDesc->natts becomes greater than
* Natts_columnar_stripe, and when this happens we know that the attr index of
* first_row_number is no longer Anum_columnar_stripe_first_row_number - 1 but
* tupleDesc->natts - 1.
*/
static int
GetFirstRowNumberAttrIndexInColumnarStripe(TupleDesc tupleDesc)
{
return tupleDesc->natts == Natts_columnar_stripe
? (Anum_columnar_stripe_first_row_number - 1)
: tupleDesc->natts - 1;
}
@ -41,8 +41,8 @@
#include "distributed/listutils.h"

#define UNEXPECTED_STRIPE_READ_ERR_MSG \
"attempted to read an unexpected stripe while reading columnar " \
"table %s, stripe with id=" UINT64_FORMAT " is not flushed"
"attempted to read an unexpected stripe while reading columnar " \
"table %s, stripe with id=" UINT64_FORMAT " is not flushed"

typedef struct ChunkGroupReadState
{

@ -758,7 +758,9 @@ SnapshotMightSeeUnflushedStripes(Snapshot snapshot)
}

default:
{
return false;
}
}
}
@ -986,8 +988,7 @@ ColumnarTableRowCount(Relation relation)
{
ListCell *stripeMetadataCell = NULL;
uint64 totalRowCount = 0;
List *stripeList = StripesForRelfilelocator(RelationPhysicalIdentifier_compat(
relation));
List *stripeList = StripesForRelfilelocator(relation);

foreach(stripeMetadataCell, stripeList)
{

@ -1015,8 +1016,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,

bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList);

StripeSkipList *stripeSkipList = ReadStripeSkipList(RelationPhysicalIdentifier_compat(
relation),
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation,
stripeMetadata->id,
tupleDescriptor,
stripeMetadata->chunkCount,
@ -547,7 +547,8 @@ ColumnarStorageTruncate(Relation rel, uint64 newDataReservation)
if (!ColumnarLogicalOffsetIsValid(newDataReservation))
{
elog(ERROR,
"attempted to truncate relation %d to invalid logical offset: " UINT64_FORMAT,
"attempted to truncate relation %d to "
"invalid logical offset: " UINT64_FORMAT,
rel->rd_id, newDataReservation);
}
@ -872,7 +872,7 @@ columnar_relation_set_new_filelocator(Relation rel,
RelationPhysicalIdentifier_compat(rel)),
GetCurrentSubTransactionId());

DeleteMetadataRows(RelationPhysicalIdentifier_compat(rel));
DeleteMetadataRows(rel);
}

*freezeXid = RecentXmin;

@ -897,7 +897,7 @@ columnar_relation_nontransactional_truncate(Relation rel)
NonTransactionDropWriteState(RelationPhysicalIdentifierNumber_compat(relfilelocator));

/* Delete old relfilenode metadata */
DeleteMetadataRows(relfilelocator);
DeleteMetadataRows(rel);

/*
* No need to set new relfilenode, since the table was created in this

@ -960,8 +960,7 @@ columnar_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
ColumnarOptions columnarOptions = { 0 };
ReadColumnarOptions(OldHeap->rd_id, &columnarOptions);

ColumnarWriteState *writeState = ColumnarBeginWrite(RelationPhysicalIdentifier_compat(
NewHeap),
ColumnarWriteState *writeState = ColumnarBeginWrite(NewHeap,
columnarOptions,
targetDesc);

@ -1012,7 +1011,7 @@ NeededColumnsList(TupleDesc tupdesc, Bitmapset *attr_needed)

for (int i = 0; i < tupdesc->natts; i++)
{
if (tupdesc->attrs[i].attisdropped)
if (TupleDescAttr(tupdesc, i)->attisdropped)
{
continue;
}
@ -1036,8 +1035,7 @@ NeededColumnsList(TupleDesc tupdesc, Bitmapset *attr_needed)
static uint64
ColumnarTableTupleCount(Relation relation)
{
List *stripeList = StripesForRelfilelocator(RelationPhysicalIdentifier_compat(
relation));
List *stripeList = StripesForRelfilelocator(relation);
uint64 tupleCount = 0;

ListCell *lc = NULL;

@ -1121,10 +1119,27 @@ columnar_vacuum_rel(Relation rel, VacuumParams *params,
bool frozenxid_updated;
bool minmulti_updated;

/* for PG 18+, vac_update_relstats gained a new "all_frozen" param */
#if PG_VERSION_NUM >= PG_VERSION_18

/* all frozen pages are always 0, because columnar stripes never store XIDs */
BlockNumber new_rel_allfrozen = 0;

vac_update_relstats(rel, new_rel_pages, new_live_tuples,
new_rel_allvisible, /* allvisible */
new_rel_allfrozen, /* all_frozen */
nindexes > 0,
newRelFrozenXid, newRelminMxid,
&frozenxid_updated, &minmulti_updated,
false);
#else
vac_update_relstats(rel, new_rel_pages, new_live_tuples,
new_rel_allvisible, nindexes > 0,
newRelFrozenXid, newRelminMxid,
&frozenxid_updated, &minmulti_updated, false);
&frozenxid_updated, &minmulti_updated,
false);
#endif

#else
TransactionId oldestXmin;
TransactionId freezeLimit;

@ -1187,10 +1202,19 @@ columnar_vacuum_rel(Relation rel, VacuumParams *params,
#endif
#endif

#if PG_VERSION_NUM >= PG_VERSION_18
pgstat_report_vacuum(RelationGetRelid(rel),
rel->rd_rel->relisshared,
Max(new_live_tuples, 0), /* live tuples */
0, /* dead tuples */
GetCurrentTimestamp()); /* start time */
#else
pgstat_report_vacuum(RelationGetRelid(rel),
rel->rd_rel->relisshared,
Max(new_live_tuples, 0),
0);
#endif

pgstat_progress_end_command();
}
@ -1202,7 +1226,6 @@ static void
LogRelationStats(Relation rel, int elevel)
{
ListCell *stripeMetadataCell = NULL;
RelFileLocator relfilelocator = RelationPhysicalIdentifier_compat(rel);
StringInfo infoBuf = makeStringInfo();

int compressionStats[COMPRESSION_COUNT] = { 0 };

@ -1213,19 +1236,23 @@ LogRelationStats(Relation rel, int elevel)
uint64 droppedChunksWithData = 0;
uint64 totalDecompressedLength = 0;

List *stripeList = StripesForRelfilelocator(relfilelocator);
List *stripeList = StripesForRelfilelocator(rel);
int stripeCount = list_length(stripeList);

foreach(stripeMetadataCell, stripeList)
{
StripeMetadata *stripe = lfirst(stripeMetadataCell);
StripeSkipList *skiplist = ReadStripeSkipList(relfilelocator, stripe->id,

Snapshot snapshot = RegisterSnapshot(GetTransactionSnapshot());
StripeSkipList *skiplist = ReadStripeSkipList(rel, stripe->id,
RelationGetDescr(rel),
stripe->chunkCount,
GetTransactionSnapshot());
snapshot);
UnregisterSnapshot(snapshot);

for (uint32 column = 0; column < skiplist->columnCount; column++)
{
bool attrDropped = tupdesc->attrs[column].attisdropped;
bool attrDropped = TupleDescAttr(tupdesc, column)->attisdropped;
for (uint32 chunk = 0; chunk < skiplist->chunkCount; chunk++)
{
ColumnChunkSkipNode *skipnode =

@ -1355,8 +1382,7 @@ TruncateColumnar(Relation rel, int elevel)
* new stripes be added beyond highestPhysicalAddress while
* we're truncating.
*/
uint64 newDataReservation = Max(GetHighestUsedAddress(
RelationPhysicalIdentifier_compat(rel)) + 1,
uint64 newDataReservation = Max(GetHighestUsedAddress(rel) + 1,
ColumnarFirstLogicalOffset);

BlockNumber old_rel_pages = smgrnblocks(RelationGetSmgr(rel), MAIN_FORKNUM);
@ -2124,7 +2150,7 @@ ColumnarTableDropHook(Oid relid)
Relation rel = table_open(relid, AccessExclusiveLock);
RelFileLocator relfilelocator = RelationPhysicalIdentifier_compat(rel);

DeleteMetadataRows(relfilelocator);
DeleteMetadataRows(rel);
DeleteColumnarTableOptions(rel->rd_id, true);

MarkRelfilenumberDropped(RelationPhysicalIdentifierNumber_compat(relfilelocator),

@ -2384,9 +2410,10 @@ ColumnarProcessUtility(PlannedStmt *pstmt,
}

default:

{
/* FALL THROUGH */
break;
}
}

if (columnarOptions != NIL && columnarRangeVar == NULL)

@ -2564,8 +2591,13 @@ static const TableAmRoutine columnar_am_methods = {

.relation_estimate_size = columnar_estimate_rel_size,

#if PG_VERSION_NUM < PG_VERSION_18

/* these two fields were removed in PG 18 */
.scan_bitmap_next_block = NULL,
.scan_bitmap_next_tuple = NULL,
#endif

.scan_sample_next_block = columnar_scan_sample_next_block,
.scan_sample_next_tuple = columnar_scan_sample_next_tuple
};

@ -2603,7 +2635,7 @@ detoast_values(TupleDesc tupleDesc, Datum *orig_values, bool *isnull)

for (int i = 0; i < tupleDesc->natts; i++)
{
if (!isnull[i] && tupleDesc->attrs[i].attlen == -1 &&
if (!isnull[i] && TupleDescAttr(tupleDesc, i)->attlen == -1 &&
VARATT_IS_EXTENDED(values[i]))
{
/* make a copy */
@ -48,6 +48,12 @@ struct ColumnarWriteState
FmgrInfo **comparisonFunctionArray;
RelFileLocator relfilelocator;

/*
* We can't rely on RelidByRelfilenumber for temp tables since PG18
* (it was backpatched through PG13).
*/
Oid temp_relid;

MemoryContext stripeWriteContext;
MemoryContext perTupleContext;
StripeBuffers *stripeBuffers;

@ -93,10 +99,12 @@ static StringInfo CopyStringInfo(StringInfo sourceString);
* data load operation.
*/
ColumnarWriteState *
ColumnarBeginWrite(RelFileLocator relfilelocator,
ColumnarBeginWrite(Relation rel,
ColumnarOptions options,
TupleDesc tupleDescriptor)
{
RelFileLocator relfilelocator = RelationPhysicalIdentifier_compat(rel);

/* get comparison function pointers for each of the columns */
uint32 columnCount = tupleDescriptor->natts;
FmgrInfo **comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *));

@ -134,6 +142,7 @@ ColumnarBeginWrite(RelFileLocator relfilelocator,

ColumnarWriteState *writeState = palloc0(sizeof(ColumnarWriteState));
writeState->relfilelocator = relfilelocator;
writeState->temp_relid = RelationPrecomputeOid(rel);
writeState->options = options;
writeState->tupleDescriptor = CreateTupleDescCopy(tupleDescriptor);
writeState->comparisonFunctionArray = comparisonFunctionArray;

@ -183,10 +192,9 @@ ColumnarWriteRow(ColumnarWriteState *writeState, Datum *columnValues, bool *colu
writeState->stripeSkipList = stripeSkipList;
writeState->compressionBuffer = makeStringInfo();

Oid relationId = RelidByRelfilenumber(RelationTablespace_compat(
writeState->relfilelocator),
RelationPhysicalIdentifierNumber_compat(
writeState->relfilelocator));
Oid relationId = ColumnarRelationId(writeState->temp_relid,
writeState->relfilelocator);

Relation relation = relation_open(relationId, NoLock);
writeState->emptyStripeReservation =
ReserveEmptyStripe(relation, columnCount, chunkRowCount,

@ -404,10 +412,9 @@ FlushStripe(ColumnarWriteState *writeState)

elog(DEBUG1, "Flushing Stripe of size %d", stripeBuffers->rowCount);

Oid relationId = RelidByRelfilenumber(RelationTablespace_compat(
writeState->relfilelocator),
RelationPhysicalIdentifierNumber_compat(
writeState->relfilelocator));
Oid relationId = ColumnarRelationId(writeState->temp_relid,
writeState->relfilelocator);

Relation relation = relation_open(relationId, NoLock);

/*

@ -499,10 +506,12 @@ FlushStripe(ColumnarWriteState *writeState)
}
}

SaveChunkGroups(writeState->relfilelocator,
SaveChunkGroups(writeState->temp_relid,
writeState->relfilelocator,
stripeMetadata->id,
writeState->chunkGroupRowCounts);
SaveStripeSkipList(writeState->relfilelocator,
SaveStripeSkipList(writeState->temp_relid,
writeState->relfilelocator,
stripeMetadata->id,
stripeSkipList, tupleDescriptor);
@ -0,0 +1,3 @@
-- citus_columnar--12.2-1--13.2-1.sql

#include "udfs/columnar_finish_pg_upgrade/13.2-1.sql"

@ -0,0 +1,2 @@
-- citus_columnar--13.2-1--14.0-1
-- bump version to 14.0-1

@ -0,0 +1,3 @@
-- citus_columnar--13.2-1--12.2-1.sql

DROP FUNCTION IF EXISTS pg_catalog.columnar_finish_pg_upgrade();

@ -0,0 +1,2 @@
-- citus_columnar--14.0-1--13.2-1
-- downgrade version to 13.2-1

@ -0,0 +1,13 @@
CREATE OR REPLACE FUNCTION pg_catalog.columnar_finish_pg_upgrade()
RETURNS void
LANGUAGE plpgsql
SET search_path = pg_catalog
AS $cppu$
BEGIN
-- set dependencies for columnar table access method
PERFORM columnar_internal.columnar_ensure_am_depends_catalog();
END;
$cppu$;

COMMENT ON FUNCTION pg_catalog.columnar_finish_pg_upgrade()
IS 'perform tasks to properly complete a Postgres upgrade for columnar extension';
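The UDF above is intended to run as part of the upgrade flow; a minimal sketch of invoking it manually after a pg_upgrade, assuming the citus_columnar extension is installed:

```sql
SELECT pg_catalog.columnar_finish_pg_upgrade();
```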
@ -0,0 +1,13 @@
CREATE OR REPLACE FUNCTION pg_catalog.columnar_finish_pg_upgrade()
RETURNS void
LANGUAGE plpgsql
SET search_path = pg_catalog
AS $cppu$
BEGIN
-- set dependencies for columnar table access method
PERFORM columnar_internal.columnar_ensure_am_depends_catalog();
END;
$cppu$;

COMMENT ON FUNCTION pg_catalog.columnar_finish_pg_upgrade()
IS 'perform tasks to properly complete a Postgres upgrade for columnar extension';
@ -191,8 +191,7 @@ columnar_init_write_state(Relation relation, TupleDesc tupdesc,
ReadColumnarOptions(tupSlotRelationId, &columnarOptions);

SubXidWriteState *stackEntry = palloc0(sizeof(SubXidWriteState));
stackEntry->writeState = ColumnarBeginWrite(RelationPhysicalIdentifier_compat(
relation),
stackEntry->writeState = ColumnarBeginWrite(relation,
columnarOptions,
tupdesc);
stackEntry->subXid = currentSubXid;
@ -355,6 +355,15 @@ DEBUG: Total number of commands sent over the session 8: 1 to node localhost:97
(0 rows)
```

### Delaying the Fast Path Plan

As of Citus 13.2, if it can be determined at plan time that a fast path query targets a local shard, a shortcut can be taken so that deparse and parse/plan of the shard query are avoided. Citus must be in MX mode and the shard must be local to the Citus node processing the query. If so, the OID of the distributed table is replaced by the OID of the shard in the parse tree. The parse tree is then given to the Postgres planner, which returns a plan that is stored in the distributed plan's task. That plan can be reused by the local executor (described in the next section), avoiding the need to deparse and plan the shard query on each execution.

We call this delayed fast path planning because, if a query is eligible for fast path planning, `FastPathPlanner()` is delayed when the following properties hold:
- The query is a SELECT or UPDATE on a distributed table (schema or column sharded) or Citus managed local table
- The query has no volatile functions

If so, then `FastPathRouterQuery()` sets a flag indicating that making the fast path plan should be delayed until after the worker job has been created. At that point the router planner uses `CheckAndBuildDelayedFastPathPlan()` to see if the task's shard placement is local (and not a dummy placement) and the metadata of the shard table and distributed table are consistent (no DDL in progress on the distributed table). If so, the parse tree, with the OID of the distributed table replaced by the OID of the shard table, is fed to `standard_planner()` and the resulting plan is saved in the task. Otherwise, if the worker job has been marked for deferred pruning, or the shard is not local, or the shard is local but it's not safe to swap OIDs, then `CheckAndBuildDelayedFastPathPlan()` calls `FastPathPlanner()` to ensure a complete plan context. Reference tables are not currently supported, but this may be relaxed for SELECT statements in the future. Delayed fast path planning can be disabled by turning off `citus.enable_local_fast_path_query_optimization` (it is on by default).
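A minimal sketch of a query shape that qualifies; the table `events`, its distribution column `tenant_id`, and the value 42 are hypothetical, while the GUC is the one named above:

```sql
-- Assume "events" is distributed by tenant_id and this MX worker holds the
-- shard for tenant_id = 42: a single-table query with an equality filter on
-- the distribution column and no volatile functions is fast-path eligible.
SELECT count(*) FROM events WHERE tenant_id = 42;

-- Disable delayed fast path planning (it is on by default):
SET citus.enable_local_fast_path_query_optimization TO off;
```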

## Router Planner in Citus
@ -788,14 +797,13 @@ WHERE l.user_id = o.user_id AND o.primary_key = 55;



### Ref table LEFT JOIN distributed table JOINs via recursive planning
### Outer joins between reference and distributed tables

Very much like local-distributed table joins, Citus can't push down queries formatted as:
In general, when the outer side of an outer join is a recurring tuple (e.g., reference table, intermediate results, or set returning functions), it is not safe to push down the join.
```sql
"... ref_table LEFT JOIN distributed_table ..."
"... distributed_table RIGHT JOIN ref_table ..."
```
This is the case when the outer side is a recurring tuple (e.g., reference table, intermediate results, or set returning functions).

In these situations, Citus recursively plans the "distributed" part of the join. Even though it may seem excessive to recursively plan a distributed table, remember that Citus pushes down the filters and projections. Functions involved here include `RequiredAttrNumbersForRelation()` and `ReplaceRTERelationWithRteSubquery()`.

The core function handling this logic is `RecursivelyPlanRecurringTupleOuterJoinWalker()`. There are likely numerous optimizations possible (e.g., first pushing down an inner JOIN then an outer join), but these have not been implemented due to their complexity.
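A sketch of a query shape that takes this path; `ref_table` and its columns are illustrative, while `orders_table` and its `order_id`/`status` columns match the debug output below:

```sql
-- Illustrative only: reference table on the outer side of a LEFT JOIN with
-- a distributed table. Citus wraps orders_table in a subquery, recursively
-- plans it, and pushes down the projected columns (order_id, status).
SELECT r.region_name, o.status
FROM ref_table r
LEFT JOIN orders_table o ON r.region_id = o.order_id;
```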
@ -819,6 +827,45 @@ DEBUG: Wrapping relation "orders_table" "o" to a subquery
DEBUG: generating subplan 45_1 for subquery SELECT order_id, status FROM public.orders_table o WHERE true
```

As of Citus 13.2, under certain conditions, Citus can push down these types of LEFT and RIGHT outer joins by injecting constraints, derived from the shard intervals of the distributed tables, into the shard queries for the reference table. The eligibility rules for pushdown are defined in `CanPushdownRecurringOuterJoin()`, while the logic for computing and injecting the constraints is implemented in `UpdateWhereClauseToPushdownRecurringOuterJoin()`.

#### Example Query

In the example below, Citus pushes down the query by injecting interval constraints on the reference table. The injected constraints are visible in the EXPLAIN output.

```sql
SELECT pc.category_name, count(pt.product_id)
FROM product_categories pc
LEFT JOIN products_table pt ON pc.category_id = pt.product_id
GROUP BY pc.category_name;
```

#### Debug Messages
```
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: a push down safe left join with recurring left side
```

#### Explain Output
```
HashAggregate
  Group Key: remote_scan.category_name
  ->  Custom Scan (Citus Adaptive)
        Task Count: 32
        Tasks Shown: One of 32
        ->  Task
              Node: host=localhost port=9701 dbname=ebru
              ->  HashAggregate
                    Group Key: pc.category_name
                    ->  Hash Right Join
                          Hash Cond: (pt.product_id = pc.category_id)
                          ->  Seq Scan on products_table_102072 pt
                          ->  Hash
                                ->  Seq Scan on product_categories_102106 pc
                                      Filter: ((category_id IS NULL) OR ((btint4cmp('-2147483648'::integer, hashint8((category_id)::bigint)) < 0) AND (btint4cmp(hashint8((category_id)::bigint), '-2013265921'::integer) <= 0)))
```
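This behavior is controlled by the `citus.enable_recurring_outer_join_pushdown` GUC mentioned in the changelog above (on by default); a minimal sketch of disabling it to fall back to recursive planning:

```sql
-- Revert to the pre-13.2 recursive planning behavior for such outer joins
SET citus.enable_recurring_outer_join_pushdown TO off;
```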

### Recursive Planning When FROM Clause has Reference Table (or Recurring Tuples)

This section discusses a specific scenario in Citus's recursive query planning: handling queries where the main query's `FROM` clause is recurring, but there are subqueries in the `SELECT` or `WHERE` clauses involving distributed tables.
@ -346,12 +346,12 @@ CdcIsReferenceTableViaCatalog(Oid relationId)
return false;
}

Datum datumArray[Natts_pg_dist_partition];
bool isNullArray[Natts_pg_dist_partition];

Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);

TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
Datum *datumArray = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *isNullArray = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));

heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);

if (isNullArray[Anum_pg_dist_partition_partmethod - 1] ||

@ -363,6 +363,8 @@ CdcIsReferenceTableViaCatalog(Oid relationId)
*/
heap_freetuple(partitionTuple);
table_close(pgDistPartition, NoLock);
pfree(datumArray);
pfree(isNullArray);
return false;
}

@ -374,6 +376,8 @@ CdcIsReferenceTableViaCatalog(Oid relationId)

heap_freetuple(partitionTuple);
table_close(pgDistPartition, NoLock);
pfree(datumArray);
pfree(isNullArray);

/*
* A table is a reference table when its partition method is 'none'
@@ -1,6 +1,6 @@
# Citus extension
comment = 'Citus distributed database'
default_version = '13.1-1'
default_version = '14.0-1'
module_pathname = '$libdir/citus'
relocatable = false
schema = pg_catalog
@@ -44,17 +44,17 @@
#include "distributed/remote_commands.h"

#define SAVE_AND_PERSIST(c) \
    do { \
        Oid savedUserId = InvalidOid; \
        int savedSecurityContext = 0; \
        LogicalClockShmem->clusterClockValue = *(c); \
        GetUserIdAndSecContext(&savedUserId, &savedSecurityContext); \
        SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE); \
        DirectFunctionCall2(setval_oid, \
                            ObjectIdGetDatum(DistClockLogicalSequenceId()), \
                            Int64GetDatum((c)->logical)); \
        SetUserIdAndSecContext(savedUserId, savedSecurityContext); \
    } while (0)
    do { \
        Oid savedUserId = InvalidOid; \
        int savedSecurityContext = 0; \
        LogicalClockShmem->clusterClockValue = *(c); \
        GetUserIdAndSecContext(&savedUserId, &savedSecurityContext); \
        SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE); \
        DirectFunctionCall2(setval_oid, \
                            ObjectIdGetDatum(DistClockLogicalSequenceId()), \
                            Int64GetDatum((c)->logical)); \
        SetUserIdAndSecContext(savedUserId, savedSecurityContext); \
    } while (0)

PG_FUNCTION_INFO_V1(citus_get_node_clock);
PG_FUNCTION_INFO_V1(citus_internal_adjust_local_clock_to_remote);
@@ -77,9 +77,9 @@
#define ALTER_TABLE_SET_ACCESS_METHOD 'm'

#define UNDISTRIBUTE_TABLE_CASCADE_HINT \
    "Use cascade option to undistribute all the relations involved in " \
    "a foreign key relationship with %s by executing SELECT " \
    "undistribute_table($$%s$$, cascade_via_foreign_keys=>true)"
    "Use cascade option to undistribute all the relations involved in " \
    "a foreign key relationship with %s by executing SELECT " \
    "undistribute_table($$%s$$, cascade_via_foreign_keys=>true)"


typedef TableConversionReturn *(*TableConversionFunction)(struct
@@ -185,8 +185,8 @@ typedef struct TableConversionState


static TableConversionReturn * AlterDistributedTable(TableConversionParameters *params);
static TableConversionReturn * AlterTableSetAccessMethod(
    TableConversionParameters *params);
static TableConversionReturn * AlterTableSetAccessMethod(TableConversionParameters *
                                                          params);
static TableConversionReturn * ConvertTable(TableConversionState *con);
static TableConversionReturn * ConvertTableInternal(TableConversionState *con);
static bool SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName,
@@ -215,8 +215,8 @@ static char * CreateWorkerChangeSequenceDependencyCommand(char *qualifiedSequece
static void ErrorIfMatViewSizeExceedsTheLimit(Oid matViewOid);
static char * CreateMaterializedViewDDLCommand(Oid matViewOid);
static char * GetAccessMethodForMatViewIfExists(Oid viewOid);
static bool WillRecreateForeignKeyToReferenceTable(Oid relationId,
                                                   CascadeToColocatedOption cascadeOption);
static bool WillRecreateFKeyToReferenceTable(Oid relationId,
                                             CascadeToColocatedOption cascadeOption);
static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId);
static void ErrorIfUnsupportedCascadeObjects(Oid relationId);
static List * WrapTableDDLCommands(List *commandStrings);
@@ -505,8 +505,9 @@ UndistributeTable(TableConversionParameters *params)
    if (!params->bypassTenantCheck && IsTenantSchema(schemaId) &&
        IsCitusTableType(params->relationId, SINGLE_SHARD_DISTRIBUTED))
    {
        EnsureUndistributeTenantTableSafe(params->relationId,
                                          TenantOperationNames[TENANT_UNDISTRIBUTE_TABLE]);
        EnsureUndistributeTenantTableSafe(
            params->relationId,
            TenantOperationNames[TENANT_UNDISTRIBUTE_TABLE]);
    }

    if (!params->cascadeViaForeignKeys)
@@ -577,7 +578,7 @@ AlterDistributedTable(TableConversionParameters *params)
    TableConversionState *con = CreateTableConversion(params);
    CheckAlterDistributedTableConversionParameters(con);

    if (WillRecreateForeignKeyToReferenceTable(con->relationId, con->cascadeToColocated))
    if (WillRecreateFKeyToReferenceTable(con->relationId, con->cascadeToColocated))
    {
        ereport(DEBUG1, (errmsg("setting multi shard modify mode to sequential")));
        SetLocalMultiShardModifyModeToSequential();
@@ -1927,14 +1928,10 @@ GetNonGeneratedStoredColumnNameList(Oid relationId)
    for (int columnIndex = 0; columnIndex < tupleDescriptor->natts; columnIndex++)
    {
        Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex);
        if (currentColumn->attisdropped)
        {
            /* skip dropped columns */
            continue;
        }

        if (currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED)
        if (IsDroppedOrGenerated(currentColumn))
        {
            /* skip dropped or generated columns */
            continue;
        }
@@ -2197,13 +2194,13 @@ GetAccessMethodForMatViewIfExists(Oid viewOid)


/*
 * WillRecreateForeignKeyToReferenceTable checks if the table of relationId has any foreign
 * WillRecreateFKeyToReferenceTable checks if the table of relationId has any foreign
 * key to a reference table, if conversion will be cascaded to colocated table this function
 * also checks if any of the colocated tables have a foreign key to a reference table too
 */
bool
WillRecreateForeignKeyToReferenceTable(Oid relationId,
                                       CascadeToColocatedOption cascadeOption)
WillRecreateFKeyToReferenceTable(Oid relationId,
                                 CascadeToColocatedOption cascadeOption)
{
    if (cascadeOption == CASCADE_TO_COLOCATED_NO ||
        cascadeOption == CASCADE_TO_COLOCATED_UNSPECIFIED)
@@ -522,7 +522,7 @@ ExecuteCascadeOperationForRelationIdList(List *relationIdList,
 * with the flag InTableTypeConversionFunctionCall set to true.
 */
void
ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(List *utilityCommandList)
ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(List *utilityCmdList)
{
    bool oldValue = InTableTypeConversionFunctionCall;
    InTableTypeConversionFunctionCall = true;
@@ -531,7 +531,7 @@ ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(List *utilityCommandL
    PG_TRY();
    {
        char *utilityCommand = NULL;
        foreach_declared_ptr(utilityCommand, utilityCommandList)
        foreach_declared_ptr(utilityCommand, utilityCmdList)
        {
            /*
             * CREATE MATERIALIZED VIEW commands need to be parsed/transformed,
@@ -566,10 +566,10 @@ ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(List *utilityCommandL
 * ExecuteAndLogUtilityCommand function for each of them.
 */
void
ExecuteAndLogUtilityCommandList(List *utilityCommandList)
ExecuteAndLogUtilityCommandList(List *utilityCmdList)
{
    char *utilityCommand = NULL;
    foreach_declared_ptr(utilityCommand, utilityCommandList)
    foreach_declared_ptr(utilityCommand, utilityCmdList)
    {
        ExecuteAndLogUtilityCommand(utilityCommand);
    }
@@ -19,6 +19,7 @@
#include "nodes/parsenodes.h"
#include "tcop/utility.h"

#include "distributed/citus_depended_object.h"
#include "distributed/commands.h"
#include "distributed/commands/utility_hook.h"
#include "distributed/deparser.h"
@@ -63,6 +64,13 @@ PostprocessCreateDistributedObjectFromCatalogStmt(Node *stmt, const char *queryS
        return NIL;
    }

    if (ops->qualify &&
        DistOpsValidityState(stmt, ops) == ShouldQualifyAfterLocalCreation)
    {
        /* qualify the statement after local creation */
        ops->qualify(stmt);
    }

    List *addresses = GetObjectAddressListFromParseTree(stmt, false, true);

    /* the code-path only supports a single object */
@@ -175,8 +175,9 @@ static bool DistributionColumnUsesNumericColumnNegativeScale(TupleDesc relationD
static int numeric_typmod_scale(int32 typmod);
static bool is_valid_numeric_typmod(int32 typmod);

static bool DistributionColumnUsesGeneratedStoredColumn(TupleDesc relationDesc,
                                                        Var *distributionColumn);
static void DistributionColumnIsGeneratedCheck(TupleDesc relationDesc,
                                               Var *distributionColumn,
                                               const char *relationName);
static bool CanUseExclusiveConnections(Oid relationId, bool localTableEmpty);
static uint64 DoCopyFromLocalTableIntoShards(Relation distributedRelation,
                                             DestReceiver *copyDest,
@@ -701,8 +702,9 @@ EnsureColocateWithTableIsValid(Oid relationId, char distributionMethod,
 * given table. We should make those checks after local table conversion by acquiring locks to
 * the relation because the distribution column can be modified in that period.
 */
    Oid distributionColumnType = ColumnTypeIdForRelationColumnName(relationId,
                                                                   distributionColumnName);
    Oid distributionColumnType = ColumnTypeIdForRelationColumnName(
        relationId,
        distributionColumnName);

    text *colocateWithTableNameText = cstring_to_text(colocateWithTableName);
    Oid colocateWithTableId = ResolveRelationId(colocateWithTableNameText, false);
@@ -1107,8 +1109,8 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
                 DistributedTableParams *distributedTableParams)
{
    if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED ||
         tableType == RANGE_DISTRIBUTED || tableType == SINGLE_SHARD_DISTRIBUTED) !=
        (distributedTableParams != NULL))
         tableType == SINGLE_SHARD_DISTRIBUTED ||
         tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL))
    {
        ereport(ERROR, (errmsg("distributed table params must be provided "
                               "when creating a distributed table and must "
@@ -2103,13 +2105,10 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,

    /* verify target relation is not distributed by a generated stored column */
    if (distributionMethod != DISTRIBUTE_BY_NONE &&
        DistributionColumnUsesGeneratedStoredColumn(relationDesc, distributionColumn))
    if (distributionMethod != DISTRIBUTE_BY_NONE)
    {
        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                        errmsg("cannot distribute relation: %s", relationName),
                        errdetail("Distribution column must not use GENERATED ALWAYS "
                                  "AS (...) STORED.")));
        DistributionColumnIsGeneratedCheck(relationDesc, distributionColumn,
                                           relationName);
    }

    /* verify target relation is not distributed by a column of type numeric with negative scale */
@@ -2829,9 +2828,7 @@ TupleDescColumnNameList(TupleDesc tupleDescriptor)
        Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex);
        char *columnName = NameStr(currentColumn->attname);

        if (currentColumn->attisdropped ||
            currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED
            )
        if (IsDroppedOrGenerated(currentColumn))
        {
            continue;
        }
@@ -2893,22 +2890,43 @@ DistributionColumnUsesNumericColumnNegativeScale(TupleDesc relationDesc,


/*
 * DistributionColumnUsesGeneratedStoredColumn returns whether a given relation uses
 * GENERATED ALWAYS AS (...) STORED on distribution column
 * DistributionColumnIsGeneratedCheck throws an error if a given relation uses
 * GENERATED ALWAYS AS (...) STORED | VIRTUAL on distribution column
 */
static bool
DistributionColumnUsesGeneratedStoredColumn(TupleDesc relationDesc,
                                            Var *distributionColumn)
static void
DistributionColumnIsGeneratedCheck(TupleDesc relationDesc,
                                   Var *distributionColumn,
                                   const char *relationName)
{
    Form_pg_attribute attributeForm = TupleDescAttr(relationDesc,
                                                    distributionColumn->varattno - 1);

    if (attributeForm->attgenerated == ATTRIBUTE_GENERATED_STORED)
    switch (attributeForm->attgenerated)
    {
        return true;
    }
        case ATTRIBUTE_GENERATED_STORED:
        {
            ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                            errmsg("cannot distribute relation: %s", relationName),
                            errdetail("Distribution column must not use GENERATED ALWAYS "
                                      "AS (...) STORED.")));
            break;
        }

    return false;
#if PG_VERSION_NUM >= PG_VERSION_18
        case ATTRIBUTE_GENERATED_VIRTUAL:
        {
            ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                            errmsg("cannot distribute relation: %s", relationName),
                            errdetail("Distribution column must not use GENERATED ALWAYS "
                                      "AS (...) VIRTUAL.")));
            break;
        }

#endif
        default:
        {
            break;
        }
    }
}
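For context, a hedged sketch of the user-facing behavior this check enforces (illustrative schema; the error text mirrors the `ereport` above):

```sql
CREATE TABLE items (
    id bigint,
    id_doubled bigint GENERATED ALWAYS AS (id * 2) STORED
);
-- Expected to fail, since the distribution column is a generated column:
SELECT create_distributed_table('items', 'id_doubled');
-- ERROR:  cannot distribute relation: items
-- DETAIL:  Distribution column must not use GENERATED ALWAYS AS (...) STORED.
```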
@@ -2165,7 +2165,9 @@ GetDistributeObjectOps(Node *node)
        }

        default:
        {
            return &Any_SecLabel;
        }
    }
}
@@ -25,6 +25,12 @@
#include "utils/lsyscache.h"
#include "utils/syscache.h"

#include "pg_version_constants.h"

#if PG_VERSION_NUM < PG_VERSION_17
#include "catalog/pg_am_d.h"
#endif

#include "citus_version.h"

#include "columnar/columnar.h"
@@ -52,6 +58,10 @@ static void MarkExistingObjectDependenciesDistributedIfSupported(void);
static List * GetAllViews(void);
static bool ShouldPropagateExtensionCommand(Node *parseTree);
static bool IsAlterExtensionSetSchemaCitus(Node *parseTree);
static bool HasAnyRelationsUsingOldColumnar(void);
static Oid GetOldColumnarAMIdIfExists(void);
static bool AccessMethodDependsOnAnyExtensions(Oid accessMethodId);
static bool HasAnyRelationsUsingAccessMethod(Oid accessMethodId);
static Node * RecreateExtensionStmt(Oid extensionOid);
static List * GenerateGrantCommandsOnExtensionDependentFDWs(Oid extensionId);
@@ -783,7 +793,8 @@ PreprocessCreateExtensionStmtForCitusColumnar(Node *parsetree)
    /* citus version >= 11.1 requires installing citus_columnar first */
    if (versionNumber >= 1110 && !CitusHasBeenLoaded())
    {
        if (get_extension_oid("citus_columnar", true) == InvalidOid)
        if (get_extension_oid("citus_columnar", true) == InvalidOid &&
            (versionNumber < 1320 || HasAnyRelationsUsingOldColumnar()))
        {
            CreateExtensionWithVersion("citus_columnar", NULL);
        }
@@ -894,9 +905,10 @@ PreprocessAlterExtensionCitusStmtForCitusColumnar(Node *parseTree)
    double newVersionNumber = GetExtensionVersionNumber(pstrdup(newVersion));

    /* alter extension citus update to version >= 11.1-1, and no citus_columnar installed */
    if (newVersionNumber >= 1110 && citusColumnarOid == InvalidOid)
    if (newVersionNumber >= 1110 && citusColumnarOid == InvalidOid &&
        (newVersionNumber < 1320 || HasAnyRelationsUsingOldColumnar()))
    {
        /* it's upgrading citus to 11.1-1 or a later version */
        /* it's upgrading citus to 11.1-1 or a later version and there are relations using old columnar */
        CreateExtensionWithVersion("citus_columnar", CITUS_COLUMNAR_INTERNAL_VERSION);
    }
    else if (newVersionNumber < 1110 && citusColumnarOid != InvalidOid)
@@ -911,7 +923,8 @@ PreprocessAlterExtensionCitusStmtForCitusColumnar(Node *parseTree)
    int versionNumber = (int) (100 * strtod(CITUS_MAJORVERSION, NULL));
    if (versionNumber >= 1110)
    {
        if (citusColumnarOid == InvalidOid)
        if (citusColumnarOid == InvalidOid &&
            (versionNumber < 1320 || HasAnyRelationsUsingOldColumnar()))
        {
            CreateExtensionWithVersion("citus_columnar",
                                       CITUS_COLUMNAR_INTERNAL_VERSION);
@@ -921,6 +934,117 @@ PreprocessAlterExtensionCitusStmtForCitusColumnar(Node *parseTree)
}


/*
 * HasAnyRelationsUsingOldColumnar returns true if there are any relations
 * using the old columnar access method.
 */
static bool
HasAnyRelationsUsingOldColumnar(void)
{
    Oid oldColumnarAMId = GetOldColumnarAMIdIfExists();
    return OidIsValid(oldColumnarAMId) &&
           HasAnyRelationsUsingAccessMethod(oldColumnarAMId);
}


/*
 * GetOldColumnarAMIdIfExists returns the oid of the old columnar access
 * method, i.e., the columnar access method that we had as part of "citus"
 * extension before we split it into "citus_columnar" at version 11.1, if
 * it exists. Otherwise, it returns InvalidOid.
 *
 * We know that it's "old columnar" only if the access method doesn't depend
 * on any extensions. This is because, in citus--11.0-4--11.1-1.sql, we
 * detach the columnar objects (including the access method) from citus
 * in preparation for splitting of the columnar into a separate extension.
 */
static Oid
GetOldColumnarAMIdIfExists(void)
{
    Oid columnarAMId = get_am_oid("columnar", true);
    if (OidIsValid(columnarAMId) && !AccessMethodDependsOnAnyExtensions(columnarAMId))
    {
        return columnarAMId;
    }

    return InvalidOid;
}


/*
 * AccessMethodDependsOnAnyExtensions returns true if the access method
 * with the given accessMethodId depends on any extensions.
 */
static bool
AccessMethodDependsOnAnyExtensions(Oid accessMethodId)
{
    ScanKeyData key[3];

    Relation pgDepend = table_open(DependRelationId, AccessShareLock);

    ScanKeyInit(&key[0],
                Anum_pg_depend_classid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(AccessMethodRelationId));
    ScanKeyInit(&key[1],
                Anum_pg_depend_objid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(accessMethodId));

    ScanKeyInit(&key[2],
                Anum_pg_depend_objsubid,
                BTEqualStrategyNumber, F_INT4EQ,
                Int32GetDatum(0));

    SysScanDesc scan = systable_beginscan(pgDepend, DependDependerIndexId, true,
                                          NULL, 3, key);

    bool result = false;

    HeapTuple heapTuple = NULL;
    while (HeapTupleIsValid(heapTuple = systable_getnext(scan)))
    {
        Form_pg_depend dependForm = (Form_pg_depend) GETSTRUCT(heapTuple);

        if (dependForm->refclassid == ExtensionRelationId)
        {
            result = true;
            break;
        }
    }

    systable_endscan(scan);
    table_close(pgDepend, AccessShareLock);

    return result;
}


/*
 * HasAnyRelationsUsingAccessMethod returns true if there are any relations
 * using the access method with the given accessMethodId.
 */
static bool
HasAnyRelationsUsingAccessMethod(Oid accessMethodId)
{
    ScanKeyData key[1];
    Relation pgClass = table_open(RelationRelationId, AccessShareLock);
    ScanKeyInit(&key[0],
                Anum_pg_class_relam,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(accessMethodId));

    SysScanDesc scan = systable_beginscan(pgClass, InvalidOid, false, NULL, 1, key);

    bool result = HeapTupleIsValid(systable_getnext(scan));

    systable_endscan(scan);
    table_close(pgClass, AccessShareLock);

    return result;
}


/*
 * PostprocessAlterExtensionCitusStmtForCitusColumnar process the case when upgrade citus
 * to version that support citus_columnar, or downgrade citus to lower version that
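The catalog logic above can be mimicked in SQL. As a hedged sketch (standard PostgreSQL catalogs, not an API introduced by this patch), one could spot relations still using a columnar access method that no extension owns, i.e., the pre-11.1 built-in columnar:

```sql
SELECT c.relname
FROM pg_class c
JOIN pg_am am ON am.oid = c.relam
WHERE am.amname = 'columnar'
  AND NOT EXISTS (
      SELECT 1
      FROM pg_depend d
      WHERE d.classid = 'pg_am'::regclass
        AND d.objid = am.oid
        AND d.refclassid = 'pg_extension'::regclass);
```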
@@ -959,7 +1083,7 @@ PostprocessAlterExtensionCitusStmtForCitusColumnar(Node *parseTree)
    {
        /* alter extension citus update, need upgrade citus_columnar from Y to Z */
        int versionNumber = (int) (100 * strtod(CITUS_MAJORVERSION, NULL));
        if (versionNumber >= 1110)
        if (versionNumber >= 1110 && citusColumnarOid != InvalidOid)
        {
            char *curColumnarVersion = get_extension_version(citusColumnarOid);
            if (strcmp(curColumnarVersion, CITUS_COLUMNAR_INTERNAL_VERSION) == 0)
@@ -47,13 +47,13 @@

#define BehaviorIsRestrictOrNoAction(x) \
    ((x) == FKCONSTR_ACTION_NOACTION || (x) == FKCONSTR_ACTION_RESTRICT)
    ((x) == FKCONSTR_ACTION_NOACTION || (x) == FKCONSTR_ACTION_RESTRICT)


#define USE_CREATE_REFERENCE_TABLE_HINT \
    "You could use SELECT create_reference_table('%s') " \
    "to replicate the referenced table to all nodes or " \
    "consider dropping the foreign key"
    "You could use SELECT create_reference_table('%s') " \
    "to replicate the referenced table to all nodes or " \
    "consider dropping the foreign key"


typedef bool (*CheckRelationFunc)(Oid);
@@ -71,7 +71,7 @@
#define DISABLE_LOCAL_CHECK_FUNCTION_BODIES "SET LOCAL check_function_bodies TO off;"
#define RESET_CHECK_FUNCTION_BODIES "RESET check_function_bodies;"
#define argumentStartsWith(arg, prefix) \
    (strncmp(arg, prefix, strlen(prefix)) == 0)
    (strncmp(arg, prefix, strlen(prefix)) == 0)

/* forward declaration for helper functions */
static bool RecreateSameNonColocatedFunction(ObjectAddress functionAddress,
@@ -107,9 +107,9 @@ static void DistributeFunctionColocatedWithDistributedTable(RegProcedure funcOid
                                                            char *colocateWithTableName,
                                                            const ObjectAddress *
                                                            functionAddress);
static void DistributeFunctionColocatedWithSingleShardTable(const
                                                            ObjectAddress *functionAddress,
                                                            text *colocateWithText);
static void DistributeFunctionColocatedWithSingleShardTable(const ObjectAddress *
                                                            functionAddress, text *
                                                            colocateWithText);
static void DistributeFunctionColocatedWithReferenceTable(const
                                                          ObjectAddress *functionAddress);
static List * FilterDistributedFunctions(GrantStmt *grantStmt);
@@ -769,13 +769,16 @@ UpdateFunctionDistributionInfo(const ObjectAddress *distAddress,
    const bool indexOK = true;

    ScanKeyData scanKey[3];
    Datum values[Natts_pg_dist_object];
    bool isnull[Natts_pg_dist_object];
    bool replace[Natts_pg_dist_object];

    Relation pgDistObjectRel = table_open(DistObjectRelationId(), RowExclusiveLock);
    TupleDesc tupleDescriptor = RelationGetDescr(pgDistObjectRel);

    Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
    bool *isnull = palloc0(tupleDescriptor->natts * sizeof(bool));
    bool *replace = palloc0(tupleDescriptor->natts * sizeof(bool));

    int forceDelegationIndex = GetForceDelegationAttrIndexInPgDistObject(tupleDescriptor);

    /* scan pg_dist_object for classid = $1 AND objid = $2 AND objsubid = $3 via index */
    ScanKeyInit(&scanKey[0], Anum_pg_dist_object_classid, BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(distAddress->classId));
@@ -797,12 +800,7 @@ UpdateFunctionDistributionInfo(const ObjectAddress *distAddress,
                        distAddress->objectId, distAddress->objectSubId)));
    }

    memset(values, 0, sizeof(values));
    memset(isnull, 0, sizeof(isnull));
    memset(replace, 0, sizeof(replace));

    replace[Anum_pg_dist_object_distribution_argument_index - 1] = true;

    if (distribution_argument_index != NULL)
    {
        values[Anum_pg_dist_object_distribution_argument_index - 1] = Int32GetDatum(
@@ -825,16 +823,15 @@ UpdateFunctionDistributionInfo(const ObjectAddress *distAddress,
        isnull[Anum_pg_dist_object_colocationid - 1] = true;
    }

    replace[Anum_pg_dist_object_force_delegation - 1] = true;
    replace[forceDelegationIndex] = true;
    if (forceDelegation != NULL)
    {
        values[Anum_pg_dist_object_force_delegation - 1] = BoolGetDatum(
            *forceDelegation);
        isnull[Anum_pg_dist_object_force_delegation - 1] = false;
        values[forceDelegationIndex] = BoolGetDatum(*forceDelegation);
        isnull[forceDelegationIndex] = false;
    }
    else
    {
        isnull[Anum_pg_dist_object_force_delegation - 1] = true;
        isnull[forceDelegationIndex] = true;
    }

    heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, replace);
@@ -849,6 +846,10 @@ UpdateFunctionDistributionInfo(const ObjectAddress *distAddress,

    table_close(pgDistObjectRel, NoLock);

    pfree(values);
    pfree(isnull);
    pfree(replace);

    if (EnableMetadataSync)
    {
        List *objectAddressList = list_make1((ObjectAddress *) distAddress);
@@ -1895,7 +1896,9 @@ ShouldAddFunctionSignature(FunctionParameterMode mode)
        }

        default:
        {
            return true;
        }
    }
}
@@ -96,6 +96,7 @@ PreprocessGrantStmt(Node *node, const char *queryString,
        {
            appendStringInfo(&privsString, "%s", priv->priv_name);
        }

        /*
         * ALL can only be set alone.
         * And ALL is not added as a keyword in priv_name by parser, but
@@ -108,6 +109,7 @@ PreprocessGrantStmt(Node *node, const char *queryString,
            /* this is used for column level only */
            appendStringInfo(&privsString, "ALL");
        }

        /*
         * Instead of relying only on the syntax check done by Postgres and
         * adding an assert here, add a default ERROR if ALL is not first
@@ -227,8 +229,8 @@ CollectGrantTableIdList(GrantStmt *grantStmt)

    bool grantOnTableCommand = (grantStmt->targtype == ACL_TARGET_OBJECT &&
                                grantStmt->objtype == OBJECT_TABLE);
    bool grantAllTablesOnSchemaCommand = (grantStmt->targtype ==
                                          ACL_TARGET_ALL_IN_SCHEMA &&
    bool grantAllTablesOnSchemaCommand = (grantStmt->targtype == ACL_TARGET_ALL_IN_SCHEMA
                                          &&
                                          grantStmt->objtype == OBJECT_TABLE);

    /* we are only interested in table level grants */
@@ -64,8 +64,8 @@ static int GetNumberOfIndexParameters(IndexStmt *createIndexStatement);
static bool IndexAlreadyExists(IndexStmt *createIndexStatement);
static Oid CreateIndexStmtGetIndexId(IndexStmt *createIndexStatement);
static Oid CreateIndexStmtGetSchemaId(IndexStmt *createIndexStatement);
static void SwitchToSequentialAndLocalExecutionIfIndexNameTooLong(
    IndexStmt *createIndexStatement);
static void SwitchToSequentialAndLocalExecutionIfIndexNameTooLong(IndexStmt *
                                                                  createIndexStatement);
static char * GenerateLongestShardPartitionIndexName(IndexStmt *createIndexStatement);
static char * GenerateDefaultIndexName(IndexStmt *createIndexStatement);
static List * GenerateIndexParameters(IndexStmt *createIndexStatement);
@@ -854,8 +854,11 @@ PostprocessIndexStmt(Node *node, const char *queryString)
    table_close(relation, NoLock);
    index_close(indexRelation, NoLock);

    PushActiveSnapshot(GetTransactionSnapshot());

    /* mark index as invalid, in-place (cannot be rolled back) */
    index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
    PopActiveSnapshot();

    /* re-open a transaction command from here on out */
    CommitTransactionCommand();
@@ -1370,8 +1373,11 @@ MarkIndexValid(IndexStmt *indexStmt)
                                                  schemaId);
    Relation indexRelation = index_open(indexRelationId, RowExclusiveLock);

    PushActiveSnapshot(GetTransactionSnapshot());

    /* mark index as valid, in-place (cannot be rolled back) */
    index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
    PopActiveSnapshot();

    table_close(relation, NoLock);
    index_close(indexRelation, NoLock);
@@ -350,7 +350,6 @@ static void LogLocalCopyToRelationExecution(uint64 shardId);
static void LogLocalCopyToFileExecution(uint64 shardId);
static void ErrorIfMergeInCopy(CopyStmt *copyStatement);


/* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(citus_text_send_as_jsonb);

@@ -484,9 +483,7 @@ CopyToExistingShards(CopyStmt *copyStatement, QueryCompletion *completionTag)
        Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex);
        char *columnName = NameStr(currentColumn->attname);

        if (currentColumn->attisdropped ||
            currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED
            )
        if (IsDroppedOrGenerated(currentColumn))
        {
            continue;
        }
@@ -804,9 +801,7 @@ CanUseBinaryCopyFormat(TupleDesc tupleDescription)
    {
        Form_pg_attribute currentColumn = TupleDescAttr(tupleDescription, columnIndex);

        if (currentColumn->attisdropped ||
            currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED
            )
        if (IsDroppedOrGenerated(currentColumn))
        {
            continue;
        }
@@ -1277,7 +1272,9 @@ ConversionPathForTypes(Oid inputType, Oid destType, CopyCoercionData *result)
        }

        default:
        {
            Assert(false); /* there are no other options for this enum */
        }
    }
}
@@ -1316,9 +1313,7 @@ TypeArrayFromTupleDescriptor(TupleDesc tupleDescriptor)
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
    {
        Form_pg_attribute attr = TupleDescAttr(tupleDescriptor, columnIndex);
        if (attr->attisdropped ||
            attr->attgenerated == ATTRIBUTE_GENERATED_STORED
            )
        if (IsDroppedOrGenerated(attr))
        {
            typeArray[columnIndex] = InvalidOid;
        }
@@ -1486,9 +1481,7 @@ AppendCopyRowData(Datum *valueArray, bool *isNullArray, TupleDesc rowDescriptor,
            value = CoerceColumnValue(value, &columnCoercionPaths[columnIndex]);
        }

        if (currentColumn->attisdropped ||
            currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED
            )
        if (IsDroppedOrGenerated(currentColumn))
        {
            continue;
        }
@@ -1607,9 +1600,7 @@ AvailableColumnCount(TupleDesc tupleDescriptor)
    {
        Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex);

        if (!currentColumn->attisdropped &&
            currentColumn->attgenerated != ATTRIBUTE_GENERATED_STORED
            )
        if (!IsDroppedOrGenerated(currentColumn))
        {
            columnCount++;
        }
@@ -2479,7 +2470,7 @@ ProcessAppendToShardOption(Oid relationId, CopyStmt *copyStatement)
    if (!IsCitusTableType(relationId, APPEND_DISTRIBUTED))
    {
        ereport(ERROR, (errmsg(APPEND_TO_SHARD_OPTION " is only valid for "
                               "append-distributed tables")));
                               "append-distributed tables")));
    }

    /* throws an error if shard does not exist */
@@ -2869,8 +2860,8 @@ ErrorIfCopyHasOnErrorLogVerbosity(CopyStmt *copyStatement)
    {
        if (strcmp(option->defname, "on_error") == 0)
        {
            ereport(ERROR, (errmsg(
                                "Citus does not support COPY FROM with ON_ERROR option.")));
            ereport(ERROR, (errmsg("Citus does not support "
                                   "COPY FROM with ON_ERROR option.")));
        }
        else if (strcmp(option->defname, "log_verbosity") == 0)
        {
@@ -2887,8 +2878,8 @@ ErrorIfCopyHasOnErrorLogVerbosity(CopyStmt *copyStatement)
         */
        if (log_verbosity)
        {
            ereport(ERROR, (errmsg(
                                "Citus does not support COPY FROM with LOG_VERBOSITY option.")));
            ereport(ERROR, (errmsg("Citus does not support "
                                   "COPY FROM with LOG_VERBOSITY option.")));
        }
#endif
}
@@ -3049,7 +3040,7 @@ CitusCopySelect(CopyStmt *copyStatement)

    for (int i = 0; i < tupleDescriptor->natts; i++)
    {
        Form_pg_attribute attr = &tupleDescriptor->attrs[i];
        Form_pg_attribute attr = TupleDescAttr(tupleDescriptor, i);

        if (attr->attisdropped ||
            attr->attgenerated
@@ -3999,3 +3990,20 @@ UnclaimCopyConnections(List *connectionStateList)
        UnclaimConnection(connectionState->connection);
    }
}


/*
 * IsDroppedOrGenerated - helper function for determining if an attribute is
 * dropped or generated. Used by COPY and Citus DDL to skip such columns.
 */
inline bool
IsDroppedOrGenerated(Form_pg_attribute attr)
{
    /*
     * If the "is dropped" flag is true, or the generated-column flag is not
     * the default NUL character (in which case its value is 's' for
     * ATTRIBUTE_GENERATED_STORED or, on PG18+, possibly 'v' for
     * ATTRIBUTE_GENERATED_VIRTUAL), then return true.
     */
    return attr->attisdropped || (attr->attgenerated != '\0');
}
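A hedged SQL illustration of what the helper enables (schema assumed): COPY into a distributed table skips dropped and generated columns, since generated values are recomputed on the shards rather than shipped over the wire.

```sql
CREATE TABLE measurements (
    sensor_id bigint,
    raw_value numeric,
    doubled numeric GENERATED ALWAYS AS (raw_value * 2) STORED
);
-- Only non-generated columns appear in the input; "doubled" is skipped
-- and recomputed for every row on the target shard.
COPY measurements (sensor_id, raw_value) FROM STDIN WITH (FORMAT csv);
```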
@@ -35,13 +35,13 @@

#define EXECUTE_COMMAND_ON_REMOTE_NODES_AS_USER \
    "SELECT citus_internal.execute_command_on_remote_nodes_as_user(%s, %s)"
    "SELECT citus_internal.execute_command_on_remote_nodes_as_user(%s, %s)"
#define START_MANAGEMENT_TRANSACTION \
    "SELECT citus_internal.start_management_transaction('%lu')"
    "SELECT citus_internal.start_management_transaction('%lu')"
#define MARK_OBJECT_DISTRIBUTED \
    "SELECT citus_internal.mark_object_distributed(%d, %s, %d, %s)"
    "SELECT citus_internal.mark_object_distributed(%d, %s, %d, %s)"
#define UNMARK_OBJECT_DISTRIBUTED \
    "SELECT pg_catalog.citus_unmark_object_distributed(%d, %d, %d, %s)"
    "SELECT pg_catalog.citus_unmark_object_distributed(%d, %d, %d, %s)"


/*
@@ -196,6 +196,27 @@ BuildCreatePublicationStmt(Oid publicationId)
                                           -1);
    createPubStmt->options = lappend(createPubStmt->options, pubViaRootOption);

    /* WITH (publish_generated_columns = ...) option (PG18+) */
#if PG_VERSION_NUM >= PG_VERSION_18
    if (publicationForm->pubgencols == 's') /* stored */
    {
        DefElem *pubGenColsOption =
            makeDefElem("publish_generated_columns",
                        (Node *) makeString("stored"),
                        -1);

        createPubStmt->options =
            lappend(createPubStmt->options, pubGenColsOption);
    }
    else if (publicationForm->pubgencols != 'n') /* 'n' = none (default) */
    {
        ereport(ERROR,
                (errmsg("unexpected pubgencols value '%c' for publication %u",
                        publicationForm->pubgencols, publicationId)));
    }
#endif


    /* WITH (publish = 'insert, update, delete, truncate') option */
    List *publishList = NIL;
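For context, a hedged sketch of the PostgreSQL 18 DDL that sets the `pubgencols` flag deparsed above (option name per PG18's CREATE PUBLICATION; the publication and table names are illustrative):

```sql
-- 's' (stored) in pg_publication.pubgencols corresponds to this option;
-- the default 'n' (none) leaves generated columns out of replication.
CREATE PUBLICATION sensor_pub FOR TABLE measurements
    WITH (publish_generated_columns = 'stored');
```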
@@ -149,13 +149,14 @@ PreprocessRenameStmt(Node *node, const char *renameCommand,
        }

        default:

        {
            /*
             * Nodes that are not supported by Citus: we pass-through to the
             * main PostgreSQL executor. Any Citus-supported RenameStmt
             * renameType must appear above in the switch, explicitly.
             */
            return NIL;
        }
    }

    bool isCitusRelation = IsCitusTable(tableRelationId);
@@ -177,8 +177,7 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList,
    {
        Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);

        if (attributeForm->attisdropped ||
            attributeForm->attgenerated == ATTRIBUTE_GENERATED_STORED)
        if (IsDroppedOrGenerated(attributeForm))
        {
            /* skip dropped columns and columns with GENERATED AS ALWAYS expressions */
            continue;
@@ -463,8 +462,8 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString,
    if (IsAnyObjectDistributed(addresses) || SequenceUsedInDistributedTable(address,
                                                                            DEPENDENCY_INTERNAL))
    {
        ereport(ERROR, (errmsg(
                            "Altering a distributed sequence is currently not supported.")));
        ereport(ERROR, (errmsg("Altering a distributed sequence "
                               "is currently not supported.")));
    }

    /*
@@ -992,8 +991,8 @@ FilterDistributedSequences(GrantStmt *stmt)
{
    bool grantOnSequenceCommand = (stmt->targtype == ACL_TARGET_OBJECT &&
                                   stmt->objtype == OBJECT_SEQUENCE);
    bool grantOnAllSequencesInSchemaCommand = (stmt->targtype ==
                                               ACL_TARGET_ALL_IN_SCHEMA &&
    bool grantOnAllSequencesInSchemaCommand = (stmt->targtype == ACL_TARGET_ALL_IN_SCHEMA
                                               &&
                                               stmt->objtype == OBJECT_SEQUENCE);

    /* we are only interested in sequence level grants */
@@ -1034,11 +1033,10 @@ FilterDistributedSequences(GrantStmt *stmt)
         */
        if (list_member_oid(namespaceOidList, namespaceOid))
        {
            RangeVar *distributedSequence = makeRangeVar(get_namespace_name(
                                                             namespaceOid),
                                                         get_rel_name(
                                                             sequenceAddress->objectId),
                                                         -1);
            RangeVar *distributedSequence = makeRangeVar(
                get_namespace_name(namespaceOid),
                get_rel_name(sequenceAddress->objectId),
                -1);
            grantSequenceList = lappend(grantSequenceList, distributedSequence);
        }
    }
@@ -237,7 +237,9 @@ AcquireCitusAdvisoryObjectClassLockGetOid(ObjectClass objectClass,
        }

        default:
        {
            elog(ERROR, "unsupported object class: %d", objectClass);
        }
    }
}

@@ -270,6 +272,8 @@ AcquireCitusAdvisoryObjectClassLockCheckPrivileges(ObjectClass objectClass, Oid
        }

        default:
        {
            elog(ERROR, "unsupported object class: %d", objectClass);
        }
    }
}
@@ -50,7 +50,7 @@

#define DEFAULT_STATISTICS_TARGET -1
#define ALTER_INDEX_COLUMN_SET_STATS_COMMAND \
    "ALTER INDEX %s ALTER COLUMN %d SET STATISTICS %d"
    "ALTER INDEX %s ALTER COLUMN %d SET STATISTICS %d"

static char * GenerateAlterIndexColumnSetStatsCommand(char *indexNameWithSchema,
                                                      int16 attnum,
@@ -69,7 +69,15 @@ PreprocessCreateStatisticsStmt(Node *node, const char *queryString,
{
    CreateStatsStmt *stmt = castNode(CreateStatsStmt, node);

    RangeVar *relation = (RangeVar *) linitial(stmt->relations);
    Node *relationNode = (Node *) linitial(stmt->relations);

    if (!IsA(relationNode, RangeVar))
    {
        return NIL;
    }

    RangeVar *relation = (RangeVar *) relationNode;

    Oid relationId = RangeVarGetRelid(relation, ShareUpdateExclusiveLock, false);

    if (!IsCitusTable(relationId) || !ShouldPropagate())
@@ -81,23 +81,23 @@ static void ErrorIfAttachCitusTableToPgLocalTable(Oid parentRelationId,
                                                  Oid partitionRelationId);
static bool DeparserSupportsAlterTableAddColumn(AlterTableStmt *alterTableStatement,
                                                AlterTableCmd *addColumnSubCommand);
static bool ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(
    AlterTableStmt *alterTableStatement);
static bool ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(AlterTableStmt *
                                                           alterTableStatement);
static bool ShouldMarkConnectedRelationsNotAutoConverted(Oid leftRelationId,
                                                         Oid rightRelationId);
static bool RelationIdListContainsCitusTableType(List *relationIdList,
                                                 CitusTableType citusTableType);
static bool RelationIdListContainsPostgresTable(List *relationIdList);
static void ConvertPostgresLocalTablesToCitusLocalTables(
    AlterTableStmt *alterTableStatement);
static void ConvertPostgresLocalTablesToCitusLocalTables(AlterTableStmt *
                                                         alterTableStatement);
static bool RangeVarListHasLocalRelationConvertedByUser(List *relationRangeVarList,
                                                        AlterTableStmt *
                                                        alterTableStatement);
static int CompareRangeVarsByOid(const void *leftElement, const void *rightElement);
static List * GetAlterTableAddFKeyRightRelationIdList(
    AlterTableStmt *alterTableStatement);
static List * GetAlterTableAddFKeyRightRelationRangeVarList(
    AlterTableStmt *alterTableStatement);
static List * GetAlterTableAddFKeyRightRelationIdList(AlterTableStmt *
                                                      alterTableStatement);
static List * GetAlterTableAddFKeyRightRelationRangeVarList(AlterTableStmt *
                                                            alterTableStatement);
static List * GetAlterTableAddFKeyConstraintList(AlterTableStmt *alterTableStatement);
static List * GetAlterTableCommandFKeyConstraintList(AlterTableCmd *command);
static List * GetRangeVarListFromFKeyConstraintList(List *fKeyConstraintList);
@@ -1352,6 +1352,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
                                                            constraint);
            }
        }

        /*
         * When constraint->indexname is not NULL we are handling an
         * ADD {PRIMARY KEY, UNIQUE} USING INDEX command. In this case
@@ -1532,6 +1533,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
            }
        }

        /*
         * We check for ALTER COLUMN .. SET/DROP DEFAULT
         * we should not propagate anything to shards
@@ -2181,7 +2183,9 @@ AlterTableCommandTypeIsTrigger(AlterTableType alterTableType)
        }

        default:
        {
            return false;
        }
    }
}

@@ -2719,6 +2723,7 @@ PostprocessAlterTableStmt(AlterTableStmt *alterTableStatement)
            }
        }
    }

    /*
     * We check for ALTER COLUMN .. SET DEFAULT nextval('user_defined_seq')
     * we should make sure that the type of the column that uses
@@ -2815,6 +2820,7 @@ FixAlterTableStmtIndexNames(AlterTableStmt *alterTableStatement)

            FixPartitionShardIndexNames(relationId, parentIndexOid);
        }

        /*
         * If this is an ALTER TABLE .. ATTACH PARTITION command
         * we have wrong index names generated on indexes of shards of
@@ -3425,13 +3431,13 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
                if (commandList->length > 1 ||
                    columnConstraints->length > 1)
                {
                    ereport(ERROR, (errcode(
                                        ERRCODE_FEATURE_NOT_SUPPORTED),
                                    errmsg(
                                        "cannot execute ADD COLUMN .. DEFAULT nextval('..')"
                                        " command with other subcommands/constraints"),
                                    errhint(
                                        "You can issue each subcommand separately")));
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg(
                                 "cannot execute ADD COLUMN .. DEFAULT nextval('..')"
                                 " command with other subcommands/constraints"),
                             errhint(
                                 "You can issue each subcommand separately")));
                }

                /*
@@ -3440,14 +3446,14 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
                 */
                if (!TableEmpty(relationId))
                {
                    ereport(ERROR, (errcode(
                                        ERRCODE_FEATURE_NOT_SUPPORTED),
                                    errmsg(
                                        "cannot add a column involving DEFAULT nextval('..') "
                                        "because the table is not empty"),
                                    errhint(
                                        "You can first call ALTER TABLE .. ADD COLUMN .. smallint/int/bigint\n"
                                        "Then set the default by ALTER TABLE .. ALTER COLUMN .. SET DEFAULT nextval('..')")));
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg(
                                 "cannot add a column involving DEFAULT nextval('..') "
                                 "because the table is not empty"),
                             errhint(
                                 "You can first call ALTER TABLE .. ADD COLUMN .. smallint/int/bigint\n"
                                 "Then set the default by ALTER TABLE .. ALTER COLUMN .. SET DEFAULT nextval('..')")));
                }
            }
        }
@@ -1297,7 +1297,8 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
                         "partial failure, potentially leading to an inconsistent "
                         "state.\nIf the problematic command is a CREATE operation, "
                         "consider using the 'IF EXISTS' syntax to drop the object,"
                         "\nif applicable, and then re-attempt the original command.")));
                         "\nif applicable, and then re-attempt "
                         "the original command.")));
        }

        PG_RE_THROW();
@@ -48,21 +48,27 @@ typedef struct CitusVacuumParams
#endif
} CitusVacuumParams;

/*
 * Information we track per VACUUM/ANALYZE target relation.
 */
typedef struct CitusVacuumRelation
{
    VacuumRelation *vacuumRelation;
    Oid relationId;
} CitusVacuumRelation;

/* Local functions forward declarations for processing distributed table commands */
static bool IsDistributedVacuumStmt(List *vacuumRelationIdList);
static bool IsDistributedVacuumStmt(List *vacuumRelationList);
static List * VacuumTaskList(Oid relationId, CitusVacuumParams vacuumParams,
                             List *vacuumColumnList);
static char * DeparseVacuumStmtPrefix(CitusVacuumParams vacuumParams);
static char * DeparseVacuumColumnNames(List *columnNameList);
static List * VacuumColumnList(VacuumStmt *vacuumStmt, int relationIndex);
static List * ExtractVacuumTargetRels(VacuumStmt *vacuumStmt);
static void ExecuteVacuumOnDistributedTables(VacuumStmt *vacuumStmt, List *relationIdList,
static void ExecuteVacuumOnDistributedTables(VacuumStmt *vacuumStmt, List *relationList,
                                             CitusVacuumParams vacuumParams);
static void ExecuteUnqualifiedVacuumTasks(VacuumStmt *vacuumStmt,
                                          CitusVacuumParams vacuumParams);
static CitusVacuumParams VacuumStmtParams(VacuumStmt *vacstmt);
static List * VacuumRelationIdList(VacuumStmt *vacuumStmt, CitusVacuumParams
                                   vacuumParams);
static List * VacuumRelationList(VacuumStmt *vacuumStmt, CitusVacuumParams vacuumParams);

/*
 * PostprocessVacuumStmt processes vacuum statements that may need propagation to
@@ -97,7 +103,7 @@ PostprocessVacuumStmt(Node *node, const char *vacuumCommand)
     * when no table is specified propagate the command as it is;
     * otherwise, only propagate when there is at least 1 citus table
     */
    List *relationIdList = VacuumRelationIdList(vacuumStmt, vacuumParams);
    List *vacuumRelationList = VacuumRelationList(vacuumStmt, vacuumParams);

    if (list_length(vacuumStmt->rels) == 0)
    {
@@ -105,11 +111,11 @@ PostprocessVacuumStmt(Node *node, const char *vacuumCommand)

        ExecuteUnqualifiedVacuumTasks(vacuumStmt, vacuumParams);
    }
    else if (IsDistributedVacuumStmt(relationIdList))
    else if (IsDistributedVacuumStmt(vacuumRelationList))
    {
        /* there is at least 1 citus table specified */

        ExecuteVacuumOnDistributedTables(vacuumStmt, relationIdList,
        ExecuteVacuumOnDistributedTables(vacuumStmt, vacuumRelationList,
                                         vacuumParams);
    }

@@ -120,39 +126,58 @@ PostprocessVacuumStmt(Node *node, const char *vacuumCommand)


/*
 * VacuumRelationIdList returns the oid of the relations in the given vacuum statement.
 * VacuumRelationList returns the list of relations in the given vacuum statement,
 * along with their resolved Oids (if they can be locked).
 */
static List *
VacuumRelationIdList(VacuumStmt *vacuumStmt, CitusVacuumParams vacuumParams)
VacuumRelationList(VacuumStmt *vacuumStmt, CitusVacuumParams vacuumParams)
{
    LOCKMODE lockMode = (vacuumParams.options & VACOPT_FULL) ? AccessExclusiveLock :
                        ShareUpdateExclusiveLock;

    bool skipLocked = (vacuumParams.options & VACOPT_SKIP_LOCKED);

    List *vacuumRelationList = ExtractVacuumTargetRels(vacuumStmt);
    List *relationList = NIL;

    List *relationIdList = NIL;

    RangeVar *vacuumRelation = NULL;
    foreach_declared_ptr(vacuumRelation, vacuumRelationList)
    VacuumRelation *vacuumRelation = NULL;
    foreach_declared_ptr(vacuumRelation, vacuumStmt->rels)
    {
        Oid relationId = InvalidOid;

        /*
         * If skip_locked option is enabled, we are skipping that relation
         * if the lock for it is currently not available; otherwise, we get the lock.
         */
        Oid relationId = RangeVarGetRelidExtended(vacuumRelation,
        if (vacuumRelation->relation)
        {
            relationId = RangeVarGetRelidExtended(vacuumRelation->relation,
                                                  lockMode,
                                                  skipLocked ? RVR_SKIP_LOCKED : 0, NULL,
                                                  NULL);
        }
        else if (OidIsValid(vacuumRelation->oid))
        {
            /* fall back to the Oid directly when provided */
            if (!skipLocked || ConditionalLockRelationOid(vacuumRelation->oid, lockMode))
            {
                if (!skipLocked)
                {
                    LockRelationOid(vacuumRelation->oid, lockMode);
                }
                relationId = vacuumRelation->oid;
            }
        }

        if (OidIsValid(relationId))
        {
            relationIdList = lappend_oid(relationIdList, relationId);
            CitusVacuumRelation *relation = palloc(sizeof(CitusVacuumRelation));
            relation->vacuumRelation = vacuumRelation;
            relation->relationId = relationId;
            relationList = lappend(relationList, relation);
        }
    }

    return relationIdList;
    return relationList;
}


@@ -161,12 +186,13 @@ VacuumRelationIdList(VacuumStmt *vacuumStmt, CitusVacuumParams vacuumParams)
 * otherwise, it returns false.
 */
static bool
IsDistributedVacuumStmt(List *vacuumRelationIdList)
IsDistributedVacuumStmt(List *vacuumRelationList)
{
    Oid relationId = InvalidOid;
    foreach_declared_oid(relationId, vacuumRelationIdList)
    CitusVacuumRelation *vacuumRelation = NULL;
    foreach_declared_ptr(vacuumRelation, vacuumRelationList)
    {
        if (OidIsValid(relationId) && IsCitusTable(relationId))
        if (OidIsValid(vacuumRelation->relationId) &&
            IsCitusTable(vacuumRelation->relationId))
        {
            return true;
        }
@@ -181,24 +207,31 @@ IsDistributedVacuumStmt(List *vacuumRelationIdList)
 * if they are citus tables.
 */
static void
ExecuteVacuumOnDistributedTables(VacuumStmt *vacuumStmt, List *relationIdList,
ExecuteVacuumOnDistributedTables(VacuumStmt *vacuumStmt, List *relationList,
                                 CitusVacuumParams vacuumParams)
{
    int relationIndex = 0;

    Oid relationId = InvalidOid;
    foreach_declared_oid(relationId, relationIdList)
    CitusVacuumRelation *vacuumRelationEntry = NULL;
    foreach_declared_ptr(vacuumRelationEntry, relationList)
    {
        Oid relationId = vacuumRelationEntry->relationId;
        VacuumRelation *vacuumRelation = vacuumRelationEntry->vacuumRelation;

        RangeVar *relation = vacuumRelation->relation;
        if (relation != NULL && !relation->inh)
        {
            /* ONLY specified, so don't recurse to shard placements */
            continue;
        }

        if (IsCitusTable(relationId))
        {
            List *vacuumColumnList = VacuumColumnList(vacuumStmt, relationIndex);
            List *vacuumColumnList = vacuumRelation->va_cols;
            List *taskList = VacuumTaskList(relationId, vacuumParams, vacuumColumnList);

            /* local execution is not implemented for VACUUM commands */
            bool localExecutionSupported = false;
            ExecuteUtilityTaskList(taskList, localExecutionSupported);
        }
        relationIndex++;
    }
}

@@ -484,39 +517,6 @@ DeparseVacuumColumnNames(List *columnNameList)
}


/*
 * VacuumColumnList returns list of columns from relation
 * in the vacuum statement at specified relationIndex.
 */
static List *
VacuumColumnList(VacuumStmt *vacuumStmt, int relationIndex)
{
    VacuumRelation *vacuumRelation = (VacuumRelation *) list_nth(vacuumStmt->rels,
                                                                 relationIndex);

    return vacuumRelation->va_cols;
}


/*
 * ExtractVacuumTargetRels returns list of target
 * relations from vacuum statement.
 */
static List *
ExtractVacuumTargetRels(VacuumStmt *vacuumStmt)
{
    List *vacuumList = NIL;

    VacuumRelation *vacuumRelation = NULL;
    foreach_declared_ptr(vacuumRelation, vacuumStmt->rels)
    {
        vacuumList = lappend(vacuumList, vacuumRelation->relation);
    }

    return vacuumList;
}


/*
 * VacuumStmtParams returns a CitusVacuumParams based on the supplied VacuumStmt.
 */
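A hedged usage sketch of the new per-relation handling (table name illustrative): with `ONLY`, `relation->inh` is false, so the loop above skips shard propagation for that relation.

```sql
-- Propagates to shard placements on the workers:
VACUUM (SKIP_LOCKED) measurements;

-- Vacuums only the coordinator-side relation; the ONLY keyword suppresses
-- recursion into the shards, as handled by the continue branch above.
VACUUM ONLY measurements;
```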
@@ -475,8 +475,8 @@ FindAvailableConnection(dlist_head *connections, uint32 flags)
        if (flags & OUTSIDE_TRANSACTION)
        {
            /* don't return connections that are used in transactions */
            if (connection->remoteTransaction.transactionState !=
                REMOTE_TRANS_NOT_STARTED)
            if (connection->
                remoteTransaction.transactionState != REMOTE_TRANS_NOT_STARTED)
            {
                continue;
            }
@@ -191,8 +191,8 @@ static HTAB *ConnectionShardHash;

static MultiConnection * FindPlacementListConnection(int flags, List *placementAccessList,
                                                     const char *userName);
static ConnectionPlacementHashEntry * FindOrCreatePlacementEntry(
    ShardPlacement *placement);
static ConnectionPlacementHashEntry * FindOrCreatePlacementEntry(ShardPlacement *
                                                                 placement);
static bool CanUseExistingConnection(uint32 flags, const char *userName,
                                     ConnectionReference *placementConnection);
static bool ConnectionAccessedDifferentPlacement(MultiConnection *connection,
@@ -14,6 +14,7 @@
#include "miscadmin.h"
#include "pgstat.h"

#include "catalog/pg_collation.h"
#include "lib/stringinfo.h"
#include "storage/latch.h"
#include "utils/builtins.h"
@@ -371,8 +372,9 @@ CommandMatchesLogGrepPattern(const char *command)
    if (GrepRemoteCommands && strnlen(GrepRemoteCommands, NAMEDATALEN) > 0)
    {
        Datum boolDatum =
            DirectFunctionCall2(textlike, CStringGetTextDatum(command),
                                CStringGetTextDatum(GrepRemoteCommands));
            DirectFunctionCall2Coll(textlike, DEFAULT_COLLATION_OID,
                                    CStringGetTextDatum(command),
                                    CStringGetTextDatum(GrepRemoteCommands));

        return DatumGetBool(boolDatum);
    }
@@ -675,8 +675,9 @@ SharedConnectionStatsShmemInit(void)
    ConnectionStatsSharedState->sharedConnectionHashTrancheId = LWLockNewTrancheId();
    ConnectionStatsSharedState->sharedConnectionHashTrancheName =
        "Shared Connection Tracking Hash Tranche";
    LWLockRegisterTranche(ConnectionStatsSharedState->sharedConnectionHashTrancheId,
                          ConnectionStatsSharedState->sharedConnectionHashTrancheName);
    LWLockRegisterTranche(
        ConnectionStatsSharedState->sharedConnectionHashTrancheId,
        ConnectionStatsSharedState->sharedConnectionHashTrancheName);

    LWLockInitialize(&ConnectionStatsSharedState->sharedConnectionHashLock,
                     ConnectionStatsSharedState->sharedConnectionHashTrancheId);
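For context, the hunk above is the standard three-step recipe for a custom LWLock in shared memory: allocate a tranche id, register a human-readable tranche name, then initialize the lock with that id. A minimal sketch under the assumption of a PostgreSQL shmem_startup_hook (the MyStatsShmemInit name and struct are illustrative, not Citus code):

#include "postgres.h"

#include "storage/lwlock.h"
#include "storage/shmem.h"

typedef struct MyStatsSharedState
{
    int trancheId;
    LWLock lock;
} MyStatsSharedState;

static MyStatsSharedState *MyStatsState = NULL;

/* to be called from an extension's shmem_startup_hook */
static void
MyStatsShmemInit(void)
{
    bool alreadyInitialized = false;

    MyStatsState = ShmemInitStruct("My Stats State",
                                   sizeof(MyStatsSharedState),
                                   &alreadyInitialized);
    if (!alreadyInitialized)
    {
        /* 1. get a fresh tranche id, 2. name it, 3. initialize the lock */
        MyStatsState->trancheId = LWLockNewTrancheId();
        LWLockRegisterTranche(MyStatsState->trancheId, "My Stats Tranche");
        LWLockInitialize(&MyStatsState->lock, MyStatsState->trancheId);
    }
}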
@@ -11,6 +11,7 @@
#include "postgres.h"

#include "utils/elog.h"
#include "utils/memutils.h" /* for TopTransactionContext */

#include "distributed/connection_management.h"
#include "distributed/error_codes.h"

@@ -82,6 +82,7 @@ static void AppendStorageParametersToString(StringInfo stringBuffer,
                                            List *optionList);
static const char * convert_aclright_to_string(int aclright);
static void simple_quote_literal(StringInfo buf, const char *val);
static SubscriptingRef * TargetEntryExprFindSubsRef(Expr *expr);
static void AddVacuumParams(ReindexStmt *reindexStmt, StringInfo buffer);
static void process_acl_items(Acl *acl, const char *relationName,
                              const char *attributeName, List **defs);

@@ -470,6 +471,13 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
            appendStringInfo(&buffer, " GENERATED ALWAYS AS (%s) STORED",
                             defaultString);
        }
#if PG_VERSION_NUM >= PG_VERSION_18
        else if (attributeForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
        {
            appendStringInfo(&buffer, " GENERATED ALWAYS AS (%s) VIRTUAL",
                             defaultString);
        }
#endif
        else
        {
            Oid seqOid = GetSequenceOid(tableRelationId, defaultValue->adnum);

@@ -546,6 +554,13 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
            appendStringInfoString(&buffer, "(");
            appendStringInfoString(&buffer, checkString);
            appendStringInfoString(&buffer, ")");

#if PG_VERSION_NUM >= PG_VERSION_18
            if (!checkConstraint->ccenforced)
            {
                appendStringInfoString(&buffer, " NOT ENFORCED");
            }
#endif
        }

        /* close create table's outer parentheses */

@@ -1715,3 +1730,317 @@ RoleSpecString(RoleSpec *spec, bool withQuoteIdentifier)
        }
    }
}


/*
 * Recursively search an expression for a Param and return its paramid
 * Intended for indirection management: UPDATE SET () = (SELECT )
 * Does not cover all options but those supported by Citus.
 */
static int
GetParamId(Node *expr)
{
    int paramid = 0;

    if (expr == NULL)
    {
        return paramid;
    }

    /* If it's a Param, return its attnum */
    if (IsA(expr, Param))
    {
        Param *param = (Param *) expr;
        paramid = param->paramid;
    }
    /* If it's a FuncExpr, search in arguments */
    else if (IsA(expr, FuncExpr))
    {
        FuncExpr *func = (FuncExpr *) expr;
        ListCell *lc;

        foreach(lc, func->args)
        {
            paramid = GetParamId((Node *) lfirst(lc));
            if (paramid != 0)
            {
                break; /* Stop at the first valid paramid */
            }
        }
    }

    return paramid;
}


/*
 * list_sort comparator to sort target list by paramid (in MULTIEXPR)
 * Intended for indirection management: UPDATE SET () = (SELECT )
 */
static int
target_list_cmp(const ListCell *a, const ListCell *b)
{
    TargetEntry *tleA = lfirst(a);
    TargetEntry *tleB = lfirst(b);

    /*
     * Deal with resjunk entries; sublinks are marked resjunk and
     * are placed at the end of the target list so this logic
     * ensures they stay grouped at the end of the target list:
     */
    if (tleA->resjunk || tleB->resjunk)
    {
        return tleA->resjunk - tleB->resjunk;
    }

    int la = GetParamId((Node *) tleA->expr);
    int lb = GetParamId((Node *) tleB->expr);

    /*
     * Should be looking at legitimate param ids
     */
    Assert(la > 0);
    Assert(lb > 0);

    /*
     * Return -1, 0 or 1 depending on if la is less than,
     * equal to or greater than lb
     */
    return (la > lb) - (la < lb);
}


/*
 * Used by get_update_query_targetlist_def() (in ruleutils) to reorder the target
 * list on the left side of the update:
 * SET () = (SELECT )
 * Reordering the SELECT side only does not work, consider a case like:
 * SET (col_1, col3) = (SELECT 1, 3), (col_2) = (SELECT 2)
 * Without ensure_update_targetlist_in_param_order(), this will lead to an incorrect
 * deparsed query:
 * SET (col_1, col2) = (SELECT 1, 3), (col_3) = (SELECT 2)
 */
void
ensure_update_targetlist_in_param_order(List *targetList)
{
    bool need_to_sort_target_list = false;
    int previous_paramid = 0;
    ListCell *l;

    foreach(l, targetList)
    {
        TargetEntry *tle = (TargetEntry *) lfirst(l);

        if (!tle->resjunk)
        {
            int paramid = GetParamId((Node *) tle->expr);
            if (paramid < previous_paramid)
            {
                need_to_sort_target_list = true;
                break;
            }

            previous_paramid = paramid;
        }
    }

    if (need_to_sort_target_list)
    {
        list_sort(targetList, target_list_cmp);
    }
}


/*
 * isSubsRef checks if a given node is a SubscriptingRef or can be
 * reached through an implicit coercion.
 */
static
bool
isSubsRef(Node *node)
{
    if (node == NULL)
    {
        return false;
    }

    if (IsA(node, CoerceToDomain))
    {
        CoerceToDomain *coerceToDomain = (CoerceToDomain *) node;
        if (coerceToDomain->coercionformat != COERCE_IMPLICIT_CAST)
        {
            /* not an implicit coercion, cannot reach to a SubscriptingRef */
            return false;
        }

        node = (Node *) coerceToDomain->arg;
    }

    return (IsA(node, SubscriptingRef));
}


/*
 * checkTlistForSubsRef - checks if any target entry in the list contains a
 * SubscriptingRef or can be reached through an implicit coercion. Used by
 * ExpandMergedSubscriptingRefEntries() to identify if any target entries
 * need to be expanded - if not the original target list is preserved.
 */
static
bool
checkTlistForSubsRef(List *targetEntryList)
{
    ListCell *tgtCell = NULL;

    foreach(tgtCell, targetEntryList)
    {
        TargetEntry *targetEntry = (TargetEntry *) lfirst(tgtCell);
        Expr *expr = targetEntry->expr;

        if (isSubsRef((Node *) expr))
        {
            return true;
        }
    }

    return false;
}


/*
 * ExpandMergedSubscriptingRefEntries takes a list of target entries and expands
 * each one that references a SubscriptingRef node that indicates multiple (field)
 * updates on the same attribute, which is applicable for array/json types atm.
 */
List *
ExpandMergedSubscriptingRefEntries(List *targetEntryList)
{
    List *newTargetEntryList = NIL;
    ListCell *tgtCell = NULL;

    if (!checkTlistForSubsRef(targetEntryList))
    {
        /* No subscripting refs found, return original list */
        return targetEntryList;
    }

    foreach(tgtCell, targetEntryList)
    {
        TargetEntry *targetEntry = (TargetEntry *) lfirst(tgtCell);
        List *expandedTargetEntries = NIL;

        Expr *expr = targetEntry->expr;
        while (expr)
        {
            SubscriptingRef *subsRef = TargetEntryExprFindSubsRef(expr);
            if (!subsRef)
            {
                break;
            }

            /*
             * Remove refexpr from the SubscriptingRef that we are about to
             * wrap in a new TargetEntry and save it for the next one.
             */
            Expr *refexpr = subsRef->refexpr;
            subsRef->refexpr = NULL;

            /*
             * Wrap the Expr that holds SubscriptingRef (directly or indirectly)
             * in a new TargetEntry; note that it doesn't have a refexpr anymore.
             */
            TargetEntry *newTargetEntry = copyObject(targetEntry);
            newTargetEntry->expr = expr;
            expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry);

            /* now inspect the refexpr that SubscriptingRef at hand were holding */
            expr = refexpr;
        }

        if (expandedTargetEntries == NIL)
        {
            /* return original entry since it doesn't hold a SubscriptingRef node */
            newTargetEntryList = lappend(newTargetEntryList, targetEntry);
        }
        else
        {
            /*
             * Need to concat expanded target list entries in reverse order
             * to preserve ordering of the original target entry list.
             */
            List *reversedTgtEntries = NIL;
            ListCell *revCell = NULL;
            foreach(revCell, expandedTargetEntries)
            {
                TargetEntry *tgtEntry = (TargetEntry *) lfirst(revCell);
                reversedTgtEntries = lcons(tgtEntry, reversedTgtEntries);
            }
            newTargetEntryList = list_concat(newTargetEntryList, reversedTgtEntries);
        }
    }

    return newTargetEntryList;
}


/*
 * TargetEntryExprFindSubsRef searches given Expr --assuming that it is part
 * of a target list entry-- to see if it directly (i.e.: itself) or indirectly
 * (e.g.: behind some level of coercions) holds a SubscriptingRef node.
 *
 * Returns the original SubscriptingRef node on success or NULL otherwise.
 *
 * Note that it wouldn't add much value to use expression_tree_walker here
 * since we are only interested in a subset of the fields of a few certain
 * node types.
 */
static SubscriptingRef *
TargetEntryExprFindSubsRef(Expr *expr)
{
    Node *node = (Node *) expr;
    while (node)
    {
        if (IsA(node, FieldStore))
        {
            /*
             * ModifyPartialQuerySupported doesn't allow INSERT/UPDATE via
             * FieldStore. If we decide supporting such commands, then we
             * should take the first element of "newvals" list into account
             * here. This is because, to support such commands, we will need
             * to expand merged FieldStore into separate target entries too.
             *
             * For this reason, this block is not reachable atm and need to
             * uncomment the following if we decide supporting such commands.
             *
             * """
             * FieldStore *fieldStore = (FieldStore *) node;
             * node = (Node *) linitial(fieldStore->newvals);
             * """
             */
            ereport(ERROR, (errmsg("unexpectedly got FieldStore object when "
                                   "generating shard query")));
        }
        else if (IsA(node, CoerceToDomain))
        {
            CoerceToDomain *coerceToDomain = (CoerceToDomain *) node;
            if (coerceToDomain->coercionformat != COERCE_IMPLICIT_CAST)
            {
                /* not an implicit coercion, cannot reach to a SubscriptingRef */
                break;
            }

            node = (Node *) coerceToDomain->arg;
        }
        else if (IsA(node, SubscriptingRef))
        {
            return (SubscriptingRef *) node;
        }
        else
        {
            /* got a node that we are not interested in */
            break;
        }
    }

    return NULL;
}
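target_list_cmp() above uses the branchless three-way comparison (la > lb) - (la < lb), which always yields -1, 0, or 1 and avoids the signed overflow a naive la - lb subtraction could hit. A self-contained illustration of the same idiom in plain C with qsort (just the technique, not Citus code):

#include <stdio.h>
#include <stdlib.h>

/* three-way compare without subtraction: -1, 0, or 1, never overflows */
static int
compare_ints(const void *a, const void *b)
{
    int la = *(const int *) a;
    int lb = *(const int *) b;

    return (la > lb) - (la < lb);
}

int
main(void)
{
    /* paramids as they might appear out of order in a MULTIEXPR target list */
    int paramids[] = { 3, 1, 2 };

    qsort(paramids, 3, sizeof(int), compare_ints);

    for (int i = 0; i < 3; i++)
    {
        printf("%d\n", paramids[i]); /* prints 1, 2, 3 */
    }
    return 0;
}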
@@ -28,7 +28,8 @@ static void AppendCreateExtensionStmtOptions(StringInfo buf, List *options);
static void AppendDropExtensionStmt(StringInfo buf, DropStmt *stmt);
static void AppendExtensionNameList(StringInfo buf, List *objects);
static void AppendAlterExtensionSchemaStmt(StringInfo buf,
                                           AlterObjectSchemaStmt *alterExtensionSchemaStmt);
                                           AlterObjectSchemaStmt *
                                           alterExtensionSchemaStmt);
static void AppendAlterExtensionStmt(StringInfo buf,
                                     AlterExtensionStmt *alterExtensionStmt);


@@ -290,7 +290,9 @@ GetDefElemActionString(DefElemAction action)
        }

        default:
        {
            return "";
        }
    }
}


@@ -118,8 +118,10 @@ ObjectTypeToKeyword(ObjectType objtype)
        }

        default:
        {
            elog(ERROR, "Unknown object type: %d", objtype);
            return NULL;
        }
    }
}


@@ -242,8 +242,8 @@ AppendColumnNames(StringInfo buf, CreateStatsStmt *stmt)
    {
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg(
                     "only simple column references are allowed in CREATE STATISTICS")));
                 errmsg("only simple column references are allowed "
                        "in CREATE STATISTICS")));
    }

    const char *columnName = quote_identifier(column->name);

@@ -536,8 +536,10 @@ GeneratedWhenStr(char generatedWhen)
        }

        default:
        {
            ereport(ERROR, (errmsg("unrecognized generated_when: %d",
                                   generatedWhen)));
        }
    }
}


@@ -649,13 +651,18 @@ AppendAlterTableCmdAddColumn(StringInfo buf, AlterTableCmd *alterTableCmd,
    }
    else if (constraint->contype == CONSTR_GENERATED)
    {
        char attgenerated = 's';
        appendStringInfo(buf, " GENERATED %s AS (%s) STORED",
        char attgenerated = ATTRIBUTE_GENERATED_STORED;
#if PG_VERSION_NUM >= PG_VERSION_18
        attgenerated = constraint->generated_kind;
#endif
        appendStringInfo(buf, " GENERATED %s AS (%s) %s",
                         GeneratedWhenStr(constraint->generated_when),
                         DeparseRawExprForColumnDefault(relationId, typeOid, typmod,
                                                        columnDefinition->colname,
                                                        attgenerated,
                                                        constraint->raw_expr));
                                                        constraint->raw_expr),
                         (attgenerated == ATTRIBUTE_GENERATED_STORED ? "STORED" :
                          "VIRTUAL"));
    }
    else if (constraint->contype == CONSTR_CHECK ||
             constraint->contype == CONSTR_PRIMARY ||

@@ -34,7 +34,14 @@ QualifyCreateStatisticsStmt(Node *node)
{
    CreateStatsStmt *stmt = castNode(CreateStatsStmt, node);

    RangeVar *relation = (RangeVar *) linitial(stmt->relations);
    Node *relationNode = (Node *) linitial(stmt->relations);

    if (!IsA(relationNode, RangeVar))
    {
        return;
    }

    RangeVar *relation = (RangeVar *) relationNode;

    if (relation->schemaname == NULL)
    {

@@ -1568,7 +1568,6 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
        if (colinfo->is_new_col[col_index])
            i++;
    }
    Assert(i == colinfo->num_cols);
    Assert(j == nnewcolumns);
#endif


@@ -3509,6 +3508,8 @@ get_update_query_targetlist_def(Query *query, List *targetList,
    SubLink *cur_ma_sublink;
    List *ma_sublinks;

    targetList = ExpandMergedSubscriptingRefEntries(targetList);

    /*
     * Prepare to deal with MULTIEXPR assignments: collect the source SubLinks
     * into a list. We expect them to appear, in ID order, in resjunk tlist

@@ -3532,6 +3533,8 @@ get_update_query_targetlist_def(Query *query, List *targetList,
            }
        }
    }

    ensure_update_targetlist_in_param_order(targetList);
    }
    next_ma_cell = list_head(ma_sublinks);
    cur_ma_sublink = NULL;

@@ -1585,7 +1585,6 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
        if (colinfo->is_new_col[col_index])
            i++;
    }
    Assert(i == colinfo->num_cols);
    Assert(j == nnewcolumns);
#endif


@@ -3525,6 +3524,8 @@ get_update_query_targetlist_def(Query *query, List *targetList,
    SubLink *cur_ma_sublink;
    List *ma_sublinks;

    targetList = ExpandMergedSubscriptingRefEntries(targetList);

    /*
     * Prepare to deal with MULTIEXPR assignments: collect the source SubLinks
     * into a list. We expect them to appear, in ID order, in resjunk tlist

@@ -3548,6 +3549,8 @@ get_update_query_targetlist_def(Query *query, List *targetList,
            }
        }
    }

    ensure_update_targetlist_in_param_order(targetList);
    }
    next_ma_cell = list_head(ma_sublinks);
    cur_ma_sublink = NULL;

@@ -1599,7 +1599,6 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
        if (colinfo->is_new_col[col_index])
            i++;
    }
    Assert(i == colinfo->num_cols);
    Assert(j == nnewcolumns);
#endif


@@ -3542,6 +3541,8 @@ get_update_query_targetlist_def(Query *query, List *targetList,
    SubLink *cur_ma_sublink;
    List *ma_sublinks;

    targetList = ExpandMergedSubscriptingRefEntries(targetList);

    /*
     * Prepare to deal with MULTIEXPR assignments: collect the source SubLinks
     * into a list. We expect them to appear, in ID order, in resjunk tlist

@@ -3565,6 +3566,8 @@ get_update_query_targetlist_def(Query *query, List *targetList,
            }
        }
    }

    ensure_update_targetlist_in_param_order(targetList);
    }
    next_ma_cell = list_head(ma_sublinks);
    cur_ma_sublink = NULL;

File diff suppressed because it is too large

@@ -642,11 +642,11 @@ static DistributedExecution * CreateDistributedExecution(RowModifyLevel modLevel
                                                         xactProperties,
                                                         List *jobIdList,
                                                         bool localExecutionSupported);
static TransactionProperties DecideTransactionPropertiesForTaskList(RowModifyLevel
                                                                    modLevel,
                                                                    List *taskList,
                                                                    bool
                                                                    exludeFromTransaction);
static TransactionProperties DecideTaskListTransactionProperties(RowModifyLevel
                                                                 modLevel,
                                                                 List *taskList,
                                                                 bool
                                                                 excludeFromTransaction);
static void StartDistributedExecution(DistributedExecution *execution);
static void RunLocalExecution(CitusScanState *scanState, DistributedExecution *execution);
static void RunDistributedExecution(DistributedExecution *execution);

@@ -711,8 +711,8 @@ static void PlacementExecutionReady(TaskPlacementExecution *placementExecution);
static TaskExecutionState TaskExecutionStateMachine(ShardCommandExecution *
                                                    shardCommandExecution);
static int GetEventSetSize(List *sessionList);
static bool ProcessSessionsWithFailedWaitEventSetOperations(
    DistributedExecution *execution);
static bool ProcessSessionsWithFailedWaitEventSetOperations(DistributedExecution *
                                                            execution);
static bool HasIncompleteConnectionEstablishment(DistributedExecution *execution);
static void RebuildWaitEventSet(DistributedExecution *execution);
static void RebuildWaitEventSetForSessions(DistributedExecution *execution);

@@ -760,7 +760,7 @@ AdaptiveExecutorPreExecutorRun(CitusScanState *scanState)
     */
    LockPartitionsForDistributedPlan(distributedPlan);

    ExecuteSubPlans(distributedPlan);
    ExecuteSubPlans(distributedPlan, RequestedForExplainAnalyze(scanState));

    scanState->finishedPreScan = true;
}

@@ -842,7 +842,7 @@ AdaptiveExecutor(CitusScanState *scanState)

    bool excludeFromXact = false;

    TransactionProperties xactProperties = DecideTransactionPropertiesForTaskList(
    TransactionProperties xactProperties = DecideTaskListTransactionProperties(
        distributedPlan->modLevel, taskList, excludeFromXact);

    /*

@@ -941,7 +941,7 @@ ExecuteUtilityTaskList(List *utilityTaskList, bool localExecutionSupported)
        modLevel, utilityTaskList, MaxAdaptiveExecutorPoolSize, localExecutionSupported
        );
    executionParams->xactProperties =
        DecideTransactionPropertiesForTaskList(modLevel, utilityTaskList, false);
        DecideTaskListTransactionProperties(modLevel, utilityTaskList, false);
    executionParams->isUtilityCommand = true;

    return ExecuteTaskListExtended(executionParams);

@@ -963,8 +963,8 @@ ExecuteUtilityTaskListExtended(List *utilityTaskList, int poolSize,

    bool excludeFromXact = false;
    executionParams->xactProperties =
        DecideTransactionPropertiesForTaskList(modLevel, utilityTaskList,
                                               excludeFromXact);
        DecideTaskListTransactionProperties(modLevel, utilityTaskList,
                                            excludeFromXact);
    executionParams->isUtilityCommand = true;

    return ExecuteTaskListExtended(executionParams);

@@ -984,7 +984,7 @@ ExecuteTaskList(RowModifyLevel modLevel, List *taskList)
        );

    bool excludeFromXact = false;
    executionParams->xactProperties = DecideTransactionPropertiesForTaskList(
    executionParams->xactProperties = DecideTaskListTransactionProperties(
        modLevel, taskList, excludeFromXact);

    return ExecuteTaskListExtended(executionParams);

@@ -1010,7 +1010,7 @@ ExecuteTaskListOutsideTransaction(RowModifyLevel modLevel, List *taskList,
        modLevel, taskList, targetPoolSize, localExecutionSupported
        );

    executionParams->xactProperties = DecideTransactionPropertiesForTaskList(
    executionParams->xactProperties = DecideTaskListTransactionProperties(
        modLevel, taskList, true);
    return ExecuteTaskListExtended(executionParams);
}

@@ -1032,7 +1032,7 @@ CreateDefaultExecutionParams(RowModifyLevel modLevel, List *taskList,
        modLevel, taskList, targetPoolSize, localExecutionSupported
        );

    executionParams->xactProperties = DecideTransactionPropertiesForTaskList(
    executionParams->xactProperties = DecideTaskListTransactionProperties(
        modLevel, taskList, false);
    executionParams->expectResults = expectResults;
    executionParams->tupleDestination = tupleDest;

@@ -1252,7 +1252,7 @@ CreateDistributedExecution(RowModifyLevel modLevel, List *taskList,


/*
 * DecideTransactionPropertiesForTaskList decides whether to use remote transaction
 * DecideTaskListTransactionProperties decides whether to use remote transaction
 * blocks, whether to use 2PC for the given task list, and whether to error on any
 * failure.
 *

@@ -1260,8 +1260,8 @@ CreateDistributedExecution(RowModifyLevel modLevel, List *taskList,
 * errorOnAnyFailure, but not the other way around) we keep them in the same place.
 */
static TransactionProperties
DecideTransactionPropertiesForTaskList(RowModifyLevel modLevel, List *taskList, bool
                                       exludeFromTransaction)
DecideTaskListTransactionProperties(RowModifyLevel modLevel, List *taskList, bool
                                    excludeFromTransaction)
{
    TransactionProperties xactProperties;


@@ -1277,7 +1277,7 @@ DecideTransactionPropertiesForTaskList(RowModifyLevel modLevel, List *taskList,
        return xactProperties;
    }

    if (exludeFromTransaction)
    if (excludeFromTransaction)
    {
        xactProperties.useRemoteTransactionBlocks = TRANSACTION_BLOCKS_DISALLOWED;
        return xactProperties;

@@ -2634,10 +2634,8 @@ OpenNewConnections(WorkerPool *workerPool, int newConnectionCount,
        connectionFlags |= adaptiveConnectionManagementFlag;

        /* open a new connection to the worker */
        MultiConnection *connection = StartNodeUserDatabaseConnection(connectionFlags,
                                                                      workerPool->nodeName,
                                                                      workerPool->nodePort,
                                                                      NULL, NULL);
        MultiConnection *connection = StartNodeUserDatabaseConnection(
            connectionFlags, workerPool->nodeName, workerPool->nodePort, NULL, NULL);
        if (!connection)
        {
            /* connection can only be NULL for optional connections */

@@ -3804,7 +3802,7 @@ PopAssignedPlacementExecution(WorkerSession *session)


/*
 * PopAssignedPlacementExecution finds an executable task from the queue of assigned tasks.
 * PopUnAssignedPlacementExecution finds an executable task from the queue of unassigned tasks.
 */
static TaskPlacementExecution *
PopUnassignedPlacementExecution(WorkerPool *workerPool)

@@ -67,8 +67,8 @@ static void CitusPreExecScan(CitusScanState *scanState);
static bool ModifyJobNeedsEvaluation(Job *workerJob);
static void RegenerateTaskForFasthPathQuery(Job *workerJob);
static void RegenerateTaskListForInsert(Job *workerJob);
static DistributedPlan * CopyDistributedPlanWithoutCache(
    DistributedPlan *originalDistributedPlan);
static DistributedPlan * CopyDistributedPlanWithoutCache(DistributedPlan *
                                                         originalDistributedPlan);
static void CitusEndScan(CustomScanState *node);
static void CitusReScan(CustomScanState *node);
static void EnsureForceDelegationDistributionKey(Job *job);

@@ -682,11 +682,13 @@ RegenerateTaskForFasthPathQuery(Job *workerJob)
    }

    bool isLocalTableModification = false;
    bool delayedFastPath = false;
    GenerateSingleShardRouterTaskList(workerJob,
                                      relationShardList,
                                      placementList,
                                      shardId,
                                      isLocalTableModification);
                                      isLocalTableModification,
                                      delayedFastPath);
}


@@ -69,8 +69,8 @@ static List * WrapTasksForPartitioning(const char *resultIdPrefix,
                                       bool binaryFormat);
static List * ExecutePartitionTaskList(List *partitionTaskList,
                                       CitusTableCacheEntry *targetRelation);
static PartitioningTupleDest * CreatePartitioningTupleDest(
    CitusTableCacheEntry *targetRelation);
static PartitioningTupleDest * CreatePartitioningTupleDest(CitusTableCacheEntry *
                                                           targetRelation);
static void PartitioningTupleDestPutTuple(TupleDestination *self, Task *task,
                                          int placementIndex, int queryNumber,
                                          HeapTuple heapTuple, uint64 tupleLibpqSize);

@@ -42,6 +42,7 @@
#include "distributed/merge_planner.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_explain.h"
#include "distributed/multi_partitioning_utils.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/multi_router_planner.h"

@@ -65,7 +66,8 @@ static HTAB * ExecutePlanIntoColocatedIntermediateResults(Oid targetRelationId,
                                                          List *insertTargetList,
                                                          PlannedStmt *selectPlan,
                                                          EState *executorState,
                                                          char *intermediateResultIdPrefix);
                                                          char *
                                                          intermediateResultIdPrefix);
static int PartitionColumnIndexFromColumnList(Oid relationId, List *columnNameList);
static void WrapTaskListForProjection(List *taskList, List *projectedTargetEntries);


@@ -121,7 +123,7 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
            bool binaryFormat =
                CanUseBinaryCopyFormatForTargetList(selectQuery->targetList);

            ExecuteSubPlans(distSelectPlan);
            ExecuteSubPlans(distSelectPlan, RequestedForExplainAnalyze(scanState));

            /*
             * We have a separate directory for each transaction, so choosing

@@ -313,6 +313,7 @@ ExecuteLocalTaskListExtended(List *taskList,
    {
        int taskNumParams = numParams;
        Oid *taskParameterTypes = parameterTypes;
        int taskType = GetTaskQueryType(task);

        if (task->parametersInQueryStringResolved)
        {

@@ -330,7 +331,7 @@ ExecuteLocalTaskListExtended(List *taskList,
         * for concatenated strings, we set queryStringList so that we can access
         * each query string.
         */
        if (GetTaskQueryType(task) == TASK_QUERY_TEXT_LIST)
        if (taskType == TASK_QUERY_TEXT_LIST)
        {
            List *queryStringList = task->taskQuery.data.queryStringList;
            totalRowsProcessed +=

@@ -342,22 +343,31 @@ ExecuteLocalTaskListExtended(List *taskList,
            continue;
        }

        Query *shardQuery = ParseQueryString(TaskQueryString(task),
                                             taskParameterTypes,
                                             taskNumParams);
        if (taskType != TASK_QUERY_LOCAL_PLAN)
        {
            Query *shardQuery = ParseQueryString(TaskQueryString(task),
                                                 taskParameterTypes,
                                                 taskNumParams);

        int cursorOptions = CURSOR_OPT_PARALLEL_OK;

            int cursorOptions = CURSOR_OPT_PARALLEL_OK;

        /*
         * Altough the shardQuery is local to this node, we prefer planner()
         * over standard_planner(). The primary reason for that is Citus itself
         * is not very tolarent standard_planner() calls that doesn't go through
         * distributed_planner() because of the way that restriction hooks are
         * implemented. So, let planner to call distributed_planner() which
         * eventually calls standard_planner().
         */
        localPlan = planner(shardQuery, NULL, cursorOptions, paramListInfo);
            /*
             * Altough the shardQuery is local to this node, we prefer planner()
             * over standard_planner(). The primary reason for that is Citus itself
             * is not very tolarent standard_planner() calls that doesn't go through
             * distributed_planner() because of the way that restriction hooks are
             * implemented. So, let planner to call distributed_planner() which
             * eventually calls standard_planner().
             */
            localPlan = planner(shardQuery, NULL, cursorOptions, paramListInfo);
        }
        else
        {
            ereport(DEBUG2, (errmsg(
                                 "Local executor: Using task's cached local plan for task %u",
                                 task->taskId)));
            localPlan = TaskQueryLocalPlan(task);
        }
    }

    char *shardQueryString = NULL;

@@ -754,14 +764,29 @@ ExecuteTaskPlan(PlannedStmt *taskPlan, char *queryString,
                       localPlacementIndex) :
                   CreateDestReceiver(DestNone);

    /* Create a QueryDesc for the query */
    QueryDesc *queryDesc = CreateQueryDesc(taskPlan, queryString,
                                           GetActiveSnapshot(), InvalidSnapshot,
                                           destReceiver, paramListInfo,
                                           queryEnv, 0);
    QueryDesc *queryDesc = CreateQueryDesc(
        taskPlan,            /* PlannedStmt *plannedstmt */
        queryString,         /* const char *sourceText */
        GetActiveSnapshot(), /* Snapshot snapshot */
        InvalidSnapshot,     /* Snapshot crosscheck_snapshot */
        destReceiver,        /* DestReceiver *dest */
        paramListInfo,       /* ParamListInfo params */
        queryEnv,            /* QueryEnvironment *queryEnv */
        0                    /* int instrument_options */
        );

    ExecutorStart(queryDesc, eflags);

    /* run the plan: count = 0 (all rows) */
#if PG_VERSION_NUM >= PG_VERSION_18

    /* PG 18+ dropped the “execute_once” boolean */
    ExecutorRun(queryDesc, scanDirection, 0L);
#else

    /* PG 17 and prevs still expect the 4th ‘once’ argument */
    ExecutorRun(queryDesc, scanDirection, 0L, true);
#endif

    /*
     * We'll set the executorState->es_processed later, for now only remember
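The #if PG_VERSION_NUM guards above handle the PostgreSQL 18 signature change of ExecutorRun(), which dropped the trailing execute_once boolean. An alternative to repeating the guard at every call site is a small compat wrapper; a sketch under that assumption (the ExecutorRun_compat macro name is hypothetical, not necessarily what Citus's own compat layer defines):

/* in a version-compat header, included after executor/executor.h */
#if PG_VERSION_NUM >= 180000

/* PG 18 removed the execute_once argument */
#define ExecutorRun_compat(queryDesc, direction, count, execute_once) \
    ExecutorRun(queryDesc, direction, count)
#else
#define ExecutorRun_compat(queryDesc, direction, count, execute_once) \
    ExecutorRun(queryDesc, direction, count, execute_once)
#endif

/* call sites then stay identical across versions, e.g.: */
/* ExecutorRun_compat(queryDesc, scanDirection, 0L, true); */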
@@ -799,7 +824,7 @@ RecordNonDistTableAccessesForTask(Task *task)
         * if we're wrong.
         */
        ereport(ERROR, (errmsg("shard " UINT64_FORMAT " does not have any shard "
                               "placements",
                                       "placements",
                        task->anchorShardId)));
    }


@@ -23,6 +23,7 @@
#include "distributed/merge_executor.h"
#include "distributed/merge_planner.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_explain.h"
#include "distributed/multi_partitioning_utils.h"
#include "distributed/multi_router_planner.h"
#include "distributed/repartition_executor.h"

@@ -37,11 +38,13 @@ static HTAB * ExecuteMergeSourcePlanIntoColocatedIntermediateResults(Oid targetR
                                                                     sourceTargetList,
                                                                     PlannedStmt *
                                                                     sourcePlan,
                                                                     EState *executorState,
                                                                     EState *
                                                                     executorState,
                                                                     char *
                                                                     intermediateResultIdPrefix,
                                                                     int
                                                                     partitionColumnIndex);
                                                                     partitionColumnIndex)
                                                                     ;


/*

@@ -132,7 +135,7 @@ ExecuteSourceAtWorkerAndRepartition(CitusScanState *scanState)
    ereport(DEBUG1, (errmsg("Executing subplans of the source query and "
                            "storing the results at the respective node(s)")));

    ExecuteSubPlans(distSourcePlan);
    ExecuteSubPlans(distSourcePlan, RequestedForExplainAnalyze(scanState));

    /*
     * We have a separate directory for each transaction, so choosing

@@ -235,7 +235,20 @@ CitusExecutorRun(QueryDesc *queryDesc,
        /* postgres will switch here again and will restore back on its own */
        MemoryContextSwitchTo(oldcontext);

        standard_ExecutorRun(queryDesc, direction, count, execute_once);
#if PG_VERSION_NUM >= PG_VERSION_18

        /* PG18+ drops the “execute_once” argument */
        standard_ExecutorRun(queryDesc,
                             direction,
                             count);
#else

        /* PG17-: original four-arg signature */
        standard_ExecutorRun(queryDesc,
                             direction,
                             count,
                             execute_once);
#endif
    }

    if (totalTime)

@@ -675,7 +688,7 @@ ExecuteQueryIntoDestReceiver(Query *query, ParamListInfo params, DestReceiver *d
 * ExecutePlanIntoDestReceiver executes a query plan and sends results to the given
 * DestReceiver.
 */
void
uint64
ExecutePlanIntoDestReceiver(PlannedStmt *queryPlan, ParamListInfo params,
                            DestReceiver *dest)
{

@@ -688,16 +701,44 @@ ExecutePlanIntoDestReceiver(PlannedStmt *queryPlan, ParamListInfo params,
    /* don't display the portal in pg_cursors, it is for internal use only */
    portal->visible = false;

    PortalDefineQuery(portal,
                      NULL,
                      "",
                      CMDTAG_SELECT,
                      list_make1(queryPlan),
                      NULL);
    PortalDefineQuery(
        portal,
        NULL,                  /* no prepared statement name */
        "",                    /* query text */
        CMDTAG_SELECT,         /* command tag */
        list_make1(queryPlan), /* list of PlannedStmt* */
        NULL                   /* no CachedPlan */
        );

    PortalStart(portal, params, eflags, GetActiveSnapshot());
    PortalRun(portal, count, false, true, dest, dest, NULL);


    QueryCompletion qc = { 0 };

#if PG_VERSION_NUM >= PG_VERSION_18

    /* PG 18+: six-arg signature (drop the run_once bool) */
    PortalRun(portal,
              count, /* how many rows to fetch */
              false, /* isTopLevel */
              dest,  /* DestReceiver *dest */
              dest,  /* DestReceiver *altdest */
              &qc);  /* QueryCompletion *qc */
#else

    /* PG 17-: original seven-arg signature */
    PortalRun(portal,
              count, /* how many rows to fetch */
              false, /* isTopLevel */
              true,  /* run_once */
              dest,  /* DestReceiver *dest */
              dest,  /* DestReceiver *altdest */
              &qc);  /* QueryCompletion *qc */
#endif

    PortalDrop(portal, false);

    return qc.nprocessed;
}


@@ -242,7 +242,27 @@ worker_partition_query_result(PG_FUNCTION_ARGS)
                                     allowNullPartitionColumnValues);

    /* execute the query */
    PortalRun(portal, FETCH_ALL, false, true, dest, dest, NULL);
#if PG_VERSION_NUM >= PG_VERSION_18

    /* PG18+: drop the “run_once” bool */
    PortalRun(portal,
              FETCH_ALL, /* count */
              false,     /* isTopLevel */
              dest,      /* dest receiver */
              dest,      /* alternative dest */
              NULL);     /* QueryCompletion *qc */
#else

    /* PG15–17: original seven‐arg signature */
    PortalRun(portal,
              FETCH_ALL, /* count */
              false,     /* isTopLevel */
              true,      /* run_once */
              dest,      /* dest receiver */
              dest,      /* alternative dest */
              NULL);     /* QueryCompletion *qc */
#endif


    /* construct the output result */
    TupleDesc returnTupleDesc = NULL;
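The same PG 18 churn affects PortalRun(), which lost its run_once argument. As with ExecutorRun() earlier, a single compat macro can keep call sites version-agnostic; a sketch under that assumption (PortalRun_compat is a hypothetical name, not necessarily what Citus defines):

#if PG_VERSION_NUM >= 180000

/* PG 18 dropped the run_once boolean from PortalRun() */
#define PortalRun_compat(portal, count, isTopLevel, run_once, dest, altdest, qc) \
    PortalRun(portal, count, isTopLevel, dest, altdest, qc)
#else
#define PortalRun_compat(portal, count, isTopLevel, run_once, dest, altdest, qc) \
    PortalRun(portal, count, isTopLevel, run_once, dest, altdest, qc)
#endif

/* e.g.: PortalRun_compat(portal, FETCH_ALL, false, true, dest, dest, NULL); */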
@@ -295,8 +315,15 @@ StartPortalForQueryExecution(const char *queryString)
    /* don't display the portal in pg_cursors, it is for internal use only */
    portal->visible = false;

    PortalDefineQuery(portal, NULL, queryString, CMDTAG_SELECT,
                      list_make1(queryPlan), NULL);
    PortalDefineQuery(
        portal,
        NULL,
        queryString,
        CMDTAG_SELECT,
        list_make1(queryPlan),
        NULL /* no CachedPlan */
        );

    int eflags = 0;
    PortalStart(portal, NULL, eflags, GetActiveSnapshot());


@@ -30,13 +30,22 @@ int MaxIntermediateResult = 1048576; /* maximum size in KB the intermediate resu
/* when this is true, we enforce intermediate result size limit in all executors */
int SubPlanLevel = 0;

/*
 * SubPlanExplainAnalyzeContext is both a memory context for storing
 * subplans’ EXPLAIN ANALYZE output and a flag indicating that execution
 * is running under EXPLAIN ANALYZE for subplans.
 */
MemoryContext SubPlanExplainAnalyzeContext = NULL;
SubPlanExplainOutputData *SubPlanExplainOutput;
extern uint8 TotalExplainOutputCapacity;
extern uint8 NumTasksOutput;

/*
 * ExecuteSubPlans executes a list of subplans from a distributed plan
 * by sequentially executing each plan from the top.
 */
void
ExecuteSubPlans(DistributedPlan *distributedPlan)
ExecuteSubPlans(DistributedPlan *distributedPlan, bool explainAnalyzeEnabled)
{
    uint64 planId = distributedPlan->planId;
    List *subPlanList = distributedPlan->subPlanList;
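SubPlanExplainAnalyzeContext above is documented as serving double duty: it is storage for subplan EXPLAIN ANALYZE output and, by being non-NULL, the signal that such output should be collected at all. A tiny standalone illustration of that dual-role-pointer idiom in plain C (names are illustrative only):

#include <stdio.h>

/* a context pointer that doubles as the "feature enabled" flag */
static const char *FeatureContext = NULL;

static void
run_once(void)
{
    if (FeatureContext != NULL)
    {
        printf("feature on, output goes to: %s\n", FeatureContext);
    }
    else
    {
        printf("feature off\n");
    }
}

int
main(void)
{
    run_once();                  /* feature off */
    FeatureContext = "my-ctx";   /* enabling == providing storage */
    run_once();                  /* feature on */
    return 0;
}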
@@ -47,6 +56,19 @@ ExecuteSubPlans(DistributedPlan *distributedPlan)
        return;
    }

    /*
     * If the root DistributedPlan has EXPLAIN ANALYZE enabled,
     * its subplans should also have EXPLAIN ANALYZE enabled.
     */
    if (explainAnalyzeEnabled)
    {
        SubPlanExplainAnalyzeContext = GetMemoryChunkContext(distributedPlan);
    }
    else
    {
        SubPlanExplainAnalyzeContext = NULL;
    }

    HTAB *intermediateResultsHash = MakeIntermediateResultHTAB();
    RecordSubplanExecutionsOnNodes(intermediateResultsHash, distributedPlan);


@@ -79,7 +101,23 @@ ExecuteSubPlans(DistributedPlan *distributedPlan)

        TimestampTz startTimestamp = GetCurrentTimestamp();

        ExecutePlanIntoDestReceiver(plannedStmt, params, copyDest);
        uint64 nprocessed;

        PG_TRY();
        {
            nprocessed =
                ExecutePlanIntoDestReceiver(plannedStmt, params, copyDest);
        }
        PG_CATCH();
        {
            SubPlanExplainAnalyzeContext = NULL;
            SubPlanExplainOutput = NULL;
            TotalExplainOutputCapacity = 0;
            NumTasksOutput = 0;
            PG_RE_THROW();
        }
        PG_END_TRY();


        /*
         * EXPLAIN ANALYZE instrumentations. Calculating these are very light-weight,
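The PG_TRY/PG_CATCH block above exists to reset module-level globals before re-throwing, so a failed subplan execution cannot leak EXPLAIN ANALYZE state into the next statement. A generic sketch of that reset-then-rethrow pattern, assuming a PostgreSQL extension environment (the global names are illustrative):

#include "postgres.h"

/* illustrative module-level state that must not survive an error */
static void *SavedOutputBuffer = NULL;
static int SavedOutputCount = 0;

static void
run_step_with_cleanup(void (*step) (void))
{
    PG_TRY();
    {
        step();
    }
    PG_CATCH();
    {
        /* reset the globals so the next statement starts clean ... */
        SavedOutputBuffer = NULL;
        SavedOutputCount = 0;

        /* ... then propagate the original error unchanged */
        PG_RE_THROW();
    }
    PG_END_TRY();
}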
@@ -94,10 +132,24 @@ ExecuteSubPlans(DistributedPlan *distributedPlan)
        subPlan->durationMillisecs += durationMicrosecs * MICRO_TO_MILLI_SECOND;

        subPlan->bytesSentPerWorker = RemoteFileDestReceiverBytesSent(copyDest);
        subPlan->ntuples = nprocessed;
        subPlan->remoteWorkerCount = list_length(remoteWorkerNodeList);
        subPlan->writeLocalFile = entry->writeLocalFile;

        SubPlanLevel--;

        /*
         * Save the EXPLAIN ANALYZE output(s) for later extraction in ExplainSubPlans().
         * Because the SubPlan context isn’t available during distributed execution,
         * pass the pointer as a global variable in SubPlanExplainOutput.
         */
        subPlan->totalExplainOutput = SubPlanExplainOutput;
        subPlan->numTasksOutput = NumTasksOutput;
        SubPlanExplainOutput = NULL;
        TotalExplainOutputCapacity = 0;
        NumTasksOutput = 0;
        FreeExecutorState(estate);
    }

    SubPlanExplainAnalyzeContext = NULL;
}

@@ -180,9 +180,10 @@ static bool FollowExtAndInternalDependencies(ObjectAddressCollector *collector,
                                             DependencyDefinition *definition);
static void ApplyAddToDependencyList(ObjectAddressCollector *collector,
                                     DependencyDefinition *definition);
static void ApplyAddCitusDependedObjectsToDependencyList(
    ObjectAddressCollector *collector,
    DependencyDefinition *definition);
static void ApplyAddCitusDependedObjectsToDependencyList(ObjectAddressCollector *
                                                         collector,
                                                         DependencyDefinition *
                                                         definition);
static List * GetViewRuleReferenceDependencyList(Oid relationId);
static List * ExpandCitusSupportedTypes(ObjectAddressCollector *collector,
                                        ObjectAddress target);

@@ -1249,8 +1250,9 @@ IsObjectAddressOwnedByCitus(const ObjectAddress *objectAddress)
        return false;
    }

    bool ownedByCitus = extObjectAddress.objectId == citusId;
    bool ownedByCitusColumnar = extObjectAddress.objectId == citusColumnarId;
    bool ownedByCitus = OidIsValid(citusId) && extObjectAddress.objectId == citusId;
    bool ownedByCitusColumnar = OidIsValid(citusColumnarId) &&
                                extObjectAddress.objectId == citusColumnarId;

    return ownedByCitus || ownedByCitusColumnar;
}
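The added OidIsValid() guards in the preceding hunk matter because a missing extension is typically reported as InvalidOid (0), and comparing two such sentinel values can match spuriously. A tiny standalone demonstration of the failure mode, with the relevant PostgreSQL definitions inlined so it compiles on its own:

#include <stdbool.h>
#include <stdio.h>

/* inlined from PostgreSQL headers for a standalone demo */
typedef unsigned int Oid;
#define InvalidOid ((Oid) 0)
#define OidIsValid(objectId) ((bool) ((objectId) != InvalidOid))

int
main(void)
{
    Oid citusColumnarId = InvalidOid; /* columnar extension not installed */
    Oid someObjectId = InvalidOid;    /* a lookup that also failed */

    /* buggy: InvalidOid == InvalidOid is true, a false positive */
    bool buggy = (someObjectId == citusColumnarId);

    /* fixed: the guard rejects the comparison outright */
    bool fixed = OidIsValid(citusColumnarId) && someObjectId == citusColumnarId;

    printf("buggy=%d fixed=%d\n", buggy, fixed); /* buggy=1 fixed=0 */
    return 0;
}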
@@ -338,8 +338,8 @@ ShouldMarkRelationDistributed(Oid relationId)
    bool ownedByExtension = IsTableOwnedByExtension(relationId);
    bool alreadyDistributed = IsObjectDistributed(relationAddress);
    bool hasUnsupportedDependency =
        DeferErrorIfAnyObjectHasUnsupportedDependency(list_make1(relationAddress)) !=
        NULL;
        DeferErrorIfAnyObjectHasUnsupportedDependency(
            list_make1(relationAddress)) != NULL;
    bool hasCircularDependency =
        DeferErrorIfCircularDependencyExists(relationAddress) != NULL;


@@ -680,11 +680,9 @@ UpdateDistributedObjectColocationId(uint32 oldColocationId,
    HeapTuple heapTuple;
    while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
    {
        Datum values[Natts_pg_dist_object];
        bool isnull[Natts_pg_dist_object];
        bool replace[Natts_pg_dist_object];

        memset(replace, 0, sizeof(replace));
        Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
        bool *isnull = palloc0(tupleDescriptor->natts * sizeof(bool));
        bool *replace = palloc0(tupleDescriptor->natts * sizeof(bool));

        replace[Anum_pg_dist_object_colocationid - 1] = true;


@@ -698,6 +696,10 @@ UpdateDistributedObjectColocationId(uint32 oldColocationId,

        CatalogTupleUpdate(pgDistObjectRel, &heapTuple->t_self, heapTuple);
        CitusInvalidateRelcacheByRelid(DistObjectRelationId());

        pfree(values);
        pfree(isnull);
        pfree(replace);
    }

    systable_endscan(scanDescriptor);

@@ -783,3 +785,23 @@ DistributedSequenceList(void)
    relation_close(pgDistObjectRel, AccessShareLock);
    return distributedSequenceList;
}


/*
 * GetForceDelegationAttrIndexInPgDistObject returns attrnum for force_delegation attr.
 *
 * force_delegation attr was added to table pg_dist_object using alter operation after
 * the version where Citus started supporting downgrades, and it's only column that we've
 * introduced to pg_dist_object since then.
 *
 * And in case of a downgrade + upgrade, tupleDesc->natts becomes greater than
 * Natts_pg_dist_object and when this happens, then we know that attrnum force_delegation is
 * not Anum_pg_dist_object_force_delegation anymore but tupleDesc->natts - 1.
 */
int
GetForceDelegationAttrIndexInPgDistObject(TupleDesc tupleDesc)
{
    return tupleDesc->natts == Natts_pg_dist_object
           ? (Anum_pg_dist_object_force_delegation - 1)
           : tupleDesc->natts - 1;
}
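The arithmetic in GetForceDelegationAttrIndexInPgDistObject() deserves a concrete check: with the compiled-in catalog layout the 0-based index is simply Anum - 1, but after a downgrade + upgrade cycle the re-added column lands at the end of a wider tuple descriptor. A standalone sketch with illustrative values (the constants below are stand-ins, not the real catalog numbers):

#include <stdio.h>

/* stand-in values for the compiled-in pg_dist_object layout */
#define NATTS_PG_DIST_OBJECT 9
#define ANUM_FORCE_DELEGATION 9 /* 1-based attribute number */

/* mirrors the logic of GetForceDelegationAttrIndexInPgDistObject() */
static int
force_delegation_index(int tupleDescNatts)
{
    return tupleDescNatts == NATTS_PG_DIST_OBJECT
           ? (ANUM_FORCE_DELEGATION - 1)
           : tupleDescNatts - 1;
}

int
main(void)
{
    /* fresh install: descriptor matches the compiled-in layout */
    printf("%d\n", force_delegation_index(9));  /* 8 */

    /* downgrade + upgrade: the re-added column widens the descriptor */
    printf("%d\n", force_delegation_index(10)); /* 9: last attribute wins */
    return 0;
}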
@@ -221,6 +221,7 @@ typedef struct MetadataCacheData
    Oid textCopyFormatId;
    Oid primaryNodeRoleId;
    Oid secondaryNodeRoleId;
    Oid unavailableNodeRoleId;
    Oid pgTableIsVisibleFuncId;
    Oid citusTableIsVisibleFuncId;
    Oid distAuthinfoRelationId;

@@ -320,9 +321,10 @@ static void CachedRelationNamespaceLookup(const char *relationName, Oid relnames
static void CachedRelationNamespaceLookupExtended(const char *relationName,
                                                  Oid renamespace, Oid *cachedOid,
                                                  bool missing_ok);
static ShardPlacement * ResolveGroupShardPlacement(
    GroupShardPlacement *groupShardPlacement, CitusTableCacheEntry *tableEntry,
    int shardIndex);
static ShardPlacement * ResolveGroupShardPlacement(GroupShardPlacement *
                                                   groupShardPlacement,
                                                   CitusTableCacheEntry *tableEntry,
                                                   int shardIndex);
static Oid LookupEnumValueId(Oid typeId, char *valueName);
static void InvalidateCitusTableCacheEntrySlot(CitusTableCacheEntrySlot *cacheSlot);
static void InvalidateDistTableCache(void);

@@ -521,6 +523,32 @@ IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tabl
}


/*
 * IsFirstShard returns true if the given shardId is the first shard.
 */
bool
IsFirstShard(CitusTableCacheEntry *tableEntry, uint64 shardId)
{
    if (tableEntry == NULL || tableEntry->sortedShardIntervalArray == NULL)
    {
        return false;
    }
    if (tableEntry->sortedShardIntervalArray[0]->shardId == INVALID_SHARD_ID)
    {
        return false;
    }

    if (shardId == tableEntry->sortedShardIntervalArray[0]->shardId)
    {
        return true;
    }
    else
    {
        return false;
    }
}


/*
 * HasDistributionKey returns true if given Citus table has a distribution key.
 */

@@ -729,12 +757,13 @@ PartitionMethodViaCatalog(Oid relationId)
        return DISTRIBUTE_BY_INVALID;
    }

    Datum datumArray[Natts_pg_dist_partition];
    bool isNullArray[Natts_pg_dist_partition];

    Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);

    TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);

    Datum *datumArray = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
    bool *isNullArray = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));

    heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);

    if (isNullArray[Anum_pg_dist_partition_partmethod - 1])

@@ -742,6 +771,8 @@ PartitionMethodViaCatalog(Oid relationId)
        /* partition method cannot be NULL, still let's make sure */
        heap_freetuple(partitionTuple);
        table_close(pgDistPartition, NoLock);
        pfree(datumArray);
        pfree(isNullArray);
        return DISTRIBUTE_BY_INVALID;
    }


@@ -750,6 +781,8 @@ PartitionMethodViaCatalog(Oid relationId)

    heap_freetuple(partitionTuple);
    table_close(pgDistPartition, NoLock);
    pfree(datumArray);
    pfree(isNullArray);

    return partitionMethodChar;
}

@@ -768,12 +801,12 @@ PartitionColumnViaCatalog(Oid relationId)
        return NULL;
    }

    Datum datumArray[Natts_pg_dist_partition];
    bool isNullArray[Natts_pg_dist_partition];

    Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);

    TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
    Datum *datumArray = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
    bool *isNullArray = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));

    heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);

    if (isNullArray[Anum_pg_dist_partition_partkey - 1])

@@ -781,6 +814,8 @@ PartitionColumnViaCatalog(Oid relationId)
        /* partition key cannot be NULL, still let's make sure */
        heap_freetuple(partitionTuple);
        table_close(pgDistPartition, NoLock);
        pfree(datumArray);
        pfree(isNullArray);
        return NULL;
    }


@@ -795,6 +830,8 @@ PartitionColumnViaCatalog(Oid relationId)

    heap_freetuple(partitionTuple);
    table_close(pgDistPartition, NoLock);
    pfree(datumArray);
    pfree(isNullArray);

    return partitionColumn;
}

@@ -813,12 +850,13 @@ ColocationIdViaCatalog(Oid relationId)
        return INVALID_COLOCATION_ID;
    }

    Datum datumArray[Natts_pg_dist_partition];
    bool isNullArray[Natts_pg_dist_partition];

    Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);

    TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);

    Datum *datumArray = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
    bool *isNullArray = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));

    heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);

    if (isNullArray[Anum_pg_dist_partition_colocationid - 1])

@@ -826,6 +864,8 @@ ColocationIdViaCatalog(Oid relationId)
        /* colocation id cannot be NULL, still let's make sure */
        heap_freetuple(partitionTuple);
        table_close(pgDistPartition, NoLock);
        pfree(datumArray);
        pfree(isNullArray);
        return INVALID_COLOCATION_ID;
    }


@@ -834,6 +874,8 @@ ColocationIdViaCatalog(Oid relationId)

    heap_freetuple(partitionTuple);
    table_close(pgDistPartition, NoLock);
    pfree(datumArray);
    pfree(isNullArray);

    return colocationId;
}

@@ -1690,8 +1732,11 @@ LookupDistObjectCacheEntry(Oid classid, Oid objid, int32 objsubid)

    if (HeapTupleIsValid(pgDistObjectTup))
    {
        Datum datumArray[Natts_pg_dist_object];
        bool isNullArray[Natts_pg_dist_object];
        Datum *datumArray = palloc(pgDistObjectTupleDesc->natts * sizeof(Datum));
        bool *isNullArray = palloc(pgDistObjectTupleDesc->natts * sizeof(bool));

        int forseDelegationIndex =
            GetForceDelegationAttrIndexInPgDistObject(pgDistObjectTupleDesc);

        heap_deform_tuple(pgDistObjectTup, pgDistObjectTupleDesc, datumArray,
                          isNullArray);

@@ -1706,7 +1751,10 @@ LookupDistObjectCacheEntry(Oid classid, Oid objid, int32 objsubid)
            DatumGetInt32(datumArray[Anum_pg_dist_object_colocationid - 1]);

        cacheEntry->forceDelegation =
            DatumGetBool(datumArray[Anum_pg_dist_object_force_delegation - 1]);
            DatumGetBool(datumArray[forseDelegationIndex]);

        pfree(datumArray);
        pfree(isNullArray);
    }
    else
    {

@@ -1741,10 +1789,11 @@ BuildCitusTableCacheEntry(Oid relationId)
    }

    MemoryContext oldContext = NULL;
    Datum datumArray[Natts_pg_dist_partition];
    bool isNullArray[Natts_pg_dist_partition];

    TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
    Datum *datumArray = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
    bool *isNullArray = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));

    heap_deform_tuple(distPartitionTuple, tupleDescriptor, datumArray, isNullArray);

    CitusTableCacheEntry *cacheEntry =

@@ -1797,7 +1846,7 @@ BuildCitusTableCacheEntry(Oid relationId)
        cacheEntry->replicationModel = DatumGetChar(replicationModelDatum);
    }

    if (isNullArray[Anum_pg_dist_partition_autoconverted - 1])
    if (isNullArray[GetAutoConvertedAttrIndexInPgDistPartition(tupleDescriptor)])
    {
        /*
         * We don't expect this to happen, but set it to false (the default value)

@@ -1808,7 +1857,7 @@ BuildCitusTableCacheEntry(Oid relationId)
    else
    {
        cacheEntry->autoConverted = DatumGetBool(
            datumArray[Anum_pg_dist_partition_autoconverted - 1]);
            datumArray[GetAutoConvertedAttrIndexInPgDistPartition(tupleDescriptor)]);
    }

    heap_freetuple(distPartitionTuple);

@@ -1852,6 +1901,9 @@ BuildCitusTableCacheEntry(Oid relationId)

    table_close(pgDistPartition, NoLock);

    pfree(datumArray);
    pfree(isNullArray);

    cacheEntry->isValid = true;

    return cacheEntry;

@@ -3550,6 +3602,20 @@ SecondaryNodeRoleId(void)
}


/* return the Oid of the 'unavailable' nodeRole enum value */
Oid
UnavailableNodeRoleId(void)
{
    if (!MetadataCache.unavailableNodeRoleId)
    {
        MetadataCache.unavailableNodeRoleId = LookupStringEnumValueId("noderole",
                                                                      "unavailable");
    }

    return MetadataCache.unavailableNodeRoleId;
}


Oid
CitusJobStatusScheduledId(void)
{

@@ -4367,6 +4433,8 @@ InitializeWorkerNodeCache(void)
        workerNode->isActive = currentNode->isActive;
        workerNode->nodeRole = currentNode->nodeRole;
        workerNode->shouldHaveShards = currentNode->shouldHaveShards;
        workerNode->nodeprimarynodeid = currentNode->nodeprimarynodeid;
        workerNode->nodeisclone = currentNode->nodeisclone;
        strlcpy(workerNode->nodeCluster, currentNode->nodeCluster, NAMEDATALEN);

        newWorkerNodeArray[workerNodeIndex++] = workerNode;

@@ -5011,10 +5079,13 @@ CitusTableTypeIdList(CitusTableType citusTableType)
    TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);

    HeapTuple heapTuple = systable_getnext(scanDescriptor);
    Datum *datumArray = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
    bool *isNullArray = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));
    while (HeapTupleIsValid(heapTuple))
    {
        bool isNullArray[Natts_pg_dist_partition];
        Datum datumArray[Natts_pg_dist_partition];
        memset(datumArray, 0, tupleDescriptor->natts * sizeof(Datum));
        memset(isNullArray, 0, tupleDescriptor->natts * sizeof(bool));

        heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray);

        Datum partMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1];

@@ -5038,6 +5109,9 @@ CitusTableTypeIdList(CitusTableType citusTableType)
        heapTuple = systable_getnext(scanDescriptor);
    }

    pfree(datumArray);
    pfree(isNullArray);

    systable_endscan(scanDescriptor);
    table_close(pgDistPartition, AccessShareLock);


@@ -58,6 +58,7 @@

#include "distributed/argutils.h"
#include "distributed/backend_data.h"
#include "distributed/background_worker_utils.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/colocation_utils.h"
#include "distributed/commands.h"

@@ -573,13 +574,17 @@ FetchRelationIdFromPgPartitionHeapTuple(HeapTuple heapTuple, TupleDesc tupleDesc
{
    Assert(heapTuple->t_tableOid == DistPartitionRelationId());

    bool isNullArray[Natts_pg_dist_partition];
    Datum datumArray[Natts_pg_dist_partition];
    Datum *datumArray = (Datum *) palloc(tupleDesc->natts * sizeof(Datum));
    bool *isNullArray = (bool *) palloc(tupleDesc->natts * sizeof(bool));

    heap_deform_tuple(heapTuple, tupleDesc, datumArray, isNullArray);

    Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1];
    Oid relationId = DatumGetObjectId(relationIdDatum);

    pfree(datumArray);
    pfree(isNullArray);

    return relationId;
}


@@ -814,7 +819,7 @@ NodeListInsertCommand(List *workerNodeList)
    appendStringInfo(nodeListInsertCommand,
                     "INSERT INTO pg_dist_node (nodeid, groupid, nodename, nodeport, "
                     "noderack, hasmetadata, metadatasynced, isactive, noderole, "
                     "nodecluster, shouldhaveshards) VALUES ");
                     "nodecluster, shouldhaveshards, nodeisclone, nodeprimarynodeid) VALUES ");

    /* iterate over the worker nodes, add the values */
    WorkerNode *workerNode = NULL;

@@ -824,13 +829,14 @@ NodeListInsertCommand(List *workerNodeList)
        char *metadataSyncedString = workerNode->metadataSynced ? "TRUE" : "FALSE";
        char *isActiveString = workerNode->isActive ? "TRUE" : "FALSE";
        char *shouldHaveShards = workerNode->shouldHaveShards ? "TRUE" : "FALSE";
        char *nodeiscloneString = workerNode->nodeisclone ? "TRUE" : "FALSE";

        Datum nodeRoleOidDatum = ObjectIdGetDatum(workerNode->nodeRole);
        Datum nodeRoleStringDatum = DirectFunctionCall1(enum_out, nodeRoleOidDatum);
        char *nodeRoleString = DatumGetCString(nodeRoleStringDatum);

        appendStringInfo(nodeListInsertCommand,
                         "(%d, %d, %s, %d, %s, %s, %s, %s, '%s'::noderole, %s, %s)",
                         "(%d, %d, %s, %d, %s, %s, %s, %s, '%s'::noderole, %s, %s, %s, %d)",
                         workerNode->nodeId,
                         workerNode->groupId,
                         quote_literal_cstr(workerNode->workerName),

@@ -841,7 +847,9 @@ NodeListInsertCommand(List *workerNodeList)
                         isActiveString,
                         nodeRoleString,
                         quote_literal_cstr(workerNode->nodeCluster),
                         shouldHaveShards);
                         shouldHaveShards,
                         nodeiscloneString,
                         workerNode->nodeprimarynodeid);

        processedWorkerNodeCount++;
        if (processedWorkerNodeCount != workerCount)

@@ -875,9 +883,11 @@ NodeListIdempotentInsertCommand(List *workerNodeList)
                               "hasmetadata = EXCLUDED.hasmetadata, "
                               "isactive = EXCLUDED.isactive, "
                               "noderole = EXCLUDED.noderole, "
                               "nodecluster = EXCLUDED.nodecluster ,"
                               "nodecluster = EXCLUDED.nodecluster, "
                               "metadatasynced = EXCLUDED.metadatasynced, "
                               "shouldhaveshards = EXCLUDED.shouldhaveshards";
                               "shouldhaveshards = EXCLUDED.shouldhaveshards, "
                               "nodeisclone = EXCLUDED.nodeisclone, "
                               "nodeprimarynodeid = EXCLUDED.nodeprimarynodeid";
    appendStringInfoString(nodeInsertIdempotentCommand, onConflictStr);
    return nodeInsertIdempotentCommand->data;
}

@@ -3152,37 +3162,26 @@ MetadataSyncSigAlrmHandler(SIGNAL_ARGS)
BackgroundWorkerHandle *
SpawnSyncNodeMetadataToNodes(Oid database, Oid extensionOwner)
{
    BackgroundWorker worker;
    BackgroundWorkerHandle *handle = NULL;
    char workerName[BGW_MAXLEN];

    /* Configure a worker. */
    memset(&worker, 0, sizeof(worker));
    SafeSnprintf(worker.bgw_name, BGW_MAXLEN,
    SafeSnprintf(workerName, BGW_MAXLEN,
                 "Citus Metadata Sync: %u/%u",
                 database, extensionOwner);
    worker.bgw_flags =
        BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
    worker.bgw_start_time = BgWorkerStart_ConsistentState;

    /* don't restart, we manage restarts from maintenance daemon */
    worker.bgw_restart_time = BGW_NEVER_RESTART;
    strcpy_s(worker.bgw_library_name, sizeof(worker.bgw_library_name), "citus");
    strcpy_s(worker.bgw_function_name, sizeof(worker.bgw_library_name),
             "SyncNodeMetadataToNodesMain");
    worker.bgw_main_arg = ObjectIdGetDatum(MyDatabaseId);
    memcpy_s(worker.bgw_extra, sizeof(worker.bgw_extra), &extensionOwner,
             sizeof(Oid));
    worker.bgw_notify_pid = MyProcPid;

    if (!RegisterDynamicBackgroundWorker(&worker, &handle))
    {
        return NULL;
    }

    pid_t pid;
    WaitForBackgroundWorkerStartup(handle, &pid);

    return handle;
    CitusBackgroundWorkerConfig config = {
        .workerName = workerName,
        .functionName = "SyncNodeMetadataToNodesMain",
        .mainArg = ObjectIdGetDatum(MyDatabaseId),
        .extensionOwner = extensionOwner,
        .needsNotification = true,
.waitForStartup = false,
|
||||
.restartTime = CITUS_BGW_NEVER_RESTART,
|
||||
.startTime = CITUS_BGW_DEFAULT_START_TIME,
|
||||
.workerType = NULL, /* use default */
|
||||
.extraData = NULL,
|
||||
.extraDataSize = 0
|
||||
};
|
||||
return RegisterCitusBackgroundWorker(&config);
|
||||
}
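
This hunk replaces a hand-rolled `RegisterDynamicBackgroundWorker()` call sequence with a single config-driven helper. For orientation, here is a minimal sketch of what `RegisterCitusBackgroundWorker()` plausibly does with that config, reconstructed from the removed lines above. The `CitusBackgroundWorkerConfig` fields and `CITUS_BGW_*` constants appear in the hunk; the helper body below is an assumption, not the actual implementation from this compare.

```c
/*
 * Sketch only: a plausible shape for RegisterCitusBackgroundWorker(),
 * mirroring the removed inline registration code. The mapping of
 * restartTime/startTime/workerType onto bgw_* fields is elided and assumed.
 */
static BackgroundWorkerHandle *
RegisterCitusBackgroundWorkerSketch(const CitusBackgroundWorkerConfig *config)
{
	BackgroundWorker worker;
	BackgroundWorkerHandle *handle = NULL;

	memset(&worker, 0, sizeof(worker));
	strcpy_s(worker.bgw_name, sizeof(worker.bgw_name), config->workerName);
	worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
	worker.bgw_start_time = BgWorkerStart_ConsistentState;
	worker.bgw_restart_time = BGW_NEVER_RESTART;
	strcpy_s(worker.bgw_library_name, sizeof(worker.bgw_library_name), "citus");
	strcpy_s(worker.bgw_function_name, sizeof(worker.bgw_function_name),
			 config->functionName);
	worker.bgw_main_arg = config->mainArg;
	memcpy_s(worker.bgw_extra, sizeof(worker.bgw_extra),
			 &config->extensionOwner, sizeof(Oid));

	if (config->needsNotification)
	{
		/* lets the registering backend get notified about startup/shutdown */
		worker.bgw_notify_pid = MyProcPid;
	}

	if (!RegisterDynamicBackgroundWorker(&worker, &handle))
	{
		return NULL;
	}

	if (config->waitForStartup)
	{
		pid_t pid;
		WaitForBackgroundWorkerStartup(handle, &pid);
	}

	return handle;
}
```

The design gain is the usual one for this refactor: every spawn site previously repeated the same boilerplate (and the removed code even sized `bgw_function_name` with `sizeof(worker.bgw_library_name)`), whereas a struct of options keeps the defaults and the field sizing in one audited place.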


@@ -5241,7 +5240,7 @@ SendDistObjectCommands(MetadataSyncContext *context)
bool forceDelegationIsNull = false;
Datum forceDelegationDatum =
heap_getattr(nextTuple,
Anum_pg_dist_object_force_delegation,
GetForceDelegationAttrIndexInPgDistObject(tupleDesc) + 1,
tupleDesc,
&forceDelegationIsNull);
bool forceDelegation = DatumGetBool(forceDelegationDatum);

@@ -127,11 +127,11 @@ static bool SetFieldText(int attno, Datum values[], bool isnull[], bool replace[
static bool SetFieldNull(int attno, Datum values[], bool isnull[], bool replace[]);

#define InitFieldValue(attno, values, isnull, initValue) \
(void) SetFieldValue((attno), (values), (isnull), NULL, (initValue))
(void) SetFieldValue((attno), (values), (isnull), NULL, (initValue))
#define InitFieldText(attno, values, isnull, initValue) \
(void) SetFieldText((attno), (values), (isnull), NULL, (initValue))
(void) SetFieldText((attno), (values), (isnull), NULL, (initValue))
#define InitFieldNull(attno, values, isnull) \
(void) SetFieldNull((attno), (values), (isnull), NULL)
(void) SetFieldNull((attno), (values), (isnull), NULL)

/* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(citus_local_disk_space_stats);

@@ -812,6 +812,7 @@ GenerateSizeQueryOnMultiplePlacements(List *shardIntervalList,
{
partitionedShardNames = lappend(partitionedShardNames, quotedShardName);
}

/* for non-partitioned tables, we will use Postgres' size functions */
else
{

@@ -822,8 +823,8 @@ GenerateSizeQueryOnMultiplePlacements(List *shardIntervalList,
/* SELECT SUM(worker_partitioned_...) FROM VALUES (...) */
char *subqueryForPartitionedShards =
GenerateSizeQueryForRelationNameList(partitionedShardNames,
GetWorkerPartitionedSizeUDFNameBySizeQueryType(
sizeQueryType));
GetWorkerPartitionedSizeUDFNameBySizeQueryType
(sizeQueryType));

/* SELECT SUM(pg_..._size) FROM VALUES (...) */
char *subqueryForNonPartitionedShards =

@@ -1919,23 +1920,22 @@ InsertIntoPgDistPartition(Oid relationId, char distributionMethod,
{
char *distributionColumnString = NULL;

Datum newValues[Natts_pg_dist_partition];
bool newNulls[Natts_pg_dist_partition];

/* open system catalog and insert new tuple */
Relation pgDistPartition = table_open(DistPartitionRelationId(), RowExclusiveLock);
TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);

Datum *newValues = (Datum *) palloc0(tupleDescriptor->natts * sizeof(Datum));
bool *newNulls = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

/* form new tuple for pg_dist_partition */
memset(newValues, 0, sizeof(newValues));
memset(newNulls, false, sizeof(newNulls));

newValues[Anum_pg_dist_partition_logicalrelid - 1] =
ObjectIdGetDatum(relationId);
newValues[Anum_pg_dist_partition_partmethod - 1] =
CharGetDatum(distributionMethod);
newValues[Anum_pg_dist_partition_colocationid - 1] = UInt32GetDatum(colocationId);
newValues[Anum_pg_dist_partition_repmodel - 1] = CharGetDatum(replicationModel);
newValues[Anum_pg_dist_partition_autoconverted - 1] = BoolGetDatum(autoConverted);
newValues[GetAutoConvertedAttrIndexInPgDistPartition(tupleDescriptor)] =
BoolGetDatum(autoConverted);

/* set partkey column to NULL for reference tables */
if (distributionMethod != DISTRIBUTE_BY_NONE)

@@ -1951,7 +1951,7 @@ InsertIntoPgDistPartition(Oid relationId, char distributionMethod,
newNulls[Anum_pg_dist_partition_partkey - 1] = true;
}

HeapTuple newTuple = heap_form_tuple(RelationGetDescr(pgDistPartition), newValues,
HeapTuple newTuple = heap_form_tuple(tupleDescriptor, newValues,
newNulls);

/* finally insert tuple, build index entries & register cache invalidation */

@@ -1963,6 +1963,9 @@ InsertIntoPgDistPartition(Oid relationId, char distributionMethod,

CommandCounterIncrement();
table_close(pgDistPartition, NoLock);

pfree(newValues);
pfree(newNulls);
}


@@ -2154,13 +2157,13 @@ UpdatePlacementGroupId(uint64 placementId, int groupId)
ScanKeyData scanKey[1];
int scanKeyCount = 1;
bool indexOK = true;
Datum values[Natts_pg_dist_placement];
bool isnull[Natts_pg_dist_placement];
bool replace[Natts_pg_dist_placement];
bool colIsNull = false;

Relation pgDistPlacement = table_open(DistPlacementRelationId(), RowExclusiveLock);
TupleDesc tupleDescriptor = RelationGetDescr(pgDistPlacement);
Datum *values = (Datum *) palloc0(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));
ScanKeyInit(&scanKey[0], Anum_pg_dist_placement_placementid,
BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(placementId));

@@ -2177,8 +2180,6 @@ UpdatePlacementGroupId(uint64 placementId, int groupId)
placementId)));
}

memset(replace, 0, sizeof(replace));

values[Anum_pg_dist_placement_groupid - 1] = Int32GetDatum(groupId);
isnull[Anum_pg_dist_placement_groupid - 1] = false;
replace[Anum_pg_dist_placement_groupid - 1] = true;

@@ -2197,6 +2198,10 @@ UpdatePlacementGroupId(uint64 placementId, int groupId)

systable_endscan(scanDescriptor);
table_close(pgDistPlacement, NoLock);

pfree(values);
pfree(isnull);
pfree(replace);
}


@@ -2210,12 +2215,13 @@ UpdatePgDistPartitionAutoConverted(Oid citusTableId, bool autoConverted)
ScanKeyData scanKey[1];
int scanKeyCount = 1;
bool indexOK = true;
Datum values[Natts_pg_dist_partition];
bool isnull[Natts_pg_dist_partition];
bool replace[Natts_pg_dist_partition];

Relation pgDistPartition = table_open(DistPartitionRelationId(), RowExclusiveLock);
TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
Datum *values = (Datum *) palloc0(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

ScanKeyInit(&scanKey[0], Anum_pg_dist_partition_logicalrelid,
BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(citusTableId));

@@ -2231,11 +2237,10 @@ UpdatePgDistPartitionAutoConverted(Oid citusTableId, bool autoConverted)
citusTableId)));
}

memset(replace, 0, sizeof(replace));

values[Anum_pg_dist_partition_autoconverted - 1] = BoolGetDatum(autoConverted);
isnull[Anum_pg_dist_partition_autoconverted - 1] = false;
replace[Anum_pg_dist_partition_autoconverted - 1] = true;
int autoconvertedindex = GetAutoConvertedAttrIndexInPgDistPartition(tupleDescriptor);
values[autoconvertedindex] = BoolGetDatum(autoConverted);
isnull[autoconvertedindex] = false;
replace[autoconvertedindex] = true;

heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, replace);

@@ -2247,6 +2252,10 @@ UpdatePgDistPartitionAutoConverted(Oid citusTableId, bool autoConverted)

systable_endscan(scanDescriptor);
table_close(pgDistPartition, NoLock);

pfree(values);
pfree(isnull);
pfree(replace);
}


@@ -2286,12 +2295,13 @@ UpdateDistributionColumn(Oid relationId, char distributionMethod, Var *distribut
ScanKeyData scanKey[1];
int scanKeyCount = 1;
bool indexOK = true;
Datum values[Natts_pg_dist_partition];
bool isnull[Natts_pg_dist_partition];
bool replace[Natts_pg_dist_partition];

Relation pgDistPartition = table_open(DistPartitionRelationId(), RowExclusiveLock);
TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
Datum *values = (Datum *) palloc0(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

ScanKeyInit(&scanKey[0], Anum_pg_dist_partition_logicalrelid,
BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));

@@ -2307,8 +2317,6 @@ UpdateDistributionColumn(Oid relationId, char distributionMethod, Var *distribut
relationId)));
}

memset(replace, 0, sizeof(replace));

replace[Anum_pg_dist_partition_partmethod - 1] = true;
values[Anum_pg_dist_partition_partmethod - 1] = CharGetDatum(distributionMethod);
isnull[Anum_pg_dist_partition_partmethod - 1] = false;

@@ -2317,9 +2325,10 @@ UpdateDistributionColumn(Oid relationId, char distributionMethod, Var *distribut
values[Anum_pg_dist_partition_colocationid - 1] = UInt32GetDatum(colocationId);
isnull[Anum_pg_dist_partition_colocationid - 1] = false;

replace[Anum_pg_dist_partition_autoconverted - 1] = true;
values[Anum_pg_dist_partition_autoconverted - 1] = BoolGetDatum(false);
isnull[Anum_pg_dist_partition_autoconverted - 1] = false;
int autoconvertedindex = GetAutoConvertedAttrIndexInPgDistPartition(tupleDescriptor);
replace[autoconvertedindex] = true;
values[autoconvertedindex] = BoolGetDatum(false);
isnull[autoconvertedindex] = false;

char *distributionColumnString = nodeToString((Node *) distributionColumn);

@@ -2337,6 +2346,10 @@ UpdateDistributionColumn(Oid relationId, char distributionMethod, Var *distribut

systable_endscan(scanDescriptor);
table_close(pgDistPartition, NoLock);

pfree(values);
pfree(isnull);
pfree(replace);
}


@@ -2380,12 +2393,13 @@ UpdateNoneDistTableMetadata(Oid relationId, char replicationModel, uint32 coloca
ScanKeyData scanKey[1];
int scanKeyCount = 1;
bool indexOK = true;
Datum values[Natts_pg_dist_partition];
bool isnull[Natts_pg_dist_partition];
bool replace[Natts_pg_dist_partition];

Relation pgDistPartition = table_open(DistPartitionRelationId(), RowExclusiveLock);
TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
Datum *values = (Datum *) palloc0(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

ScanKeyInit(&scanKey[0], Anum_pg_dist_partition_logicalrelid,
BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));

@@ -2401,8 +2415,6 @@ UpdateNoneDistTableMetadata(Oid relationId, char replicationModel, uint32 coloca
relationId)));
}

memset(replace, 0, sizeof(replace));

values[Anum_pg_dist_partition_colocationid - 1] = UInt32GetDatum(colocationId);
isnull[Anum_pg_dist_partition_colocationid - 1] = false;
replace[Anum_pg_dist_partition_colocationid - 1] = true;

@@ -2411,9 +2423,10 @@ UpdateNoneDistTableMetadata(Oid relationId, char replicationModel, uint32 coloca
isnull[Anum_pg_dist_partition_repmodel - 1] = false;
replace[Anum_pg_dist_partition_repmodel - 1] = true;

values[Anum_pg_dist_partition_autoconverted - 1] = BoolGetDatum(autoConverted);
isnull[Anum_pg_dist_partition_autoconverted - 1] = false;
replace[Anum_pg_dist_partition_autoconverted - 1] = true;
int autoconvertedindex = GetAutoConvertedAttrIndexInPgDistPartition(tupleDescriptor);
values[autoconvertedindex] = BoolGetDatum(autoConverted);
isnull[autoconvertedindex] = false;
replace[autoconvertedindex] = true;

heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, replace);

@@ -2424,6 +2437,10 @@ UpdateNoneDistTableMetadata(Oid relationId, char replicationModel, uint32 coloca

systable_endscan(scanDescriptor);
table_close(pgDistPartition, NoLock);

pfree(values);
pfree(isnull);
pfree(replace);
}


@@ -3113,10 +3130,10 @@ ScheduleBackgroundTask(int64 jobId, Oid owner, char *command, int dependingTaskC

/* 2. insert new task */
{
Datum values[Natts_pg_dist_background_task] = { 0 };
bool nulls[Natts_pg_dist_background_task] = { 0 };
TupleDesc tupleDescriptor = RelationGetDescr(pgDistBackgroundTask);

memset(nulls, true, sizeof(nulls));
Datum *values = (Datum *) palloc0(tupleDescriptor->natts * sizeof(Datum));
bool *nulls = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

int64 taskId = GetNextBackgroundTaskTaskId();

@@ -3147,15 +3164,17 @@ ScheduleBackgroundTask(int64 jobId, Oid owner, char *command, int dependingTaskC
values[Anum_pg_dist_background_task_message - 1] = CStringGetTextDatum("");
nulls[Anum_pg_dist_background_task_message - 1] = false;

values[Anum_pg_dist_background_task_nodes_involved - 1] =
IntArrayToDatum(nodesInvolvedCount, nodesInvolved);
nulls[Anum_pg_dist_background_task_nodes_involved - 1] = (nodesInvolvedCount == 0);
int nodesInvolvedIndex =
GetNodesInvolvedAttrIndexInPgDistBackgroundTask(tupleDescriptor);
values[nodesInvolvedIndex] = IntArrayToDatum(nodesInvolvedCount, nodesInvolved);
nulls[nodesInvolvedIndex] = (nodesInvolvedCount == 0);

HeapTuple newTuple = heap_form_tuple(RelationGetDescr(pgDistBackgroundTask),
values, nulls);
HeapTuple newTuple = heap_form_tuple(tupleDescriptor, values, nulls);
CatalogTupleInsert(pgDistBackgroundTask, newTuple);

pfree(values);
pfree(nulls);

task = palloc0(sizeof(BackgroundTask));
task->taskid = taskId;
task->status = BACKGROUND_TASK_STATUS_RUNNABLE;

@@ -3268,11 +3287,12 @@ ResetRunningBackgroundTasks(void)
List *taskIdsToWait = NIL;
while (HeapTupleIsValid(taskTuple = systable_getnext(scanDescriptor)))
{
Datum values[Natts_pg_dist_background_task] = { 0 };
bool isnull[Natts_pg_dist_background_task] = { 0 };
bool replace[Natts_pg_dist_background_task] = { 0 };

TupleDesc tupleDescriptor = RelationGetDescr(pgDistBackgroundTasks);

Datum *values = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

heap_deform_tuple(taskTuple, tupleDescriptor, values, isnull);

values[Anum_pg_dist_background_task_status - 1] =

@@ -3341,6 +3361,10 @@ ResetRunningBackgroundTasks(void)
replace);

CatalogTupleUpdate(pgDistBackgroundTasks, &taskTuple->t_self, taskTuple);

pfree(values);
pfree(isnull);
pfree(replace);
}

if (list_length(taskIdsToWait) > 0)

@@ -3424,8 +3448,9 @@ DeformBackgroundJobHeapTuple(TupleDesc tupleDescriptor, HeapTuple jobTuple)
static BackgroundTask *
DeformBackgroundTaskHeapTuple(TupleDesc tupleDescriptor, HeapTuple taskTuple)
{
Datum values[Natts_pg_dist_background_task] = { 0 };
bool nulls[Natts_pg_dist_background_task] = { 0 };
Datum *values = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *nulls = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));

heap_deform_tuple(taskTuple, tupleDescriptor, values, nulls);

BackgroundTask *task = palloc0(sizeof(BackgroundTask));

@@ -3463,13 +3488,18 @@ DeformBackgroundTaskHeapTuple(TupleDesc tupleDescriptor, HeapTuple taskTuple)
TextDatumGetCString(values[Anum_pg_dist_background_task_message - 1]);
}

if (!nulls[Anum_pg_dist_background_task_nodes_involved - 1])
int nodesInvolvedIndex =
GetNodesInvolvedAttrIndexInPgDistBackgroundTask(tupleDescriptor);
if (!nulls[nodesInvolvedIndex])
{
ArrayType *nodesInvolvedArrayObject =
DatumGetArrayTypeP(values[Anum_pg_dist_background_task_nodes_involved - 1]);
DatumGetArrayTypeP(values[nodesInvolvedIndex]);
task->nodesInvolved = IntegerArrayTypeToList(nodesInvolvedArrayObject);
}

pfree(values);
pfree(nulls);

return task;
}

@@ -3734,8 +3764,8 @@ JobTasksStatusCount(int64 jobId)
HeapTuple heapTuple = NULL;
while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
{
Datum values[Natts_pg_dist_background_task] = { 0 };
bool isnull[Natts_pg_dist_background_task] = { 0 };
Datum *values = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));

heap_deform_tuple(heapTuple, tupleDescriptor, values, isnull);

@@ -3743,6 +3773,9 @@ JobTasksStatusCount(int64 jobId)
1]);
BackgroundTaskStatus status = BackgroundTaskStatusByOid(statusOid);

pfree(values);
pfree(isnull);

switch (status)
{
case BACKGROUND_TASK_STATUS_BLOCKED:

@@ -3995,9 +4028,9 @@ UpdateBackgroundJob(int64 jobId)
UINT64_FORMAT, jobId)));
}

Datum values[Natts_pg_dist_background_task] = { 0 };
bool isnull[Natts_pg_dist_background_task] = { 0 };
bool replace[Natts_pg_dist_background_task] = { 0 };
Datum *values = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

heap_deform_tuple(heapTuple, tupleDescriptor, values, isnull);

@@ -4041,6 +4074,10 @@ UpdateBackgroundJob(int64 jobId)

systable_endscan(scanDescriptor);
table_close(pgDistBackgroundJobs, NoLock);

pfree(values);
pfree(isnull);
pfree(replace);
}


@@ -4076,9 +4113,9 @@ UpdateBackgroundTask(BackgroundTask *task)
task->jobid, task->taskid)));
}

Datum values[Natts_pg_dist_background_task] = { 0 };
bool isnull[Natts_pg_dist_background_task] = { 0 };
bool replace[Natts_pg_dist_background_task] = { 0 };
Datum *values = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

heap_deform_tuple(heapTuple, tupleDescriptor, values, isnull);

@@ -4147,6 +4184,10 @@ UpdateBackgroundTask(BackgroundTask *task)

systable_endscan(scanDescriptor);
table_close(pgDistBackgroundTasks, NoLock);

pfree(values);
pfree(isnull);
pfree(replace);
}


@@ -4225,18 +4266,18 @@ CancelTasksForJob(int64 jobid)
BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(jobid));

const bool indexOK = true;
SysScanDesc scanDescriptor = systable_beginscan(pgDistBackgroundTasks,
DistBackgroundTaskJobIdTaskIdIndexId(),
indexOK, NULL,
lengthof(scanKey), scanKey);
SysScanDesc scanDescriptor = systable_beginscan(
pgDistBackgroundTasks, DistBackgroundTaskJobIdTaskIdIndexId(),
indexOK, NULL, lengthof(scanKey), scanKey);

List *runningTaskPids = NIL;
HeapTuple taskTuple = NULL;
while (HeapTupleIsValid(taskTuple = systable_getnext(scanDescriptor)))
{
Datum values[Natts_pg_dist_background_task] = { 0 };
bool nulls[Natts_pg_dist_background_task] = { 0 };
bool replace[Natts_pg_dist_background_task] = { 0 };
Datum *values = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *nulls = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

heap_deform_tuple(taskTuple, tupleDescriptor, values, nulls);

Oid statusOid =

@@ -4285,6 +4326,10 @@ CancelTasksForJob(int64 jobid)
taskTuple = heap_modify_tuple(taskTuple, tupleDescriptor, values, nulls,
replace);
CatalogTupleUpdate(pgDistBackgroundTasks, &taskTuple->t_self, taskTuple);

pfree(values);
pfree(nulls);
pfree(replace);
}

systable_endscan(scanDescriptor);

@@ -4341,9 +4386,9 @@ UnscheduleDependentTasks(BackgroundTask *task)
"task_id: " UINT64_FORMAT, cTaskId)));
}

Datum values[Natts_pg_dist_background_task] = { 0 };
bool isnull[Natts_pg_dist_background_task] = { 0 };
bool replace[Natts_pg_dist_background_task] = { 0 };
Datum *values = (Datum *) palloc(tupleDescriptor->natts * sizeof(Datum));
bool *isnull = (bool *) palloc(tupleDescriptor->natts * sizeof(bool));
bool *replace = (bool *) palloc0(tupleDescriptor->natts * sizeof(bool));

values[Anum_pg_dist_background_task_status - 1] =
ObjectIdGetDatum(CitusTaskStatusUnscheduledId());

@@ -4355,6 +4400,10 @@ UnscheduleDependentTasks(BackgroundTask *task)
CatalogTupleUpdate(pgDistBackgroundTasks, &heapTuple->t_self, heapTuple);

systable_endscan(scanDescriptor);

pfree(values);
pfree(isnull);
pfree(replace);
}
}

@@ -4420,3 +4469,43 @@ UnblockDependingBackgroundTasks(BackgroundTask *task)

table_close(pgDistBackgroundTasksDepend, NoLock);
}


/*
 * GetAutoConvertedAttrIndexInPgDistPartition returns the attribute index for the
 * autoconverted attribute.
 *
 * The autoconverted attribute was added to pg_dist_partition via an ALTER operation
 * after the version where Citus started supporting downgrades, and it is the only
 * column we have introduced to pg_dist_partition since then.
 *
 * In case of a downgrade + upgrade, tupleDesc->natts becomes greater than
 * Natts_pg_dist_partition; when that happens, we know the index of autoconverted
 * is no longer Anum_pg_dist_partition_autoconverted - 1 but tupleDesc->natts - 1.
 */
int
GetAutoConvertedAttrIndexInPgDistPartition(TupleDesc tupleDesc)
{
return tupleDesc->natts == Natts_pg_dist_partition
? (Anum_pg_dist_partition_autoconverted - 1)
: tupleDesc->natts - 1;
}


/*
 * GetNodesInvolvedAttrIndexInPgDistBackgroundTask returns the attribute index for
 * the nodes_involved attribute.
 *
 * The nodes_involved attribute was added to pg_dist_background_task via an ALTER
 * operation after the version where Citus started supporting downgrades, and it is
 * the only column we have introduced to pg_dist_background_task since then.
 *
 * In case of a downgrade + upgrade, tupleDesc->natts becomes greater than
 * Natts_pg_dist_background_task; when that happens, we know the index of
 * nodes_involved is no longer Anum_pg_dist_background_task_nodes_involved - 1 but
 * tupleDesc->natts - 1.
 */
int
GetNodesInvolvedAttrIndexInPgDistBackgroundTask(TupleDesc tupleDesc)
{
return tupleDesc->natts == Natts_pg_dist_background_task
? (Anum_pg_dist_background_task_nodes_involved - 1)
: tupleDesc->natts - 1;
}
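
These two helpers are the crux of the catalog changes in this compare: every hunk that used to declare `Natts_*`-sized stack arrays now allocates `tupleDesc->natts` entries and resolves the relocatable column through a helper instead of its compile-time `Anum_*` constant. Below is a minimal sketch of the resulting read pattern; `ExampleReadAutoConverted` is a hypothetical caller for illustration, not a function in this diff.

```c
/*
 * Sketch only: reading autoconverted through the helper so the lookup
 * stays correct even when tupleDesc->natts != Natts_pg_dist_partition.
 */
static bool
ExampleReadAutoConverted(Relation pgDistPartition, HeapTuple heapTuple)
{
	TupleDesc tupleDesc = RelationGetDescr(pgDistPartition);

	/* size from the live descriptor, never from the compile-time constant */
	Datum *values = (Datum *) palloc(tupleDesc->natts * sizeof(Datum));
	bool *isnull = (bool *) palloc(tupleDesc->natts * sizeof(bool));

	heap_deform_tuple(heapTuple, tupleDesc, values, isnull);

	/* resolve the moved column's position at runtime */
	int autoConvertedIndex = GetAutoConvertedAttrIndexInPgDistPartition(tupleDesc);
	bool autoConverted = !isnull[autoConvertedIndex] &&
						 DatumGetBool(values[autoConvertedIndex]);

	pfree(values);
	pfree(isnull);

	return autoConverted;
}
```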
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@
|
|||
|
||||
#include "distributed/citus_acquire_lock.h"
|
||||
#include "distributed/citus_safe_lib.h"
|
||||
#include "distributed/clonenode_utils.h"
|
||||
#include "distributed/colocation_utils.h"
|
||||
#include "distributed/commands.h"
|
||||
#include "distributed/commands/utility_hook.h"
|
||||
|
|
@ -84,6 +85,8 @@ typedef struct NodeMetadata
|
|||
bool isActive;
|
||||
Oid nodeRole;
|
||||
bool shouldHaveShards;
|
||||
uint32 nodeprimarynodeid;
|
||||
bool nodeisclone;
|
||||
char *nodeCluster;
|
||||
} NodeMetadata;
|
||||
|
||||
|
|
@ -106,11 +109,14 @@ static void InsertNodeRow(int nodeid, char *nodename, int32 nodeport,
|
|||
NodeMetadata *nodeMetadata);
|
||||
static void DeleteNodeRow(char *nodename, int32 nodeport);
|
||||
static void BlockDistributedQueriesOnMetadataNodes(void);
|
||||
static WorkerNode * TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple);
|
||||
static WorkerNode * TupleToWorkerNode(Relation pgDistNode, TupleDesc tupleDescriptor,
|
||||
HeapTuple heapTuple);
|
||||
static bool NodeIsLocal(WorkerNode *worker);
|
||||
static void SetLockTimeoutLocally(int32 lock_cooldown);
|
||||
static void UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort,
|
||||
bool localOnly);
|
||||
static int GetNodePrimaryNodeIdAttrIndexInPgDistNode(TupleDesc tupleDesc);
|
||||
static int GetNodeIsCloneAttrIndexInPgDistNode(TupleDesc tupleDesc);
|
||||
static bool UnsetMetadataSyncedForAllWorkers(void);
|
||||
static char * GetMetadataSyncCommandToSetNodeColumn(WorkerNode *workerNode,
|
||||
int columnIndex,
|
||||
|
|
@ -120,11 +126,10 @@ static char * NodeMetadataSyncedUpdateCommand(uint32 nodeId, bool metadataSynced
|
|||
static void ErrorIfCoordinatorMetadataSetFalse(WorkerNode *workerNode, Datum value,
|
||||
char *field);
|
||||
static WorkerNode * SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards);
|
||||
static WorkerNode * FindNodeAnyClusterByNodeId(uint32 nodeId);
|
||||
static void ErrorIfAnyNodeNotExist(List *nodeList);
|
||||
static void UpdateLocalGroupIdsViaMetadataContext(MetadataSyncContext *context);
|
||||
static void SendDeletionCommandsForReplicatedTablePlacements(
|
||||
MetadataSyncContext *context);
|
||||
static void SendDeletionCommandsForReplicatedTablePlacements(MetadataSyncContext *context)
|
||||
;
|
||||
static void SyncNodeMetadata(MetadataSyncContext *context);
|
||||
static void SetNodeStateViaMetadataContext(MetadataSyncContext *context,
|
||||
WorkerNode *workerNode,
|
||||
|
|
@ -134,12 +139,15 @@ static void MarkNodesNotSyncedInLoopBackConnection(MetadataSyncContext *context,
|
|||
static void EnsureParentSessionHasExclusiveLockOnPgDistNode(pid_t parentSessionPid);
|
||||
static void SetNodeMetadata(MetadataSyncContext *context, bool localOnly);
|
||||
static void EnsureTransactionalMetadataSyncMode(void);
|
||||
static void LockShardsInWorkerPlacementList(WorkerNode *workerNode, LOCKMODE
|
||||
lockMode);
|
||||
static BackgroundWorkerHandle * CheckBackgroundWorkerToObtainLocks(int32 lock_cooldown);
|
||||
static BackgroundWorkerHandle * LockPlacementsWithBackgroundWorkersInPrimaryNode(
|
||||
WorkerNode *workerNode, bool force, int32 lock_cooldown);
|
||||
|
||||
|
||||
static int32 CitusAddCloneNode(WorkerNode *primaryWorkerNode,
|
||||
char *cloneHostname, int32 clonePort);
|
||||
static void RemoveCloneNode(WorkerNode *cloneNode);
|
||||
|
||||
/* Function definitions go here */
|
||||
|
||||
/* declarations for dynamic loading */
|
||||
|
|
@ -168,6 +176,10 @@ PG_FUNCTION_INFO_V1(citus_coordinator_nodeid);
|
|||
PG_FUNCTION_INFO_V1(citus_is_coordinator);
|
||||
PG_FUNCTION_INFO_V1(citus_internal_mark_node_not_synced);
|
||||
PG_FUNCTION_INFO_V1(citus_is_primary_node);
|
||||
PG_FUNCTION_INFO_V1(citus_add_clone_node);
|
||||
PG_FUNCTION_INFO_V1(citus_add_clone_node_with_nodeid);
|
||||
PG_FUNCTION_INFO_V1(citus_remove_clone_node);
|
||||
PG_FUNCTION_INFO_V1(citus_remove_clone_node_with_nodeid);
|
||||
|
||||
/*
|
||||
* DefaultNodeMetadata creates a NodeMetadata struct with the fields set to
|
||||
|
|
@ -183,6 +195,8 @@ DefaultNodeMetadata()
|
|||
nodeMetadata.nodeRack = WORKER_DEFAULT_RACK;
|
||||
nodeMetadata.shouldHaveShards = true;
|
||||
nodeMetadata.groupId = INVALID_GROUP_ID;
|
||||
nodeMetadata.nodeisclone = false;
|
||||
nodeMetadata.nodeprimarynodeid = 0; /* 0 typically means InvalidNodeId */
|
||||
|
||||
return nodeMetadata;
|
||||
}
|
||||
|
|
@ -1177,6 +1191,42 @@ ActivateNodeList(MetadataSyncContext *context)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ActivateCloneNodeAsPrimary sets the given worker node as primary and active
|
||||
* in the pg_dist_node catalog and make the clone node as first class citizen.
|
||||
*/
|
||||
void
|
||||
ActivateCloneNodeAsPrimary(WorkerNode *workerNode)
|
||||
{
|
||||
Relation pgDistNode = table_open(DistNodeRelationId(), AccessShareLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
|
||||
TupleDesc copiedTupleDescriptor = CreateTupleDescCopy(tupleDescriptor);
|
||||
table_close(pgDistNode, AccessShareLock);
|
||||
|
||||
/*
|
||||
* Set the node as primary and active.
|
||||
*/
|
||||
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_noderole,
|
||||
ObjectIdGetDatum(PrimaryNodeRoleId()));
|
||||
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_isactive,
|
||||
BoolGetDatum(true));
|
||||
SetWorkerColumnLocalOnly(workerNode,
|
||||
GetNodeIsCloneAttrIndexInPgDistNode(copiedTupleDescriptor) +
|
||||
1,
|
||||
BoolGetDatum(false));
|
||||
SetWorkerColumnLocalOnly(workerNode,
|
||||
GetNodePrimaryNodeIdAttrIndexInPgDistNode(
|
||||
copiedTupleDescriptor) + 1,
|
||||
Int32GetDatum(0));
|
||||
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_hasmetadata,
|
||||
BoolGetDatum(true));
|
||||
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_metadatasynced,
|
||||
BoolGetDatum(true));
|
||||
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_shouldhaveshards,
|
||||
BoolGetDatum(true));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Acquires shard metadata locks on all shards residing in the given worker node
|
||||
*
|
||||
|
|
@ -1200,7 +1250,8 @@ BackgroundWorkerHandle *
|
|||
CheckBackgroundWorkerToObtainLocks(int32 lock_cooldown)
|
||||
{
|
||||
BackgroundWorkerHandle *handle = StartLockAcquireHelperBackgroundWorker(MyProcPid,
|
||||
lock_cooldown);
|
||||
lock_cooldown)
|
||||
;
|
||||
if (!handle)
|
||||
{
|
||||
/*
|
||||
|
|
@ -1422,6 +1473,305 @@ master_update_node(PG_FUNCTION_ARGS)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* citus_add_clone_node adds a new node as a clone of an existing primary node.
|
||||
*/
|
||||
Datum
|
||||
citus_add_clone_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureSuperUser();
|
||||
EnsureCoordinator();
|
||||
|
||||
text *cloneHostnameText = PG_GETARG_TEXT_P(0);
|
||||
int32 clonePort = PG_GETARG_INT32(1);
|
||||
text *primaryHostnameText = PG_GETARG_TEXT_P(2);
|
||||
int32 primaryPort = PG_GETARG_INT32(3);
|
||||
|
||||
char *cloneHostname = text_to_cstring(cloneHostnameText);
|
||||
char *primaryHostname = text_to_cstring(primaryHostnameText);
|
||||
|
||||
WorkerNode *primaryWorker = FindWorkerNodeAnyCluster(primaryHostname, primaryPort);
|
||||
|
||||
if (primaryWorker == NULL)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("primary node %s:%d not found in pg_dist_node",
|
||||
primaryHostname, primaryPort)));
|
||||
}
|
||||
|
||||
int32 cloneNodeId = CitusAddCloneNode(primaryWorker, cloneHostname, clonePort);
|
||||
|
||||
PG_RETURN_INT32(cloneNodeId);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* citus_add_clone_node_with_nodeid adds a new node as a clone of an existing primary node
|
||||
* using the primary node's ID. It records the clone's hostname, port, and links it to the
|
||||
* primary node's ID.
|
||||
*
|
||||
* This function is useful when you already know the primary node's ID and want to add a clone
|
||||
* without needing to look it up by hostname and port.
|
||||
*/
|
||||
Datum
|
||||
citus_add_clone_node_with_nodeid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureSuperUser();
|
||||
EnsureCoordinator();
|
||||
|
||||
text *cloneHostnameText = PG_GETARG_TEXT_P(0);
|
||||
int32 clonePort = PG_GETARG_INT32(1);
|
||||
int32 primaryNodeId = PG_GETARG_INT32(2);
|
||||
|
||||
char *cloneHostname = text_to_cstring(cloneHostnameText);
|
||||
|
||||
bool missingOk = false;
|
||||
WorkerNode *primaryWorkerNode = FindNodeWithNodeId(primaryNodeId, missingOk);
|
||||
|
||||
if (primaryWorkerNode == NULL)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("primary node with ID %d does not exist", primaryNodeId)));
|
||||
}
|
||||
|
||||
int32 cloneNodeId = CitusAddCloneNode(primaryWorkerNode, cloneHostname, clonePort);
|
||||
|
||||
PG_RETURN_INT32(cloneNodeId);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CitusAddCloneNode function adds a new node as a clone of an existing primary node.
|
||||
* It records the clone's hostname, port, and links it to the primary node's ID.
|
||||
* The clone is initially marked as inactive and not having shards.
|
||||
*/
|
||||
static int32
|
||||
CitusAddCloneNode(WorkerNode *primaryWorkerNode,
|
||||
char *cloneHostname, int32 clonePort)
|
||||
{
|
||||
Assert(primaryWorkerNode != NULL);
|
||||
|
||||
/* Future-proofing: Ideally, a primary node should not itself be a clone.
|
||||
* This check might be more relevant once replica promotion logic exists.
|
||||
* For now, pg_dist_node.nodeisclone defaults to false for existing nodes.
|
||||
*/
|
||||
if (primaryWorkerNode->nodeisclone)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg(
|
||||
"primary node %s:%d is itself a clone and cannot have clones",
|
||||
primaryWorkerNode->workerName, primaryWorkerNode->
|
||||
workerPort)));
|
||||
}
|
||||
|
||||
if (!primaryWorkerNode->shouldHaveShards)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg(
|
||||
"primary node %s:%d does not have shards, node without shards cannot have clones",
|
||||
primaryWorkerNode->workerName, primaryWorkerNode->
|
||||
workerPort)));
|
||||
}
|
||||
|
||||
WorkerNode *existingCloneNode = FindWorkerNodeAnyCluster(cloneHostname, clonePort);
|
||||
if (existingCloneNode != NULL)
|
||||
{
|
||||
/*
|
||||
* Idempotency check: If the node already exists, is it already correctly
|
||||
* registered as a clone for THIS primary?
|
||||
*/
|
||||
if (existingCloneNode->nodeisclone &&
|
||||
existingCloneNode->nodeprimarynodeid == primaryWorkerNode->nodeId)
|
||||
{
|
||||
ereport(NOTICE, (errmsg(
|
||||
"node %s:%d is already registered as a clone for primary %s:%d (nodeid %d)",
|
||||
cloneHostname, clonePort,
|
||||
primaryWorkerNode->workerName, primaryWorkerNode->
|
||||
workerPort, primaryWorkerNode->nodeId)));
|
||||
PG_RETURN_INT32(existingCloneNode->nodeId);
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg(
|
||||
"a different node %s:%d (nodeid %d) already exists or is a clone for a different primary",
|
||||
cloneHostname, clonePort, existingCloneNode->nodeId)));
|
||||
}
|
||||
}
|
||||
EnsureValidStreamingReplica(primaryWorkerNode, cloneHostname, clonePort);
|
||||
|
||||
char *operation = "add";
|
||||
EnsureValidCloneMode(primaryWorkerNode, cloneHostname, clonePort, operation);
|
||||
|
||||
NodeMetadata nodeMetadata = DefaultNodeMetadata();
|
||||
|
||||
nodeMetadata.nodeisclone = true;
|
||||
nodeMetadata.nodeprimarynodeid = primaryWorkerNode->nodeId;
|
||||
nodeMetadata.isActive = false; /* Replicas start as inactive */
|
||||
nodeMetadata.shouldHaveShards = false; /* Replicas do not directly own primary shards */
|
||||
nodeMetadata.groupId = INVALID_GROUP_ID; /* Replicas get a new group ID and do not belong to any existing group */
|
||||
nodeMetadata.nodeRole = UnavailableNodeRoleId(); /* The node role is set to 'unavailable' */
|
||||
nodeMetadata.nodeCluster = primaryWorkerNode->nodeCluster; /* Same cluster as primary */
|
||||
|
||||
/* Other fields like hasMetadata, metadataSynced will take defaults from DefaultNodeMetadata
|
||||
* (typically true, true for hasMetadata and metadataSynced if it's a new node,
|
||||
* or might need adjustment based on replica strategy)
|
||||
* For now, let's assume DefaultNodeMetadata provides suitable defaults for these
|
||||
* or they will be set by AddNodeMetadata/ActivateNodeList if needed.
|
||||
* Specifically, hasMetadata is often true, and metadataSynced true after activation.
|
||||
* Since this replica is inactive, metadata sync status might be less critical initially.
|
||||
*/
|
||||
|
||||
bool nodeAlreadyExists = false;
|
||||
bool localOnly = false; /* Propagate change to other workers with metadata */
|
||||
|
||||
/*
|
||||
* AddNodeMetadata will take an ExclusiveLock on pg_dist_node.
|
||||
* It also checks again if the node already exists after acquiring the lock.
|
||||
*/
|
||||
int cloneNodeId = AddNodeMetadata(cloneHostname, clonePort, &nodeMetadata,
|
||||
&nodeAlreadyExists, localOnly);
|
||||
|
||||
if (nodeAlreadyExists)
|
||||
{
|
||||
/* This case should ideally be caught by the FindWorkerNodeAnyCluster check above,
|
||||
* but AddNodeMetadata does its own check after locking.
|
||||
* If it already exists and is correctly configured, we might have returned NOTICE above.
|
||||
* If it exists but is NOT correctly configured as our replica, an ERROR would be more appropriate.
|
||||
* AddNodeMetadata returns the existing node's ID if it finds one.
|
||||
* We need to ensure it is the *correct* replica.
|
||||
*/
|
||||
WorkerNode *fetchedExistingNode = FindNodeAnyClusterByNodeId(cloneNodeId);
|
||||
if (fetchedExistingNode != NULL && fetchedExistingNode->nodeisclone &&
|
||||
fetchedExistingNode->nodeprimarynodeid == primaryWorkerNode->nodeId)
|
||||
{
|
||||
ereport(NOTICE, (errmsg(
|
||||
"node %s:%d was already correctly registered as a clone for primary %s:%d (nodeid %d)",
|
||||
cloneHostname, clonePort,
|
||||
primaryWorkerNode->workerName, primaryWorkerNode->
|
||||
workerPort, primaryWorkerNode->nodeId)));
|
||||
|
||||
/* Intentional fall-through to return cloneNodeId */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This state is less expected if our initial check passed or errored. */
|
||||
ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
|
||||
errmsg(
|
||||
"node %s:%d already exists but is not correctly configured as a clone for primary %s:%d",
|
||||
cloneHostname, clonePort, primaryWorkerNode->workerName,
|
||||
primaryWorkerNode->workerPort)));
|
||||
}
|
||||
}
|
||||
|
||||
TransactionModifiedNodeMetadata = true;
|
||||
|
||||
/*
|
||||
* Note: Clones added this way are inactive.
|
||||
* A separate UDF citus_promote_clone_and_rebalance
|
||||
* would be needed to activate them.
|
||||
*/
|
||||
|
||||
return cloneNodeId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* citus_remove_clone_node removes an inactive streaming clone node from Citus metadata.
|
||||
*/
|
||||
Datum
|
||||
citus_remove_clone_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureSuperUser();
|
||||
EnsureCoordinator();
|
||||
|
||||
text *nodeNameText = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
char *nodeName = text_to_cstring(nodeNameText);
|
||||
|
||||
WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
|
||||
|
||||
if (workerNode == NULL)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("node \"%s:%d\" does not exist", nodeName, nodePort)));
|
||||
}
|
||||
|
||||
RemoveCloneNode(workerNode);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* citus_remove_clone_node_with_nodeid removes an inactive clone node from Citus metadata
|
||||
* using the node's ID.
|
||||
*/
|
||||
Datum
|
||||
citus_remove_clone_node_with_nodeid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureSuperUser();
|
||||
EnsureCoordinator();
|
||||
|
||||
uint32 replicaNodeId = PG_GETARG_INT32(0);
|
||||
|
||||
WorkerNode *replicaNode = FindNodeAnyClusterByNodeId(replicaNodeId);
|
||||
|
||||
if (replicaNode == NULL)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("Clone node with ID %d does not exist", replicaNodeId)));
|
||||
}
|
||||
RemoveCloneNode(replicaNode);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
RemoveCloneNode(WorkerNode *cloneNode)
|
||||
{
|
||||
Assert(cloneNode != NULL);
|
||||
|
||||
if (!cloneNode->nodeisclone)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("Node %s:%d (ID %d) is not a clone node. "
|
||||
"Use citus_remove_node() to remove primary or already promoted nodes.",
|
||||
cloneNode->workerName, cloneNode->workerPort, cloneNode->
|
||||
nodeId)));
|
||||
}
|
||||
|
||||
if (cloneNode->isActive)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg(
|
||||
"Clone node %s:%d (ID %d) is marked as active and cannot be removed with this function. "
|
||||
"This might indicate a promoted clone. Consider using citus_remove_node() if you are sure, "
|
||||
"or ensure it's properly deactivated if it's an unpromoted clone in an unexpected state.",
|
||||
cloneNode->workerName, cloneNode->workerPort, cloneNode->
|
||||
nodeId)));
|
||||
}
|
||||
|
||||
/*
|
||||
* All checks passed, proceed with removal.
|
||||
* RemoveNodeFromCluster handles locking, catalog changes, connection closing, and metadata sync.
|
||||
*/
|
||||
ereport(NOTICE, (errmsg("Removing inactive clone node %s:%d (ID %d)",
|
||||
cloneNode->workerName, cloneNode->workerPort, cloneNode->
|
||||
nodeId)));
|
||||
|
||||
RemoveNodeFromCluster(cloneNode->workerName, cloneNode->workerPort);
|
||||
|
||||
/* RemoveNodeFromCluster might set this, but setting it here ensures it's marked for this UDF's transaction. */
|
||||
TransactionModifiedNodeMetadata = true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SetLockTimeoutLocally sets the lock_timeout to the given value.
|
||||
* This setting is local.
|
||||
|
|
@ -1440,14 +1790,14 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, bool loca
|
|||
{
|
||||
const bool indexOK = true;
|
||||
|
||||
ScanKeyData scanKey[1];
|
||||
Datum values[Natts_pg_dist_node];
|
||||
bool isnull[Natts_pg_dist_node];
|
||||
bool replace[Natts_pg_dist_node];
|
||||
|
||||
Relation pgDistNode = table_open(DistNodeRelationId(), RowExclusiveLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
|
||||
|
||||
ScanKeyData scanKey[1];
|
||||
Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
|
||||
bool *isnull = palloc0(tupleDescriptor->natts * sizeof(bool));
|
||||
bool *replace = palloc0(tupleDescriptor->natts * sizeof(bool));
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_pg_dist_node_nodeid,
|
||||
BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(nodeId));
|
||||
|
||||
|
|
@ -1462,8 +1812,6 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, bool loca
|
|||
newNodeName, newNodePort)));
|
||||
}
|
||||
|
||||
memset(replace, 0, sizeof(replace));
|
||||
|
||||
values[Anum_pg_dist_node_nodeport - 1] = Int32GetDatum(newNodePort);
|
||||
isnull[Anum_pg_dist_node_nodeport - 1] = false;
|
||||
replace[Anum_pg_dist_node_nodeport - 1] = true;
|
||||
|
|
@ -1496,6 +1844,10 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, bool loca
|
|||
|
||||
systable_endscan(scanDescriptor);
|
||||
table_close(pgDistNode, NoLock);
|
||||
|
||||
pfree(values);
|
||||
pfree(isnull);
|
||||
pfree(replace);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1766,11 +2118,10 @@ citus_internal_mark_node_not_synced(PG_FUNCTION_ARGS)
|
|||
Relation pgDistNode = table_open(DistNodeRelationId(), AccessShareLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
|
||||
|
||||
Datum values[Natts_pg_dist_node];
|
||||
bool isnull[Natts_pg_dist_node];
|
||||
bool replace[Natts_pg_dist_node];
|
||||
Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
|
||||
bool *isnull = palloc0(tupleDescriptor->natts * sizeof(bool));
|
||||
bool *replace = palloc0(tupleDescriptor->natts * sizeof(bool));
|
||||
|
||||
memset(replace, 0, sizeof(replace));
|
||||
values[Anum_pg_dist_node_metadatasynced - 1] = DatumGetBool(false);
|
||||
isnull[Anum_pg_dist_node_metadatasynced - 1] = false;
|
||||
replace[Anum_pg_dist_node_metadatasynced - 1] = true;
|
||||
|
|
@ -1784,6 +2135,10 @@ citus_internal_mark_node_not_synced(PG_FUNCTION_ARGS)
|
|||
|
||||
table_close(pgDistNode, NoLock);
|
||||
|
||||
pfree(values);
|
||||
pfree(isnull);
|
||||
pfree(replace);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
|
@ -1859,7 +2214,7 @@ FindWorkerNodeAnyCluster(const char *nodeName, int32 nodePort)
|
|||
HeapTuple heapTuple = GetNodeTuple(nodeName, nodePort);
|
||||
if (heapTuple != NULL)
|
||||
{
|
||||
workerNode = TupleToWorkerNode(tupleDescriptor, heapTuple);
|
||||
workerNode = TupleToWorkerNode(pgDistNode, tupleDescriptor, heapTuple);
|
||||
}
|
||||
|
||||
table_close(pgDistNode, NoLock);
|
||||
|
|
@ -1871,7 +2226,7 @@ FindWorkerNodeAnyCluster(const char *nodeName, int32 nodePort)
|
|||
* FindNodeAnyClusterByNodeId searches pg_dist_node and returns the node with
|
||||
* the nodeId. If the node can't be found returns NULL.
|
||||
*/
|
||||
static WorkerNode *
|
||||
WorkerNode *
|
||||
FindNodeAnyClusterByNodeId(uint32 nodeId)
|
||||
{
|
||||
bool includeNodesFromOtherClusters = true;
|
||||
|
|
@ -1966,7 +2321,8 @@ ReadDistNode(bool includeNodesFromOtherClusters)
|
|||
HeapTuple heapTuple = systable_getnext(scanDescriptor);
|
||||
while (HeapTupleIsValid(heapTuple))
|
||||
{
|
||||
WorkerNode *workerNode = TupleToWorkerNode(tupleDescriptor, heapTuple);
|
||||
WorkerNode *workerNode = TupleToWorkerNode(pgDistNode, tupleDescriptor, heapTuple)
|
||||
;
|
||||
|
||||
if (includeNodesFromOtherClusters ||
|
||||
strncmp(workerNode->nodeCluster, CurrentCluster, WORKER_LENGTH) == 0)
|
||||
|
|
@ -2491,9 +2847,9 @@ SetWorkerColumnLocalOnly(WorkerNode *workerNode, int columnIndex, Datum value)
|
|||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
|
||||
HeapTuple heapTuple = GetNodeTuple(workerNode->workerName, workerNode->workerPort);
|
||||
|
||||
Datum values[Natts_pg_dist_node];
|
||||
bool isnull[Natts_pg_dist_node];
|
||||
bool replace[Natts_pg_dist_node];
|
||||
Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
|
||||
bool *isnull = palloc0(tupleDescriptor->natts * sizeof(bool));
|
||||
bool *replace = palloc0(tupleDescriptor->natts * sizeof(bool));
|
||||
|
||||
if (heapTuple == NULL)
|
||||
{
|
||||
|
|
@ -2501,7 +2857,6 @@ SetWorkerColumnLocalOnly(WorkerNode *workerNode, int columnIndex, Datum value)
|
|||
workerNode->workerName, workerNode->workerPort)));
|
||||
}
|
||||
|
||||
memset(replace, 0, sizeof(replace));
|
||||
values[columnIndex - 1] = value;
|
||||
isnull[columnIndex - 1] = false;
|
||||
replace[columnIndex - 1] = true;
|
||||
|
|
@ -2513,10 +2868,14 @@ SetWorkerColumnLocalOnly(WorkerNode *workerNode, int columnIndex, Datum value)
|
|||
CitusInvalidateRelcacheByRelid(DistNodeRelationId());
|
||||
CommandCounterIncrement();
|
||||
|
||||
WorkerNode *newWorkerNode = TupleToWorkerNode(tupleDescriptor, heapTuple);
|
||||
WorkerNode *newWorkerNode = TupleToWorkerNode(pgDistNode, tupleDescriptor, heapTuple);
|
||||
|
||||
table_close(pgDistNode, NoLock);
|
||||
|
||||
pfree(values);
|
||||
pfree(isnull);
|
||||
pfree(replace);
|
||||
|
||||
return newWorkerNode;
|
||||
}
|
||||
|
||||
|
|
@ -2901,16 +3260,15 @@ InsertPlaceholderCoordinatorRecord(void)
|
|||
static void
|
||||
InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, NodeMetadata *nodeMetadata)
|
||||
{
|
||||
Datum values[Natts_pg_dist_node];
|
||||
bool isNulls[Natts_pg_dist_node];
|
||||
Relation pgDistNode = table_open(DistNodeRelationId(), RowExclusiveLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
|
||||
|
||||
Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
|
||||
bool *isNulls = palloc0(tupleDescriptor->natts * sizeof(bool));
|
||||
|
||||
Datum nodeClusterStringDatum = CStringGetDatum(nodeMetadata->nodeCluster);
|
||||
Datum nodeClusterNameDatum = DirectFunctionCall1(namein, nodeClusterStringDatum);
|
||||
|
||||
/* form new shard tuple */
|
||||
memset(values, 0, sizeof(values));
|
||||
memset(isNulls, false, sizeof(isNulls));
|
||||
|
||||
values[Anum_pg_dist_node_nodeid - 1] = UInt32GetDatum(nodeid);
|
||||
values[Anum_pg_dist_node_groupid - 1] = Int32GetDatum(nodeMetadata->groupId);
|
||||
values[Anum_pg_dist_node_nodename - 1] = CStringGetTextDatum(nodeName);
|
||||
|
|
@ -2924,13 +3282,15 @@ InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, NodeMetadata *nodeMeta
|
|||
values[Anum_pg_dist_node_nodecluster - 1] = nodeClusterNameDatum;
|
||||
values[Anum_pg_dist_node_shouldhaveshards - 1] = BoolGetDatum(
|
||||
nodeMetadata->shouldHaveShards);
|
||||
|
||||
Relation pgDistNode = table_open(DistNodeRelationId(), RowExclusiveLock);
|
||||
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
|
||||
values[GetNodeIsCloneAttrIndexInPgDistNode(tupleDescriptor)] =
|
||||
BoolGetDatum(nodeMetadata->nodeisclone);
|
||||
values[GetNodePrimaryNodeIdAttrIndexInPgDistNode(tupleDescriptor)] =
|
||||
Int32GetDatum(nodeMetadata->nodeprimarynodeid);
|
||||
HeapTuple heapTuple = heap_form_tuple(tupleDescriptor, values, isNulls);
|
||||
|
||||
PushActiveSnapshot(GetTransactionSnapshot());
|
||||
CatalogTupleInsert(pgDistNode, heapTuple);
|
||||
PopActiveSnapshot();
|
||||
|
||||
CitusInvalidateRelcacheByRelid(DistNodeRelationId());
|
||||
|
||||
|
|
@ -2939,6 +3299,9 @@ InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, NodeMetadata *nodeMeta
|
|||
|
||||
/* close relation */
|
||||
table_close(pgDistNode, NoLock);
|
||||
|
||||
pfree(values);
|
||||
pfree(isNulls);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -2965,8 +3328,18 @@ DeleteNodeRow(char *nodeName, int32 nodePort)
|
|||
* https://github.com/citusdata/citus/pull/2855#discussion_r313628554
|
||||
* https://github.com/citusdata/citus/issues/1890
|
||||
*/
|
||||
Relation replicaIndex = index_open(RelationGetPrimaryKeyIndex(pgDistNode),
|
||||
AccessShareLock);
|
||||
#if PG_VERSION_NUM >= PG_VERSION_18
|
||||
|
||||
/* PG 18+ adds a bool “deferrable_ok” parameter */
|
||||
Relation replicaIndex =
|
||||
index_open(RelationGetPrimaryKeyIndex(pgDistNode, false),
|
||||
AccessShareLock);
|
||||
#else
|
||||
Relation replicaIndex =
|
||||
index_open(RelationGetPrimaryKeyIndex(pgDistNode),
|
||||
AccessShareLock);
|
||||
#endif
|
||||
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_pg_dist_node_nodename,
|
||||
BTEqualStrategyNumber, F_TEXTEQ, CStringGetTextDatum(nodeName));
|
||||
|
|
@ -3005,19 +3378,18 @@ DeleteNodeRow(char *nodeName, int32 nodePort)
|
|||
* the caller already has locks on the tuple, and doesn't perform any locking.
|
||||
*/
|
||||
static WorkerNode *
|
||||
TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple)
|
||||
TupleToWorkerNode(Relation pgDistNode, TupleDesc tupleDescriptor, HeapTuple heapTuple)
|
||||
{
|
||||
Datum datumArray[Natts_pg_dist_node];
|
||||
bool isNullArray[Natts_pg_dist_node];
|
||||
|
||||
Assert(!HeapTupleHasNulls(heapTuple));
|
||||
|
||||
/*
|
||||
* This function can be called before "ALTER TABLE ... ADD COLUMN nodecluster ...",
|
||||
* therefore heap_deform_tuple() won't set the isNullArray for this column. We
|
||||
* initialize it true to be safe in that case.
|
||||
/* we add remove columns from pg_dist_node during extension upgrade and
|
||||
* and downgrads. Now the issue here is PostgreSQL never reuses the old
|
||||
* attnum. Dropped columns leave “holes” (attributes with attisdropped = true),
|
||||
* and a re-added column with the same name gets a new attnum at the end. So
|
||||
* we cannot use the deined Natts_pg_dist_node to allocate memory and also
|
||||
* we need to cater for the holes when fetching the column values
|
||||
*/
|
||||
memset(isNullArray, true, sizeof(isNullArray));
|
||||
int nAtts = tupleDescriptor->natts;
|
||||
Datum *datumArray = palloc0(sizeof(Datum) * nAtts);
|
||||
bool *isNullArray = palloc0(sizeof(bool) * nAtts);
|
||||
|
||||
/*
|
||||
* We use heap_deform_tuple() instead of heap_getattr() to expand tuple
|
||||
|
|
@@ -3044,10 +3416,11 @@ TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple)
											  1]);

	/*
	 * nodecluster, nodeisclone and nodeprimarynodeid columns can be missing. In case
	 * of extension creation/upgrade, master_initialize_node_metadata function is
	 * called before the nodecluster column is added to pg_dist_node table.
	 */
	if (!isNullArray[Anum_pg_dist_node_nodecluster - 1])
	{
		Name nodeClusterName =
@@ -3056,10 +3429,68 @@ TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple)
		strlcpy(workerNode->nodeCluster, nodeClusterString, NAMEDATALEN);
	}

	int nodeIsCloneIdx = GetNodeIsCloneAttrIndexInPgDistNode(tupleDescriptor);
	int nodePrimaryNodeIdIdx = GetNodePrimaryNodeIdAttrIndexInPgDistNode(tupleDescriptor);

	if (!isNullArray[nodeIsCloneIdx])
	{
		workerNode->nodeisclone = DatumGetBool(datumArray[nodeIsCloneIdx]);
	}

	if (!isNullArray[nodePrimaryNodeIdIdx])
	{
		workerNode->nodeprimarynodeid = DatumGetInt32(datumArray[nodePrimaryNodeIdIdx]);
	}

	pfree(datumArray);
	pfree(isNullArray);

	return workerNode;
}


/*
 * GetNodePrimaryNodeIdAttrIndexInPgDistNode returns the 0-based attribute
 * index of the nodeprimarynodeid attribute.
 *
 * The nodeprimarynodeid attribute was added to pg_dist_node via ALTER TABLE
 * after the version where Citus started supporting downgrades, and it's one of
 * the two columns that we've introduced to pg_dist_node since then.
 *
 * In case of a downgrade + upgrade, tupleDesc->natts becomes greater than
 * Natts_pg_dist_node; when that happens, we know that the index of
 * nodeprimarynodeid is no longer Anum_pg_dist_node_nodeprimarynodeid - 1 but
 * tupleDesc->natts - 1.
 */
static int
GetNodePrimaryNodeIdAttrIndexInPgDistNode(TupleDesc tupleDesc)
{
	return tupleDesc->natts == Natts_pg_dist_node
		   ? (Anum_pg_dist_node_nodeprimarynodeid - 1)
		   : tupleDesc->natts - 1;
}


/*
 * GetNodeIsCloneAttrIndexInPgDistNode returns the 0-based attribute index of
 * the nodeisclone attribute.
 *
 * Like GetNodePrimaryNodeIdAttrIndexInPgDistNode(), it performs this
 * calculation because nodeisclone is a column added to pg_dist_node after we
 * started supporting downgrades.
 *
 * The only difference from that function is that after a downgrade + upgrade
 * the index of nodeisclone is tupleDesc->natts - 2 rather than
 * Anum_pg_dist_node_nodeisclone - 1, because the two columns were added
 * consecutively: first nodeisclone, then nodeprimarynodeid.
 */
static int
GetNodeIsCloneAttrIndexInPgDistNode(TupleDesc tupleDesc)
{
	return tupleDesc->natts == Natts_pg_dist_node
		   ? (Anum_pg_dist_node_nodeisclone - 1)
		   : tupleDesc->natts - 2;
}
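
The attnum "holes" these helpers work around are ordinary PostgreSQL behavior and are easy to reproduce; a minimal sketch (hypothetical scratch table, not part of this change):

CREATE TABLE attnum_demo (a int, b int);
ALTER TABLE attnum_demo DROP COLUMN b;
ALTER TABLE attnum_demo ADD COLUMN b int;
-- the dropped column keeps its attnum as a hole; the re-added column gets a new one
SELECT attname, attnum, attisdropped
FROM pg_attribute
WHERE attrelid = 'attnum_demo'::regclass AND attnum > 0;
--            attname            | attnum | attisdropped
-- a                             |      1 | f
-- ........pg.dropped.2........  |      2 | t
-- b                             |      3 | f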


/*
 * StringToDatum transforms a string representation into a Datum.
 */
@@ -3136,15 +3567,15 @@ UnsetMetadataSyncedForAllWorkers(void)
		updatedAtLeastOne = true;
	}

	Datum *values = palloc(tupleDescriptor->natts * sizeof(Datum));
	bool *isnull = palloc(tupleDescriptor->natts * sizeof(bool));
	bool *replace = palloc(tupleDescriptor->natts * sizeof(bool));

	while (HeapTupleIsValid(heapTuple))
	{
		memset(values, 0, tupleDescriptor->natts * sizeof(Datum));
		memset(isnull, 0, tupleDescriptor->natts * sizeof(bool));
		memset(replace, 0, tupleDescriptor->natts * sizeof(bool));

		values[Anum_pg_dist_node_metadatasynced - 1] = BoolGetDatum(false);
		replace[Anum_pg_dist_node_metadatasynced - 1] = true;

@@ -3167,6 +3598,10 @@ UnsetMetadataSyncedForAllWorkers(void)
	CatalogCloseIndexes(indstate);
	table_close(relation, NoLock);

	pfree(values);
	pfree(isnull);
	pfree(replace);

	return updatedAtLeastOne;
}
@@ -76,7 +76,8 @@ static List * DropTaskList(Oid relationId, char *schemaName, char *relationName,
										   List *deletableShardIntervalList);
static void ExecuteDropShardPlacementCommandRemotely(ShardPlacement *shardPlacement,
													 const char *shardRelationName,
													 const char *
													 dropShardPlacementCommand);
static char * CreateDropShardPlacementCommand(const char *schemaName,
											  const char *shardRelationName,
											  char storageType);
@@ -0,0 +1,424 @@
#include "postgres.h"

#include "utils/fmgrprotos.h"
#include "utils/pg_lsn.h"

#include "distributed/argutils.h"
#include "distributed/clonenode_utils.h"
#include "distributed/listutils.h"
#include "distributed/metadata_cache.h"
#include "distributed/metadata_sync.h"
#include "distributed/remote_commands.h"
#include "distributed/shard_rebalancer.h"


static void BlockAllWritesToWorkerNode(WorkerNode *workerNode);
static bool GetNodeIsInRecoveryStatus(WorkerNode *workerNode);
static void PromoteCloneNode(WorkerNode *cloneWorkerNode);
static void EnsureSingleNodePromotion(WorkerNode *primaryNode);

PG_FUNCTION_INFO_V1(citus_promote_clone_and_rebalance);

/*
 * citus_promote_clone_and_rebalance promotes an inactive clone node to become
 * the new primary node, replacing its original primary node.
 *
 * This function performs the following steps:
 * 1. Validates that the clone node exists and is properly configured
 * 2. Ensures the clone is inactive and has a valid primary node reference
 * 3. Blocks all writes to the primary node to prevent data divergence
 * 4. Waits for the clone to catch up with the primary's WAL position
 * 5. Promotes the clone node to become a standalone primary
 * 6. Updates metadata to mark the clone as active and primary
 * 7. Rebalances shards between the old primary and new primary
 * 8. Returns information about the promotion and any shard movements
 *
 * Arguments:
 * - clone_nodeid: The node ID of the clone to promote
 * - catchUpTimeoutSeconds: Maximum time to wait for clone to catch up (default: 300)
 *
 * The function ensures data consistency by blocking writes during the promotion
 * process and verifying replication lag before proceeding.
 */
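
A usage sketch (the SQL-level signature is not shown in this diff; the argument order is inferred from the C argument handling below, where argument 1 is a rebalance strategy name and argument 2 the catch-up timeout):

-- promote clone node 12 using the default strategy, waiting up to 10 minutes
SELECT citus_promote_clone_and_rebalance(12, NULL, 600);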
Datum
citus_promote_clone_and_rebalance(PG_FUNCTION_ARGS)
{
	CheckCitusVersion(ERROR);

	/* Ensure superuser and coordinator */
	EnsureSuperUser();
	EnsureCoordinator();

	/* Get clone_nodeid argument */
	int32 cloneNodeIdArg = PG_GETARG_INT32(0);

	/* Get catchUpTimeoutSeconds argument with default value of 300 */
	int32 catchUpTimeoutSeconds = PG_ARGISNULL(2) ? 300 : PG_GETARG_INT32(2);

	/* Lock pg_dist_node to prevent concurrent modifications during this operation */
	LockRelationOid(DistNodeRelationId(), RowExclusiveLock);

	WorkerNode *cloneNode = FindNodeAnyClusterByNodeId(cloneNodeIdArg);
	if (cloneNode == NULL)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Clone node with ID %d not found.", cloneNodeIdArg)));
	}

	if (!cloneNode->nodeisclone || cloneNode->nodeprimarynodeid == 0)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Node %s:%d (ID %d) is not a valid clone or its "
							   "primary node ID is not set.",
							   cloneNode->workerName, cloneNode->workerPort,
							   cloneNode->nodeId)));
	}

	if (cloneNode->isActive)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Clone node %s:%d (ID %d) is already active and "
							   "cannot be promoted.",
							   cloneNode->workerName, cloneNode->workerPort,
							   cloneNode->nodeId)));
	}

	WorkerNode *primaryNode = FindNodeAnyClusterByNodeId(cloneNode->nodeprimarynodeid);
	if (primaryNode == NULL)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Primary node with ID %d (for clone %s:%d) not found.",
							   cloneNode->nodeprimarynodeid, cloneNode->workerName,
							   cloneNode->workerPort)));
	}

	if (primaryNode->nodeisclone)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Primary node %s:%d (ID %d) is itself a clone.",
							   primaryNode->workerName, primaryNode->workerPort,
							   primaryNode->nodeId)));
	}

	if (!primaryNode->isActive)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Primary node %s:%d (ID %d) is not active.",
							   primaryNode->workerName, primaryNode->workerPort,
							   primaryNode->nodeId)));
	}

	/* Ensure the primary node is related to the clone node */
	if (primaryNode->nodeId != cloneNode->nodeprimarynodeid)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Clone node %s:%d (ID %d) is not a clone of the "
							   "primary node %s:%d (ID %d).",
							   cloneNode->workerName, cloneNode->workerPort,
							   cloneNode->nodeId,
							   primaryNode->workerName, primaryNode->workerPort,
							   primaryNode->nodeId)));
	}

	EnsureSingleNodePromotion(primaryNode);
	ereport(NOTICE, (errmsg("Starting promotion process for clone node %s:%d (ID %d), "
							"original primary %s:%d (ID %d)",
							cloneNode->workerName, cloneNode->workerPort,
							cloneNode->nodeId,
							primaryNode->workerName, primaryNode->workerPort,
							primaryNode->nodeId)));

	/* Step 0: Check if clone is a replica of the provided primary node and is not synchronous */
	char *operation = "promote";
	EnsureValidCloneMode(primaryNode, cloneNode->workerName, cloneNode->workerPort,
						 operation);

	/* Step 1: Block Writes on Original Primary's Shards */
	ereport(NOTICE, (errmsg("Blocking writes on shards of original primary node "
							"%s:%d (group %d)",
							primaryNode->workerName, primaryNode->workerPort,
							primaryNode->groupId)));

	BlockAllWritesToWorkerNode(primaryNode);

	/* Step 2: Wait for Clone to Catch Up */
	ereport(NOTICE, (errmsg("Waiting for clone %s:%d to catch up with primary %s:%d "
							"(timeout: %d seconds)",
							cloneNode->workerName, cloneNode->workerPort,
							primaryNode->workerName, primaryNode->workerPort,
							catchUpTimeoutSeconds)));

	bool caughtUp = false;
	const int sleepIntervalSeconds = 5;
	int elapsedTimeSeconds = 0;

	while (elapsedTimeSeconds < catchUpTimeoutSeconds)
	{
		/* repLag is unsigned, so test for equality with zero rather than <= 0 */
		uint64 repLag = GetReplicationLag(primaryNode, cloneNode);
		if (repLag == 0)
		{
			caughtUp = true;
			break;
		}
		pg_usleep(sleepIntervalSeconds * 1000000L);
		elapsedTimeSeconds += sleepIntervalSeconds;
	}

	if (!caughtUp)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Clone %s:%d failed to catch up with primary %s:%d "
							   "within %d seconds.",
							   cloneNode->workerName, cloneNode->workerPort,
							   primaryNode->workerName, primaryNode->workerPort,
							   catchUpTimeoutSeconds)));
	}

	ereport(NOTICE, (errmsg("Clone %s:%d is now caught up with primary %s:%d.",
							cloneNode->workerName, cloneNode->workerPort,
							primaryNode->workerName, primaryNode->workerPort)));


	/* Step 3: PostgreSQL Clone Promotion */
	ereport(NOTICE, (errmsg("Attempting to promote clone %s:%d via pg_promote().",
							cloneNode->workerName, cloneNode->workerPort)));

	PromoteCloneNode(cloneNode);

	/* Step 4: Update Clone Metadata in pg_dist_node on Coordinator */

	ereport(NOTICE, (errmsg("Updating metadata for promoted clone %s:%d (ID %d)",
							cloneNode->workerName, cloneNode->workerPort,
							cloneNode->nodeId)));
	ActivateCloneNodeAsPrimary(cloneNode);

	/*
	 * We need to sync metadata changes to all nodes before rebalancing shards
	 * since the rebalancing algorithm depends on the latest metadata.
	 */
	SyncNodeMetadataToNodes();

	/* Step 5: Split Shards Between Primary and Clone */
	SplitShardsBetweenPrimaryAndClone(primaryNode, cloneNode,
									  PG_GETARG_NAME_OR_NULL(1));

	TransactionModifiedNodeMetadata = true; /* Inform Citus about metadata change */
	TriggerNodeMetadataSyncOnCommit();      /* Ensure changes are propagated */

	ereport(NOTICE, (errmsg("Clone node %s:%d (ID %d) metadata updated. "
							"It is now a primary",
							cloneNode->workerName, cloneNode->workerPort,
							cloneNode->nodeId)));

	/* Step 6: Unblock Writes (should be handled by transaction commit) */
	ereport(NOTICE, (errmsg("Clone node %s:%d (ID %d) successfully registered "
							"as a worker node",
							cloneNode->workerName, cloneNode->workerPort,
							cloneNode->nodeId)));

	PG_RETURN_VOID();
}
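
GetReplicationLag() is defined elsewhere (clonenode_utils); the catch-up check it backs corresponds conceptually to the standard streaming-replication lag query, roughly as follows (a sketch, not the exact query Citus runs; the application_name filter is hypothetical):

-- on the primary: bytes of WAL the clone has yet to replay
SELECT pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS lag_bytes
FROM pg_stat_replication
WHERE application_name = 'clone_node';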


/*
 * PromoteCloneNode promotes a clone node to a primary node using PostgreSQL's
 * pg_promote() function.
 *
 * This function performs the following steps:
 * 1. Connects to the clone node
 * 2. Executes pg_promote(wait := true) to promote the clone to primary
 * 3. Reconnects to verify the promotion was successful
 * 4. Checks if the node is still in recovery mode (which would indicate failure)
 *
 * The function throws an ERROR if:
 * - Connection to the clone node fails
 * - The pg_promote() command fails
 * - The clone is still in recovery mode after the promotion attempt
 *
 * On success, it logs a NOTICE message confirming the promotion.
 *
 * Note: This function assumes the clone has already been validated for promotion
 * (e.g., replication lag is acceptable, clone is not synchronous, etc.)
 */
static void
PromoteCloneNode(WorkerNode *cloneWorkerNode)
{
	/* Step 1: Connect to the clone node */
	int connectionFlag = 0;
	MultiConnection *cloneConnection = GetNodeConnection(connectionFlag,
														 cloneWorkerNode->workerName,
														 cloneWorkerNode->workerPort);

	if (PQstatus(cloneConnection->pgConn) != CONNECTION_OK)
	{
		ReportConnectionError(cloneConnection, ERROR);
	}

	/* Step 2: Execute pg_promote() to promote the clone to primary */
	const char *promoteQuery = "SELECT pg_promote(wait := true);";
	int resultCode = SendRemoteCommand(cloneConnection, promoteQuery);
	if (resultCode == 0)
	{
		ReportConnectionError(cloneConnection, ERROR);
	}
	ForgetResults(cloneConnection);
	CloseConnection(cloneConnection);

	/* Step 3: Reconnect and verify the promotion was successful */
	if (GetNodeIsInRecoveryStatus(cloneWorkerNode))
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Failed to promote clone %s:%d (ID %d). "
							   "It is still in recovery.",
							   cloneWorkerNode->workerName, cloneWorkerNode->workerPort,
							   cloneWorkerNode->nodeId)));
	}
	else
	{
		ereport(NOTICE, (errmsg("Clone node %s:%d (ID %d) has been successfully promoted.",
								cloneWorkerNode->workerName, cloneWorkerNode->workerPort,
								cloneWorkerNode->nodeId)));
	}
}


static void
BlockAllWritesToWorkerNode(WorkerNode *workerNode)
{
	ereport(NOTICE, (errmsg("Blocking all writes to worker node %s:%d (ID %d)",
							workerNode->workerName, workerNode->workerPort,
							workerNode->nodeId)));

	LockShardsInWorkerPlacementList(workerNode, AccessExclusiveLock);
}


/*
 * GetNodeIsInRecoveryStatus checks if a PostgreSQL node is currently in recovery mode.
 *
 * This function connects to the specified worker node and executes pg_is_in_recovery()
 * to determine if the node is still acting as a replica (in recovery) or has been
 * promoted to a primary (not in recovery).
 *
 * Arguments:
 * - workerNode: The WorkerNode to check recovery status for
 *
 * Returns:
 * - true if the node is in recovery mode (acting as a replica)
 * - false if the node is not in recovery mode (acting as a primary)
 *
 * The function will ERROR if:
 * - Cannot establish connection to the node
 * - The remote query fails
 * - The query result cannot be parsed
 *
 * This is used after promoting a clone node to verify that the
 * promotion was successful and the node is no longer in recovery mode.
 */
static bool
GetNodeIsInRecoveryStatus(WorkerNode *workerNode)
{
	int connectionFlag = 0;
	MultiConnection *nodeConnection = GetNodeConnection(connectionFlag,
														workerNode->workerName,
														workerNode->workerPort);

	if (PQstatus(nodeConnection->pgConn) != CONNECTION_OK)
	{
		ReportConnectionError(nodeConnection, ERROR);
	}

	const char *recoveryQuery = "SELECT pg_is_in_recovery();";
	int resultCode = SendRemoteCommand(nodeConnection, recoveryQuery);
	if (resultCode == 0)
	{
		ReportConnectionError(nodeConnection, ERROR);
	}

	PGresult *result = GetRemoteCommandResult(nodeConnection, true);
	if (!IsResponseOK(result))
	{
		ReportResultError(nodeConnection, result, ERROR);
	}

	List *recoveryStatusList = ReadFirstColumnAsText(result);
	if (list_length(recoveryStatusList) != 1)
	{
		PQclear(result);
		ClearResults(nodeConnection, true);
		CloseConnection(nodeConnection);

		ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE),
						errmsg("cannot parse recovery status result from %s:%d",
							   workerNode->workerName,
							   workerNode->workerPort)));
	}

	StringInfo recoveryStatusInfo = (StringInfo) linitial(recoveryStatusList);
	bool isInRecovery = (strcmp(recoveryStatusInfo->data, "t") == 0) ||
						(strcmp(recoveryStatusInfo->data, "true") == 0);

	PQclear(result);
	ForgetResults(nodeConnection);
	CloseConnection(nodeConnection);

	return isInRecovery;
}
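
The promote-then-verify handshake these two helpers implement maps to two plain SQL calls against the clone (a sketch of the observable behavior):

-- run on the clone: returns once promotion completes (or times out)
SELECT pg_promote(wait := true);
-- run on the clone afterwards: must now report 'f'
SELECT pg_is_in_recovery();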


/*
 * EnsureSingleNodePromotion ensures that only one node promotion operation
 * can proceed at a time by acquiring necessary locks and checking for
 * conflicting operations.
 *
 * This function performs the following safety checks:
 * 1. Verifies no rebalance operations are currently running, as they would
 *    conflict with the shard redistribution that occurs during promotion
 * 2. Acquires exclusive placement colocation locks on all shards residing
 *    on the primary node's group to prevent concurrent shard operations
 *
 * The locks are acquired in shard ID order to prevent deadlocks when
 * multiple operations attempt to lock the same set of shards.
 *
 * Arguments:
 * - primaryNode: The primary node whose shards need to be locked
 *
 * Throws ERROR if:
 * - A rebalance operation is already running
 * - Unable to acquire necessary locks
 */
static void
EnsureSingleNodePromotion(WorkerNode *primaryNode)
{
	/* Error out if some rebalancer is running */
	int64 jobId = 0;
	if (HasNonTerminalJobOfType("rebalance", &jobId))
	{
		ereport(ERROR, (errmsg("A rebalance operation is already running as job %ld",
							   jobId),
						errdetail("A rebalance was already scheduled as background job"),
						errhint("To monitor progress, run: SELECT * FROM "
								"citus_rebalance_status();")));
	}
	List *placementList = AllShardPlacementsOnNodeGroup(primaryNode->groupId);

	/* lock shards in order of shard id to prevent deadlock */
	placementList = SortList(placementList, CompareShardPlacementsByShardId);

	GroupShardPlacement *placement = NULL;
	foreach_declared_ptr(placement, placementList)
	{
		int64 shardId = placement->shardId;
		ShardInterval *shardInterval = LoadShardInterval(shardId);
		Oid distributedTableId = shardInterval->relationId;

		AcquirePlacementColocationLock(distributedTableId, ExclusiveLock,
									   "promote clone");
	}
}
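
Before invoking a promotion, an operator can check for the conflicting background job that the first guard rejects:

-- any non-terminal rebalance job will cause EnsureSingleNodePromotion to error out
SELECT * FROM citus_rebalance_status();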

@@ -78,7 +78,8 @@ static void GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity(Form_
																			indexForm,
																			List **
																			indexDDLEventList,
																			int
																			indexFlags);
static Datum WorkerNodeGetDatum(WorkerNode *workerNode, TupleDesc tupleDescriptor);

static char * CitusCreateAlterColumnarTableSet(char *qualifiedRelationName,

@@ -746,7 +747,12 @@ GetRelationIdentityOrPK(Relation rel)

	if (!OidIsValid(idxoid))
	{
		/* Determine the index OID of the primary key (PG18 adds a second parameter) */
#if PG_VERSION_NUM >= PG_VERSION_18
		idxoid = RelationGetPrimaryKeyIndex(rel, false /* deferrable_ok */);
#else
		idxoid = RelationGetPrimaryKeyIndex(rel);
#endif
	}

	return idxoid;
@@ -939,8 +939,8 @@ TryDropDatabaseOutsideTransaction(char *databaseName, char *nodeName, int nodePo
	 * because we don't want to open a transaction block on remote nodes as DROP
	 * DATABASE commands cannot be run inside a transaction block.
	 */
	if (ExecuteOptionalRemoteCommand(
			connection, commandString, NULL) != RESPONSE_OKAY)
	{
		executeCommand = false;
		break;
@@ -81,8 +81,29 @@ typedef struct RebalanceOptions
	Form_pg_dist_rebalance_strategy rebalanceStrategy;
	const char *operationName;
	WorkerNode *workerNode;
	List *involvedWorkerNodeList;
} RebalanceOptions;

typedef struct SplitPrimaryCloneShards
{
	/*
	 * primaryShardIdList contains the IDs of the shards that should stay on
	 * the primary worker node.
	 */
	List *primaryShardIdList;

	/*
	 * cloneShardIdList contains the IDs of the shards that should stay on
	 * the clone worker node.
	 */
	List *cloneShardIdList;
} SplitPrimaryCloneShards;


static SplitPrimaryCloneShards * GetPrimaryCloneSplitRebalanceSteps(RebalanceOptions
																	*options,
																	WorkerNode
																	*cloneNode);

/*
 * RebalanceState is used to keep the internal state of the rebalance
@@ -222,6 +243,7 @@ typedef struct ShardMoveDependencies
{
	HTAB *colocationDependencies;
	HTAB *nodeDependencies;
	bool parallelTransferColocatedShards;
} ShardMoveDependencies;

char *VariablesToBePassedToNewConnections = NULL;

@@ -270,7 +292,9 @@ static ShardCost GetShardCost(uint64 shardId, void *context);
static List * NonColocatedDistRelationIdList(void);
static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid);
static int64 RebalanceTableShardsBackground(RebalanceOptions *options, Oid
											shardReplicationModeOid,
											bool ParallelTransferReferenceTables,
											bool ParallelTransferColocatedShards);
static void AcquireRebalanceColocationLock(Oid relationId, const char *operationName);
static void ExecutePlacementUpdates(List *placementUpdateList, Oid
									shardReplicationModeOid, char *noticeOperation);

@@ -296,9 +320,12 @@ static HTAB * BuildShardSizesHash(ProgressMonitorData *monitor, HTAB *shardStati
static void ErrorOnConcurrentRebalance(RebalanceOptions *);
static List * GetSetCommandListForNewConnections(void);
static int64 GetColocationId(PlacementUpdateEvent *move);
static ShardMoveDependencies InitializeShardMoveDependencies(bool
															 ParallelTransferColocatedShards);
static int64 * GenerateTaskMoveDependencyList(PlacementUpdateEvent *move,
											  int64 colocationId,
											  int64 *refTablesDepTaskIds,
											  int refTablesDepTaskIdsCount,
											  ShardMoveDependencies shardMoveDependencies,
											  int *nDepends);
static void UpdateShardMoveDependencies(PlacementUpdateEvent *move, uint64 colocationId,

@@ -318,6 +345,7 @@ PG_FUNCTION_INFO_V1(pg_dist_rebalance_strategy_enterprise_check);
PG_FUNCTION_INFO_V1(citus_rebalance_start);
PG_FUNCTION_INFO_V1(citus_rebalance_stop);
PG_FUNCTION_INFO_V1(citus_rebalance_wait);
PG_FUNCTION_INFO_V1(get_snapshot_based_node_split_plan);

bool RunningUnderCitusTestSuite = false;
int MaxRebalancerLoggedIgnoredMoves = 5;
@@ -517,8 +545,17 @@ GetRebalanceSteps(RebalanceOptions *options)
		.context = &context,
	};

	if (options->involvedWorkerNodeList == NULL)
	{
		/*
		 * If the user did not specify a list of worker nodes, we use all the
		 * active worker nodes.
		 */
		options->involvedWorkerNodeList = SortedActiveWorkers();
	}

	/* the involved worker node list is sorted, keeping the function deterministic */
	List *activeWorkerList = options->involvedWorkerNodeList;
	int shardAllowedNodeCount = 0;
	WorkerNode *workerNode = NULL;
	foreach_declared_ptr(workerNode, activeWorkerList)
@@ -981,6 +1018,7 @@ rebalance_table_shards(PG_FUNCTION_ARGS)
		.excludedShardArray = PG_GETARG_ARRAYTYPE_P(3),
		.drainOnly = PG_GETARG_BOOL(5),
		.rebalanceStrategy = strategy,
		.involvedWorkerNodeList = NULL,
		.improvementThreshold = strategy->improvementThreshold,
	};
	Oid shardTransferModeOid = PG_GETARG_OID(4);
@@ -1014,6 +1052,12 @@ citus_rebalance_start(PG_FUNCTION_ARGS)
	PG_ENSURE_ARGNOTNULL(2, "shard_transfer_mode");
	Oid shardTransferModeOid = PG_GETARG_OID(2);

	PG_ENSURE_ARGNOTNULL(3, "parallel_transfer_reference_tables");
	bool ParallelTransferReferenceTables = PG_GETARG_BOOL(3);

	PG_ENSURE_ARGNOTNULL(4, "parallel_transfer_colocated_shards");
	bool ParallelTransferColocatedShards = PG_GETARG_BOOL(4);

	RebalanceOptions options = {
		.relationIdList = relationIdList,
		.threshold = strategy->defaultThreshold,

@@ -1023,7 +1067,9 @@ citus_rebalance_start(PG_FUNCTION_ARGS)
		.rebalanceStrategy = strategy,
		.improvementThreshold = strategy->improvementThreshold,
	};
	int jobId = RebalanceTableShardsBackground(&options, shardTransferModeOid,
											   ParallelTransferReferenceTables,
											   ParallelTransferColocatedShards);

	if (jobId == 0)
	{
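
The two new flags surface in SQL with the parameter names used in the PG_ENSURE_ARGNOTNULL checks above; a usage sketch (the other arguments and their defaults are assumed from the existing citus_rebalance_start signature):

SELECT citus_rebalance_start(
	shard_transfer_mode => 'force_logical',
	parallel_transfer_reference_tables => true,
	parallel_transfer_colocated_shards => true);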

@@ -1988,17 +2034,20 @@ GetColocationId(PlacementUpdateEvent *move)
 * given colocation group and the other one is for tracking source nodes of all moves.
 */
static ShardMoveDependencies
InitializeShardMoveDependencies(bool ParallelTransferColocatedShards)
{
	ShardMoveDependencies shardMoveDependencies;
	shardMoveDependencies.colocationDependencies = CreateSimpleHashWithNameAndSize(int64,
																				   ShardMoveDependencyInfo,
																				   "colocationDependencyHashMap",
																				   6);

	shardMoveDependencies.nodeDependencies = CreateSimpleHashWithNameAndSize(int32,
																			 ShardMoveSourceNodeHashEntry,
																			 "nodeDependencyHashMap",
																			 6);
	shardMoveDependencies.parallelTransferColocatedShards =
		ParallelTransferColocatedShards;
	return shardMoveDependencies;
}
@@ -2009,6 +2058,7 @@ InitializeShardMoveDependencies()
 */
static int64 *
GenerateTaskMoveDependencyList(PlacementUpdateEvent *move, int64 colocationId,
							   int64 *refTablesDepTaskIds, int refTablesDepTaskIdsCount,
							   ShardMoveDependencies shardMoveDependencies, int *nDepends)
{
	HTAB *dependsList = CreateSimpleHashSetWithNameAndSize(int64,

@@ -2016,13 +2066,17 @@ GenerateTaskMoveDependencyList(PlacementUpdateEvent *move, int64 colocationId,

	bool found;

	if (!shardMoveDependencies.parallelTransferColocatedShards)
	{
		/* Check if there exists a move in the same colocation group scheduled earlier. */
		ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search(
			shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER,
			&found);

		if (found)
		{
			hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
		}
	}

	/*

@@ -2045,6 +2099,23 @@ GenerateTaskMoveDependencyList(PlacementUpdateEvent *move, int64 colocationId,
		}
	}

	*nDepends = hash_get_num_entries(dependsList);
	if (*nDepends == 0)
	{
		/*
		 * A shard copy can only start after the copy of the reference table
		 * shards has finished, so each shard task gets a dependency on the
		 * task(s) that mark the reference table copy as complete.
		 */
		while (refTablesDepTaskIdsCount > 0)
		{
			int64 refTableTaskId = *refTablesDepTaskIds;
			hash_search(dependsList, &refTableTaskId, HASH_ENTER, NULL);
			refTablesDepTaskIds++;
			refTablesDepTaskIdsCount--;
		}
	}

	*nDepends = hash_get_num_entries(dependsList);

	int64 *dependsArray = NULL;
@@ -2076,9 +2147,13 @@ static void
UpdateShardMoveDependencies(PlacementUpdateEvent *move, uint64 colocationId, int64 taskId,
							ShardMoveDependencies shardMoveDependencies)
{
	if (!shardMoveDependencies.parallelTransferColocatedShards)
	{
		ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search(
			shardMoveDependencies.colocationDependencies, &colocationId,
			HASH_ENTER, NULL);
		shardMoveDependencyInfo->taskId = taskId;
	}

	bool found;
	ShardMoveSourceNodeHashEntry *shardMoveSourceNodeHashEntry = hash_search(
@@ -2103,7 +2178,9 @@ UpdateShardMoveDependencies(PlacementUpdateEvent *move, uint64 colocationId, int
 * background job+task infrastructure.
 */
static int64
RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationModeOid,
							   bool ParallelTransferReferenceTables,
							   bool ParallelTransferColocatedShards)
{
	if (list_length(options->relationIdList) == 0)
	{
@@ -2174,7 +2251,8 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
	initStringInfo(&buf);

	List *referenceTableIdList = NIL;
	int64 *refTablesDepTaskIds = NULL;
	int refTablesDepTaskIdsCount = 0;

	if (HasNodesWithMissingReferenceTables(&referenceTableIdList))
	{
@@ -2187,22 +2265,41 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
		 * Reference tables need to be copied to (newly-added) nodes, this needs to be the
		 * first task before we can move any other table.
		 */
		if (ParallelTransferReferenceTables)
		{
			refTablesDepTaskIds =
				ScheduleTasksToParallelCopyReferenceTablesOnAllMissingNodes(
					jobId, shardTransferMode, &refTablesDepTaskIdsCount);
			ereport(DEBUG2,
					(errmsg("%d dependent copy reference table tasks for job %ld",
							refTablesDepTaskIdsCount, jobId),
					 errdetail("Rebalance scheduled as background job"),
					 errhint("To monitor progress, run: SELECT * FROM "
							 "citus_rebalance_status();")));
		}
		else
		{
			/* Move all reference tables as a single task, the classical way. */
			appendStringInfo(&buf,
							 "SELECT pg_catalog.replicate_reference_tables(%s)",
							 quote_literal_cstr(shardTranferModeLabel));

			int32 nodesInvolved[] = { 0 };

			/* replicate_reference_tables permissions require superuser */
			Oid superUserId = CitusExtensionOwner();
			BackgroundTask *task = ScheduleBackgroundTask(jobId, superUserId, buf.data, 0,
														  NULL, 0, nodesInvolved);
			refTablesDepTaskIds = palloc0(sizeof(int64));
			refTablesDepTaskIds[0] = task->taskid;
			refTablesDepTaskIdsCount = 1;
		}
	}

	PlacementUpdateEvent *move = NULL;

	ShardMoveDependencies shardMoveDependencies =
		InitializeShardMoveDependencies(ParallelTransferColocatedShards);

	foreach_declared_ptr(move, placementUpdateList)
	{
@@ -2220,16 +2317,11 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
		int nDepends = 0;

		int64 *dependsArray = GenerateTaskMoveDependencyList(move, colocationId,
															 refTablesDepTaskIds,
															 refTablesDepTaskIdsCount,
															 shardMoveDependencies,
															 &nDepends);

		int32 nodesInvolved[2] = { 0 };
		nodesInvolved[0] = move->sourceNode->nodeId;
		nodesInvolved[1] = move->targetNode->nodeId;
@@ -3547,6 +3639,352 @@ EnsureShardCostUDF(Oid functionOid)
}


/*
 * SplitShardsBetweenPrimaryAndClone splits the shards in shardPlacementList
 * between the primary and clone nodes, adding them to the respective lists.
 */
void
SplitShardsBetweenPrimaryAndClone(WorkerNode *primaryNode,
								  WorkerNode *cloneNode,
								  Name strategyName)
{
	CheckCitusVersion(ERROR);

	List *relationIdList = NonColocatedDistRelationIdList();

	/* we use the default rebalance strategy unless one is given */
	Form_pg_dist_rebalance_strategy strategy = GetRebalanceStrategy(strategyName);

	RebalanceOptions options = {
		.relationIdList = relationIdList,
		.threshold = 0,                 /* threshold is not strictly needed for two nodes */
		.maxShardMoves = -1,            /* no limit on moves between these two nodes */
		.excludedShardArray = construct_empty_array(INT8OID),
		.drainOnly = false,             /* not a drain operation */
		.rebalanceStrategy = strategy,
		.improvementThreshold = 0,      /* consider all beneficial moves */
		.workerNode = primaryNode       /* the primary node is the source node */
	};

	SplitPrimaryCloneShards *splitShards =
		GetPrimaryCloneSplitRebalanceSteps(&options, cloneNode);
	AdjustShardsForPrimaryCloneNodeSplit(primaryNode, cloneNode,
										 splitShards->primaryShardIdList,
										 splitShards->cloneShardIdList);
}


/*
 * GetPrimaryCloneSplitRebalanceSteps returns the split of shards between a
 * primary node and its clone that is needed to rebalance the given tables
 * across the two nodes.
 */
static SplitPrimaryCloneShards *
GetPrimaryCloneSplitRebalanceSteps(RebalanceOptions *options, WorkerNode *cloneNode)
{
	WorkerNode *sourceNode = options->workerNode;
	WorkerNode *targetNode = cloneNode;

	/* Initialize rebalance plan functions and context */
	EnsureShardCostUDF(options->rebalanceStrategy->shardCostFunction);
	EnsureNodeCapacityUDF(options->rebalanceStrategy->nodeCapacityFunction);
	EnsureShardAllowedOnNodeUDF(options->rebalanceStrategy->shardAllowedOnNodeFunction);

	RebalanceContext context;
	memset(&context, 0, sizeof(RebalanceContext));
	fmgr_info(options->rebalanceStrategy->shardCostFunction, &context.shardCostUDF);
	fmgr_info(options->rebalanceStrategy->nodeCapacityFunction, &context.nodeCapacityUDF);
	fmgr_info(options->rebalanceStrategy->shardAllowedOnNodeFunction,
			  &context.shardAllowedOnNodeUDF);

	RebalancePlanFunctions rebalancePlanFunctions = {
		.shardAllowedOnNode = ShardAllowedOnNode,
		.nodeCapacity = NodeCapacity,
		.shardCost = GetShardCost,
		.context = &context,
	};

	/*
	 * Collect all active shard placements on the source node for the given relations.
	 * Unlike the main rebalancer, we build a single list of all relevant source
	 * placements across all specified relations (or all relations if none specified).
	 */
	List *allSourcePlacements = NIL;
	Oid relationIdItr = InvalidOid;
	foreach_declared_oid(relationIdItr, options->relationIdList)
	{
		List *shardPlacementList = FullShardPlacementList(relationIdItr,
														  options->excludedShardArray);
		List *activeShardPlacementsForRelation =
			FilterShardPlacementList(shardPlacementList, IsActiveShardPlacement);

		ShardPlacement *placement = NULL;
		foreach_declared_ptr(placement, activeShardPlacementsForRelation)
		{
			if (placement->nodeId == sourceNode->nodeId)
			{
				/* Ensure we don't add a duplicate shardId if it's somehow listed under multiple relations */
				bool alreadyAdded = false;
				ShardPlacement *existingPlacement = NULL;
				foreach_declared_ptr(existingPlacement, allSourcePlacements)
				{
					if (existingPlacement->shardId == placement->shardId)
					{
						alreadyAdded = true;
						break;
					}
				}
				if (!alreadyAdded)
				{
					allSourcePlacements = lappend(allSourcePlacements, placement);
				}
			}
		}
	}

	List *activeWorkerList = list_make2(options->workerNode, cloneNode);
	SplitPrimaryCloneShards *splitShards = palloc0(sizeof(SplitPrimaryCloneShards));
	splitShards->primaryShardIdList = NIL;
	splitShards->cloneShardIdList = NIL;

	if (list_length(allSourcePlacements) > 0)
	{
		/*
		 * Initialize RebalanceState considering only the source node's shards
		 * and the two active workers (source and target).
		 */
		RebalanceState *state = InitRebalanceState(activeWorkerList, allSourcePlacements,
												   &rebalancePlanFunctions);

		NodeFillState *sourceFillState = NULL;
		NodeFillState *targetFillState = NULL;
		ListCell *fsc = NULL;

		/* Identify the fill states for our specific source and target nodes */
		foreach(fsc, state->fillStateListAsc) /* could be fillStateListDesc too, order doesn't matter here */
		{
			NodeFillState *fs = (NodeFillState *) lfirst(fsc);
			if (fs->node->nodeId == sourceNode->nodeId)
			{
				sourceFillState = fs;
			}
			else if (fs->node->nodeId == targetNode->nodeId)
			{
				targetFillState = fs;
			}
		}

		if (sourceFillState != NULL && targetFillState != NULL)
		{
			/*
			 * The goal is to move roughly half the total cost from source to
			 * target. The target node is assumed to be empty; any existing load
			 * on it is not considered for this two-node balancing plan. We
			 * simulate the balancing between the two nodes: all shards start on
			 * sourceFillState, targetFillState starts empty (in terms of these
			 * specific shards), and we move shards from source to target until
			 * their costs are as balanced as possible.
			 */
			float4 sourceCurrentCost = sourceFillState->totalCost;
			float4 targetCurrentCost = 0; /* cost on target from these source shards */

			/* Sort shards on source node by cost (descending). This is a common heuristic. */
			sourceFillState->shardCostListDesc = SortList(sourceFillState->shardCostListDesc,
														  CompareShardCostDesc);

			List *potentialMoves = NIL;
			ListCell *lc_shardcost = NULL;

			/*
			 * Iterate through each shard on the source node. For each shard,
			 * decide whether moving it to the target node would improve the balance.
			 */
			foreach(lc_shardcost, sourceFillState->shardCostListDesc)
			{
				ShardCost *shardToConsider = (ShardCost *) lfirst(lc_shardcost);

				/*
				 * Greedy rule: move this shard exactly when doing so reduces
				 * the cost difference between the two nodes.
				 *
				 *   difference before: abs(sourceCurrentCost - targetCurrentCost)
				 *   difference after:  abs((sourceCurrentCost - cost) -
				 *                          (targetCurrentCost + cost))
				 *
				 * Move the shard if the new difference is smaller.
				 */
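				/*
				 * Worked example (illustrative numbers only): shard costs on
				 * the source are 4, 3, 2, 1 (total 10). Moving cost 4 shrinks
				 * the difference 10 -> 2, so it moves; moving cost 3 would
				 * change it 2 -> 4, so it stays; cost 2 changes 2 -> 2 (no
				 * improvement), so it stays; cost 1 changes 2 -> 0, so it
				 * moves. Final split: cost 5 on the primary, cost 5 on the
				 * clone.
				 */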
				float4 costOfShard = shardToConsider->cost;
				float4 diffBefore = fabsf(sourceCurrentCost - targetCurrentCost);
				float4 diffAfter = fabsf((sourceCurrentCost - costOfShard) -
										 (targetCurrentCost + costOfShard));

				if (diffAfter < diffBefore)
				{
					PlacementUpdateEvent *update = palloc0(sizeof(PlacementUpdateEvent));
					update->shardId = shardToConsider->shardId;
					update->sourceNode = sourceNode;
					update->targetNode = targetNode;
					update->updateType = PLACEMENT_UPDATE_MOVE;
					potentialMoves = lappend(potentialMoves, update);
					splitShards->cloneShardIdList =
						lappend_int(splitShards->cloneShardIdList,
									shardToConsider->shardId);

					/* Update simulated costs for the next iteration */
					sourceCurrentCost -= costOfShard;
					targetCurrentCost += costOfShard;
				}
				else
				{
					splitShards->primaryShardIdList =
						lappend_int(splitShards->primaryShardIdList,
									shardToConsider->shardId);
				}
			}
		}

		/* RebalanceState lives in the current memory context and is cleaned up with it */
	}
	return splitShards;
}

/*
 * get_snapshot_based_node_split_plan outputs the shard placement plan for a
 * snapshot-based (primary and replica) node split.
 *
 * SQL signature:
 * get_snapshot_based_node_split_plan(
 *     primary_node_name text,
 *     primary_node_port integer,
 *     replica_node_name text,
 *     replica_node_port integer,
 *     rebalance_strategy name DEFAULT NULL
 * )
 */
Datum
get_snapshot_based_node_split_plan(PG_FUNCTION_ARGS)
{
	CheckCitusVersion(ERROR);

	text *primaryNodeNameText = PG_GETARG_TEXT_P(0);
	int32 primaryNodePort = PG_GETARG_INT32(1);
	text *cloneNodeNameText = PG_GETARG_TEXT_P(2);
	int32 cloneNodePort = PG_GETARG_INT32(3);

	char *primaryNodeName = text_to_cstring(primaryNodeNameText);
	char *cloneNodeName = text_to_cstring(cloneNodeNameText);

	WorkerNode *primaryNode = FindWorkerNodeOrError(primaryNodeName, primaryNodePort);
	WorkerNode *cloneNode = FindWorkerNodeOrError(cloneNodeName, cloneNodePort);

	if (!cloneNode->nodeisclone || cloneNode->nodeprimarynodeid == 0)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Node %s:%d (ID %d) is not a valid clone or its "
							   "primary node ID is not set.",
							   cloneNode->workerName, cloneNode->workerPort,
							   cloneNode->nodeId)));
	}
	if (primaryNode->nodeisclone)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Primary node %s:%d (ID %d) is itself a replica.",
							   primaryNode->workerName, primaryNode->workerPort,
							   primaryNode->nodeId)));
	}

	/* Ensure the primary node is related to the replica node */
	if (primaryNode->nodeId != cloneNode->nodeprimarynodeid)
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("Clone node %s:%d (ID %d) is not a clone of the "
							   "primary node %s:%d (ID %d).",
							   cloneNode->workerName, cloneNode->workerPort,
							   cloneNode->nodeId,
							   primaryNode->workerName, primaryNode->workerPort,
							   primaryNode->nodeId)));
	}

	List *relationIdList = NonColocatedDistRelationIdList();

	Form_pg_dist_rebalance_strategy strategy = GetRebalanceStrategy(
		PG_GETARG_NAME_OR_NULL(4));

	RebalanceOptions options = {
		.relationIdList = relationIdList,
		.threshold = 0,                 /* threshold is not strictly needed for two nodes */
		.maxShardMoves = -1,            /* no limit on moves between these two nodes */
		.excludedShardArray = construct_empty_array(INT8OID),
		.drainOnly = false,             /* not a drain operation */
		.rebalanceStrategy = strategy,
		.improvementThreshold = 0,      /* consider all beneficial moves */
		.workerNode = primaryNode       /* the primary node is the source node */
	};

	SplitPrimaryCloneShards *splitShards = GetPrimaryCloneSplitRebalanceSteps(
		&options, cloneNode);

	int shardId = 0;
	TupleDesc tupdesc;
	Tuplestorestate *tupstore = SetupTuplestore(fcinfo, &tupdesc);
	Datum values[4];
	bool nulls[4];

	foreach_declared_int(shardId, splitShards->primaryShardIdList)
	{
		ShardInterval *shardInterval = LoadShardInterval(shardId);
		List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
		ListCell *colocatedShardCell = NULL;
		foreach(colocatedShardCell, colocatedShardList)
		{
			ShardInterval *colocatedShard = lfirst(colocatedShardCell);
			int colocatedShardId = colocatedShard->shardId;
			memset(values, 0, sizeof(values));
			memset(nulls, 0, sizeof(nulls));

			values[0] = ObjectIdGetDatum(RelationIdForShard(colocatedShardId));
			values[1] = UInt64GetDatum(colocatedShardId);
			values[2] = UInt64GetDatum(ShardLength(colocatedShardId));
			values[3] = PointerGetDatum(cstring_to_text("Primary Node"));
			tuplestore_putvalues(tupstore, tupdesc, values, nulls);
		}
	}

	foreach_declared_int(shardId, splitShards->cloneShardIdList)
	{
		ShardInterval *shardInterval = LoadShardInterval(shardId);
		List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
		ListCell *colocatedShardCell = NULL;
		foreach(colocatedShardCell, colocatedShardList)
		{
			ShardInterval *colocatedShard = lfirst(colocatedShardCell);
			int colocatedShardId = colocatedShard->shardId;
			memset(values, 0, sizeof(values));
			memset(nulls, 0, sizeof(nulls));

			values[0] = ObjectIdGetDatum(RelationIdForShard(colocatedShardId));
			values[1] = UInt64GetDatum(colocatedShardId);
			values[2] = UInt64GetDatum(ShardLength(colocatedShardId));
			values[3] = PointerGetDatum(cstring_to_text("Clone Node"));
			tuplestore_putvalues(tupstore, tupdesc, values, nulls);
		}
	}

	return (Datum) 0;
}
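
A usage sketch (the hostnames are placeholders; each output row corresponds to the four values built above: the table, the shard ID, the shard size, and whether it lands on the primary or the clone):

SELECT *
FROM get_snapshot_based_node_split_plan('10.0.0.1', 5432, '10.0.0.2', 5432);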


/*
 * EnsureNodeCapacityUDF checks that the UDF matching the oid has the correct
 * signature to be used as a NodeCapacity function. The expected signature is:
@@ -131,17 +131,19 @@ static void UpdateDistributionColumnsForShardGroup(List *colocatedShardList,
													uint32 colocationId);
static void InsertSplitChildrenShardMetadata(List *shardGroupSplitIntervalListList,
											 List *workersForPlacementList);
static void CreatePartitioningHierarchyForBlockingSplit(List *
														shardGroupSplitIntervalListList,
														List *workersForPlacementList);
static void CreateForeignKeyConstraints(List *shardGroupSplitIntervalListList,
										List *workersForPlacementList);
static Task * CreateTaskForDDLCommandList(List *ddlCommandList, WorkerNode *workerNode);
static StringInfo CreateSplitShardReplicationSetupUDF(List *
													  sourceColocatedShardIntervalList,
													  List *
													  shardGroupSplitIntervalListList,
													  List *destinationWorkerNodesList,
													  DistributionColumnMap *
													  distributionColumnOverrides);
static List * ParseReplicationSlotInfoFromResult(PGresult *result);

static List * ExecuteSplitShardReplicationSetupUDF(WorkerNode *sourceWorkerNode,
@@ -816,7 +818,7 @@ CreateAuxiliaryStructuresForShardGroup(List *shardGroupSplitIntervalListList,
									  ROW_MODIFY_NONE,
									  ddlTaskExecList,
									  MaxAdaptiveExecutorPoolSize,
									  NULL /* jobIdList (ignored by API impl.) */);
}


@@ -883,7 +885,7 @@ DoSplitCopy(WorkerNode *sourceShardNode, List *sourceColocatedShardIntervalList,

	ExecuteTaskListOutsideTransaction(ROW_MODIFY_NONE, splitCopyTaskList,
									  MaxAdaptiveExecutorPoolSize,
									  NULL /* jobIdList (ignored by API impl.) */);
}

@@ -1546,12 +1548,15 @@ NonBlockingShardSplit(SplitOperation splitOperation,
	 * 9) Logically replicate all the changes and do most of the table DDL,
	 * like index and foreign key creation.
	 */
	bool skipInterShardRelationshipCreation = false;

	CompleteNonBlockingShardTransfer(sourceColocatedShardIntervalList,
									 sourceConnection,
									 publicationInfoHash,
									 logicalRepTargetList,
									 groupedLogicalRepTargetsHash,
									 SHARD_SPLIT,
									 skipInterShardRelationshipCreation);

	/*
	 * 10) Delete old shards metadata and mark the shards as to be deferred drop.
@@ -1877,8 +1882,9 @@ ExecuteSplitShardReplicationSetupUDF(WorkerNode *sourceWorkerNode,

		ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE),
						errmsg("Failed to run worker_split_shard_replication_setup UDF. "
							   "It should successfully execute for splitting a shard in "
							   "a non-blocking way. Please retry.")));
	}

	/* Get replication slot information */

@@ -107,16 +107,18 @@ static void ErrorIfSameNode(char *sourceNodeName, int sourceNodePort,
static void CopyShardTables(List *shardIntervalList, char *sourceNodeName,
							int32 sourceNodePort, char *targetNodeName,
							int32 targetNodePort, bool useLogicalReplication,
							const char *operationName, uint32 optionFlags);
static void CopyShardTablesViaLogicalReplication(List *shardIntervalList,
												 char *sourceNodeName,
												 int32 sourceNodePort,
												 char *targetNodeName,
												 int32 targetNodePort,
												 uint32 optionFlags);

static void CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName,
										  int32 sourceNodePort,
										  char *targetNodeName, int32 targetNodePort,
										  uint32 optionFlags);
static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName,
								   int32 sourceNodePort, const char *targetNodeName,
								   int32 targetNodePort);

@@ -165,7 +167,8 @@ static List * PostLoadShardCreationCommandList(ShardInterval *shardInterval,
static ShardCommandList * CreateShardCommandList(ShardInterval *shardInterval,
												 List *ddlCommandList);
static char * CreateShardCopyCommand(ShardInterval *shard, WorkerNode *targetNode);

static void AcquireShardPlacementLock(uint64_t shardId, int lockMode, Oid relationId,
									  const char *operationName);

/* declarations for dynamic loading */
PG_FUNCTION_INFO_V1(citus_copy_shard_placement);
@@ -174,7 +177,7 @@ PG_FUNCTION_INFO_V1(master_copy_shard_placement);
PG_FUNCTION_INFO_V1(citus_move_shard_placement);
PG_FUNCTION_INFO_V1(citus_move_shard_placement_with_nodeid);
PG_FUNCTION_INFO_V1(master_move_shard_placement);
PG_FUNCTION_INFO_V1(citus_internal_copy_single_shard_placement);

double DesiredPercentFreeAfterMove = 10;
bool CheckAvailableSpaceBeforeMove = true;


@@ -203,7 +206,7 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS)

	TransferShards(shardId, sourceNodeName, sourceNodePort,
				   targetNodeName, targetNodePort,
				   shardReplicationMode, SHARD_TRANSFER_COPY, 0);

	PG_RETURN_VOID();
}
@@ -232,7 +235,7 @@ citus_copy_shard_placement_with_nodeid(PG_FUNCTION_ARGS)

	TransferShards(shardId, sourceNode->workerName, sourceNode->workerPort,
				   targetNode->workerName, targetNode->workerPort,
				   shardReplicationMode, SHARD_TRANSFER_COPY, 0);

	PG_RETURN_VOID();
}
@@ -267,13 +270,69 @@ master_copy_shard_placement(PG_FUNCTION_ARGS)

	TransferShards(shardId, sourceNodeName, sourceNodePort,
				   targetNodeName, targetNodePort,
				   shardReplicationMode, SHARD_TRANSFER_COPY, 0);

	PG_RETURN_VOID();
}


/*
 * citus_internal_copy_single_shard_placement is an internal function that
 * copies a single shard placement from a source node to a target node.
 * It has two main differences from citus_copy_shard_placement:
 * 1. It copies only a single shard placement, not all colocated shards.
 * 2. It allows deferring constraint creation, and this same function can
 *    be used to create the constraints later.
 *
 * The primary use case for this function is to transfer the shards of
 * reference tables. Since all reference tables are colocated together,
 * and each reference table has only one shard, this function can be used
 * to transfer the shards of reference tables in parallel.
 * Furthermore, the reference tables may have relationships with
 * other reference tables, so we need to ensure that their constraints
 * are also created after copying the shards to the target node.
 * For this reason, we allow the caller to defer constraint creation.
 *
 * This function is not supposed to be called by the user directly.
 */
Datum
citus_internal_copy_single_shard_placement(PG_FUNCTION_ARGS)
{
	CheckCitusVersion(ERROR);
	EnsureCoordinator();

	int64 shardId = PG_GETARG_INT64(0);
	uint32 sourceNodeId = PG_GETARG_INT32(1);
	uint32 targetNodeId = PG_GETARG_INT32(2);
	uint32 flags = PG_GETARG_INT32(3);
	Oid shardReplicationModeOid = PG_GETARG_OID(4);

	bool missingOk = false;
	WorkerNode *sourceNode = FindNodeWithNodeId(sourceNodeId, missingOk);
	WorkerNode *targetNode = FindNodeWithNodeId(targetNodeId, missingOk);

	char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);

	/*
	 * This is an internal function that is used by the rebalancer.
	 * It is not supposed to be called by the user directly.
	 */
	if (!IsRebalancerInternalBackend())
	{
		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("This is an internal Citus function that can only"
							   " be used by a rebalancer task")));
	}

	TransferShards(shardId, sourceNode->workerName, sourceNode->workerPort,
				   targetNode->workerName, targetNode->workerPort,
				   shardReplicationMode, SHARD_TRANSFER_COPY, flags);

	PG_RETURN_VOID();
}
|
||||
|
||||
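As a minimal sketch of how the rebalancer is expected to drive this UDF (the
shard id and node ids below are made up; the argument order follows the
PG_GETARG calls above, and such a call would be rejected outside a rebalancer
task by the IsRebalancerInternalBackend() check):

    -- copy shard 102008 from node 2 to node 3, default behavior (flags = 0)
    SELECT citus_internal_copy_single_shard_placement(
        102008, 2, 3, 0, 'block_writes');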

/*
 * citus_move_shard_placement moves given shard (and its co-located shards) from one
 * node to the other node. To accomplish this it entirely recreates the table structure

@@ -315,7 +374,7 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
    char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
    TransferShards(shardId, sourceNodeName, sourceNodePort,
                   targetNodeName, targetNodePort,
                   shardReplicationMode, SHARD_TRANSFER_MOVE);
                   shardReplicationMode, SHARD_TRANSFER_MOVE, 0);

    PG_RETURN_VOID();
}

@@ -343,20 +402,77 @@ citus_move_shard_placement_with_nodeid(PG_FUNCTION_ARGS)
    char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
    TransferShards(shardId, sourceNode->workerName,
                   sourceNode->workerPort, targetNode->workerName,
                   targetNode->workerPort, shardReplicationMode, SHARD_TRANSFER_MOVE);
                   targetNode->workerPort, shardReplicationMode, SHARD_TRANSFER_MOVE, 0);

    PG_RETURN_VOID();
}


/*
 * TransferShards is the function for shard transfers.
 * AcquireShardPlacementLock tries to acquire a lock on the shardId
 * while moving/copying the shard placement. If this is not possible,
 * it fails instantly, because that means another move/copy of the
 * same shard is currently happening.
 */
static void
AcquireShardPlacementLock(uint64_t shardId, int lockMode, Oid relationId,
                          const char *operationName)
{
    LOCKTAG tag;
    const bool sessionLock = false;
    const bool dontWait = true;

    SET_LOCKTAG_SHARD_MOVE(tag, shardId);

    LockAcquireResult lockAcquired = LockAcquire(&tag, lockMode, sessionLock, dontWait);
    if (!lockAcquired)
    {
        ereport(ERROR, (errmsg("could not acquire the lock required to %s %s",
                               operationName,
                               generate_qualified_relation_name(relationId)),
                        errdetail("It means that either a concurrent shard move "
                                  "or colocated distributed table creation is "
                                  "happening."),
                        errhint("Make sure that the concurrent operation has "
                                "finished and re-run the command")));
    }
}
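Because the lock is taken with dontWait = true, a second transfer of the same
shard placement now errors out immediately instead of queueing behind the
first. A sketch of the user-visible behavior (shard id, node names, and table
name are illustrative):

    -- session 1:
    SELECT citus_move_shard_placement(102008, 'worker1', 5432, 'worker2', 5432);
    -- session 2, while session 1 is still running:
    SELECT citus_move_shard_placement(102008, 'worker1', 5432, 'worker3', 5432);
    -- ERROR:  could not acquire the lock required to move public.events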

/*
 * TransferShards is responsible for handling shard transfers.
 *
 * The optionFlags parameter controls the transfer behavior:
 *
 * - By default, shard colocation groups are treated as a single unit. This works
 *   well for distributed tables, since they can contain multiple colocated shards
 *   on the same node, and shard transfers can still be parallelized at the group level.
 *
 * - Reference tables are different: every reference table belongs to the same
 *   colocation group but has only a single shard. To parallelize reference table
 *   transfers, we must bypass the colocation group. The
 *   SHARD_TRANSFER_SINGLE_SHARD_ONLY flag enables this behavior by transferring
 *   only the specific shardId passed into the function, ignoring colocated shards.
 *
 * - Reference tables may also define foreign key relationships with each other.
 *   Since we cannot create those relationships until all shards have been moved,
 *   the SHARD_TRANSFER_SKIP_CREATE_RELATIONSHIPS flag is used to defer their
 *   creation until shard transfer completes.
 *
 * - After shards are transferred, the SHARD_TRANSFER_CREATE_RELATIONSHIPS_ONLY
 *   flag is used to create the foreign key relationships for already-transferred
 *   reference tables.
 *
 * Currently, optionFlags are only used to customize reference table transfers.
 * For distributed tables, optionFlags should always be set to 0. Passing 0 as
 * optionFlags means that the default behavior is used for all aspects of the
 * shard transfer: all colocated shards are treated as a single unit, and the
 * function returns only after creating the necessary relationships.
 */
void
TransferShards(int64 shardId, char *sourceNodeName,
               int32 sourceNodePort, char *targetNodeName,
               int32 targetNodePort, char shardReplicationMode,
               ShardTransferType transferType)
               ShardTransferType transferType, uint32 optionFlags)
{
    /* strings to be used in log messages */
    const char *operationName = ShardTransferTypeNames[transferType];
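Putting the three flags together, a reference table rebalance would issue one
single-shard copy per reference table with relationship creation deferred, and
then a second pass that only creates the foreign keys. A sketch via the
internal UDF, assuming the flags are bit values (the numeric values 1, 2, and
4 below are illustrative, not the actual definitions, and these calls only
succeed from a rebalancer task):

    -- phase 1: copy every reference table shard from node 2 to node 3,
    -- one call per shard, skipping foreign key creation (1 | 2)
    SELECT citus_internal_copy_single_shard_placement(
        shardid, 2, 3, 1 | 2, 'block_writes')
    FROM pg_dist_shard
    WHERE logicalrelid = 'my_reference_table'::regclass;

    -- phase 2: create the deferred relationships for the copied shards (1 | 4)
    SELECT citus_internal_copy_single_shard_placement(
        shardid, 2, 3, 1 | 4, 'block_writes')
    FROM pg_dist_shard
    WHERE logicalrelid = 'my_reference_table'::regclass;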

@@ -385,20 +501,36 @@ TransferShards(int64 shardId, char *sourceNodeName,

    ErrorIfTargetNodeIsNotSafeForTransfer(targetNodeName, targetNodePort, transferType);

    AcquirePlacementColocationLock(distributedTableId, ExclusiveLock, operationName);
    AcquirePlacementColocationLock(distributedTableId, RowExclusiveLock, operationName);

    List *colocatedTableList = ColocatedTableList(distributedTableId);
    List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
    List *colocatedTableList;
    List *colocatedShardList;

    /*
     * If SHARD_TRANSFER_SINGLE_SHARD_ONLY is set, we only transfer a single shard
     * specified by shardId. Otherwise, we transfer all colocated shards.
     */
    bool isSingleShardOnly = optionFlags & SHARD_TRANSFER_SINGLE_SHARD_ONLY;

    if (isSingleShardOnly)
    {
        colocatedTableList = list_make1_oid(distributedTableId);
        colocatedShardList = list_make1(shardInterval);
    }
    else
    {
        colocatedTableList = ColocatedTableList(distributedTableId);
        colocatedShardList = ColocatedShardIntervalList(shardInterval);
    }

    EnsureTableListOwner(colocatedTableList);

    if (transferType == SHARD_TRANSFER_MOVE)
    {
        /*
         * Block concurrent DDL / TRUNCATE commands on the relation. Similarly,
         * block concurrent citus_move_shard_placement() on any shard of
         * the same relation. This is OK for now since we're executing shard
         * moves sequentially anyway.
         * Block concurrent DDL / TRUNCATE commands on the relation, while
         * allowing concurrent citus_move_shard_placement() on the shards of
         * the same relation.
         */
        LockColocatedRelationsForMove(colocatedTableList);
    }

@@ -412,14 +544,66 @@ TransferShards(int64 shardId, char *sourceNodeName,

    /*
     * We sort shardIntervalList so that lock operations will not cause any
     * deadlocks.
     * deadlocks. But we do not need to do that if the list contains only one
     * shard.
     */
    colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
    if (!isSingleShardOnly)
    {
        colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
    }

    if (TransferAlreadyCompleted(colocatedShardList,
                                 sourceNodeName, sourceNodePort,
                                 targetNodeName, targetNodePort,
                                 transferType))
    /*
     * We have pretty much covered the concurrent rebalance operations, and we
     * want to allow concurrent moves within the same colocation group, but at
     * the same time we want to block concurrent moves on the same shard
     * placement. So we lock the shard moves before starting the transfer.
     */
    foreach_declared_ptr(shardInterval, colocatedShardList)
    {
        int64 shardIdToLock = shardInterval->shardId;
        AcquireShardPlacementLock(shardIdToLock, ExclusiveLock, distributedTableId,
                                  operationName);
    }

    bool transferAlreadyCompleted = TransferAlreadyCompleted(colocatedShardList,
                                                             sourceNodeName,
                                                             sourceNodePort,
                                                             targetNodeName,
                                                             targetNodePort,
                                                             transferType);

    /*
     * If we just need to create the shard relationships, we don't need to do
     * anything other than calling CopyShardTables with the
     * SHARD_TRANSFER_CREATE_RELATIONSHIPS_ONLY flag.
     */
    bool createRelationshipsOnly = optionFlags & SHARD_TRANSFER_CREATE_RELATIONSHIPS_ONLY;

    if (createRelationshipsOnly)
    {
        if (!transferAlreadyCompleted)
        {
            /*
             * If the transfer is not completed and we are here just to create
             * the relationships, we can return right away.
             */
            ereport(WARNING, (errmsg("shard is not present on node %s:%d",
                                     targetNodeName, targetNodePort),
                              errdetail("%s may not have completed.",
                                        operationNameCapitalized)));
            return;
        }

        CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort,
                        targetNodeName, targetNodePort,
                        (shardReplicationMode == TRANSFER_MODE_FORCE_LOGICAL),
                        operationFunctionName, optionFlags);

        /* We don't need to do anything else, just return */
        return;
    }

    if (transferAlreadyCompleted)
    {
        /* if the transfer is already completed, we can return right away */
        ereport(WARNING, (errmsg("shard is already present on node %s:%d",

@@ -515,7 +699,8 @@ TransferShards(int64 shardId, char *sourceNodeName,
    }

    CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName,
                    targetNodePort, useLogicalReplication, operationFunctionName);
                    targetNodePort, useLogicalReplication, operationFunctionName,
                    optionFlags);

    if (transferType == SHARD_TRANSFER_MOVE)
    {

@@ -574,6 +759,205 @@ TransferShards(int64 shardId, char *sourceNodeName,
    }


/*
 * AdjustShardsForPrimaryCloneNodeSplit is called when a primary-clone node split
 * occurs. It adjusts the shard placements between the primary and clone nodes based
 * on the provided shard lists. Since the clone is an exact replica of the primary
 * but the metadata is not aware of this replication, this function updates the
 * metadata to reflect the new shard distribution.
 *
 * The function handles three types of shards:
 *
 * 1. Shards moving to the clone node (cloneShardList):
 *    - Updates shard placement metadata to move placements from primary to clone
 *    - No data movement is needed since the clone already has the data
 *    - Adds cleanup records to remove the shard data from primary at transaction commit
 *
 * 2. Shards staying on the primary node (primaryShardList):
 *    - Metadata already correctly reflects these shards on primary
 *    - Adds cleanup records to remove the shard data from the clone node
 *
 * 3. Reference tables:
 *    - Inserts new placement records on the clone node
 *    - Data is already present on the clone, so only a metadata update is needed
 *
 * This function does not perform any actual data movement; it only updates the
 * shard placement metadata and schedules cleanup operations for later execution.
 */
void
AdjustShardsForPrimaryCloneNodeSplit(WorkerNode *primaryNode,
                                     WorkerNode *cloneNode,
                                     List *primaryShardList,
                                     List *cloneShardList)
{
    /* Input validation */
    if (primaryNode == NULL || cloneNode == NULL)
    {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                        errmsg("primary or clone worker node is NULL")));
    }

    if (primaryNode->nodeId == cloneNode->nodeId)
    {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                        errmsg("primary and clone nodes must be different")));
    }

    ereport(NOTICE, (errmsg(
                         "adjusting shard placements for primary %s:%d and clone %s:%d",
                         primaryNode->workerName, primaryNode->workerPort,
                         cloneNode->workerName, cloneNode->workerPort)));

    RegisterOperationNeedingCleanup();

    /*
     * Process shards that will stay on the primary node.
     * For these shards, we need to remove their data from the clone node
     * since the metadata already correctly reflects them on primary.
     */
    uint64 shardId = 0;
    uint32 primaryGroupId = GroupForNode(primaryNode->workerName,
                                         primaryNode->workerPort);
    uint32 cloneGroupId = GroupForNode(cloneNode->workerName, cloneNode->workerPort);

    ereport(NOTICE, (errmsg("processing %d shards for primary node GroupID %d",
                            list_length(primaryShardList), primaryGroupId)));

    /*
     * For each shard staying on primary, insert cleanup records to remove
     * the shard data from the clone node. The metadata already correctly
     * reflects these shards on primary, so no metadata changes are needed.
     */
    foreach_declared_int(shardId, primaryShardList)
    {
        ShardInterval *shardInterval = LoadShardInterval(shardId);
        List *colocatedShardList = ColocatedShardIntervalList(shardInterval);

        char *qualifiedShardName = ConstructQualifiedShardName(shardInterval);
        ereport(LOG, (errmsg(
                          "inserting DELETE shard record for shard %s from clone node GroupID %d",
                          qualifiedShardName, cloneGroupId)));

        InsertCleanupRecordsForShardPlacementsOnNode(colocatedShardList,
                                                     cloneGroupId);
    }

    /*
     * Process shards that will move to the clone node.
     * For these shards, we need to:
     * 1. Update metadata to move placements from primary to clone
     * 2. Remove the shard data from primary (via cleanup records)
     * 3. No data movement is needed since the clone already has the data
     */
    ereport(NOTICE, (errmsg("processing %d shards for clone node GroupID %d",
                            list_length(cloneShardList), cloneGroupId)));

    foreach_declared_int(shardId, cloneShardList)
    {
        ShardInterval *shardInterval = LoadShardInterval(shardId);
        List *colocatedShardList = ColocatedShardIntervalList(shardInterval);

        /*
         * Create new shard placement records on the clone node for all
         * colocated shards. This moves the shard placements from primary
         * to clone in the metadata.
         */
        foreach_declared_ptr(shardInterval, colocatedShardList)
        {
            uint64 colocatedShardId = shardInterval->shardId;

            uint64 placementId = GetNextPlacementId();
            InsertShardPlacementRow(colocatedShardId, placementId,
                                    ShardLength(colocatedShardId),
                                    cloneGroupId);
        }

        /*
         * Update the metadata on worker nodes to reflect the new shard
         * placement distribution between primary and clone nodes.
         */
        UpdateColocatedShardPlacementMetadataOnWorkers(shardId,
                                                       primaryNode->workerName,
                                                       primaryNode->workerPort,
                                                       cloneNode->workerName,
                                                       cloneNode->workerPort);

        /*
         * Remove the shard placement records from primary node metadata
         * since these shards are now served from the clone node.
         */
        DropShardPlacementsFromMetadata(colocatedShardList,
                                        primaryNode->workerName, primaryNode->workerPort);

        char *qualifiedShardName = ConstructQualifiedShardName(shardInterval);
        ereport(LOG, (errmsg(
                          "inserting DELETE shard record for shard %s from primary node GroupID %d",
                          qualifiedShardName, primaryGroupId)));

        /*
         * Insert cleanup records to remove the shard data from the primary node
         * at transaction commit. This frees up space on the primary node
         * since the data is now served from the clone node.
         */
        InsertCleanupRecordsForShardPlacementsOnNode(colocatedShardList,
                                                     primaryGroupId);
    }

    /*
     * Handle reference tables - these need to be available on both
     * primary and clone nodes. Since the clone already has the data,
     * we just need to insert placement records for the clone node.
     */
    int colocationId = GetReferenceTableColocationId();

    if (colocationId == INVALID_COLOCATION_ID)
    {
        /* we have no reference tables yet */
        return;
    }
    ShardInterval *shardInterval = NULL;
    List *referenceTableIdList = CitusTableTypeIdList(REFERENCE_TABLE);
    Oid referenceTableId = linitial_oid(referenceTableIdList);
    List *shardIntervalList = LoadShardIntervalList(referenceTableId);
    foreach_declared_ptr(shardInterval, shardIntervalList)
    {
        List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
        ShardInterval *colocatedShardInterval = NULL;

        /*
         * For each reference table shard, create placement records on the
         * clone node. The data is already present on the clone, so we only
         * need to update the metadata to make the clone aware of these shards.
         */
        foreach_declared_ptr(colocatedShardInterval, colocatedShardList)
        {
            uint64 colocatedShardId = colocatedShardInterval->shardId;

            /*
             * Insert a shard placement record for the clone node and
             * propagate the metadata change to worker nodes.
             */
            uint64 placementId = GetNextPlacementId();
            InsertShardPlacementRow(colocatedShardId, placementId,
                                    ShardLength(colocatedShardId),
                                    cloneGroupId);

            char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId,
                                                            0, cloneGroupId);

            SendCommandToWorkersWithMetadata(placementCommand);
        }
    }

    ereport(NOTICE, (errmsg(
                         "shard placement adjustment complete for primary %s:%d and clone %s:%d",
                         primaryNode->workerName, primaryNode->workerPort,
                         cloneNode->workerName, cloneNode->workerPort)));
}
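Since the function only rewrites metadata, the effect of a split is easiest to
verify from the catalogs. A sketch, assuming the primary was registered as
group 2 and the clone as group 3 (group ids are illustrative):

    -- distributed shards now point at exactly one of the two groups,
    -- and reference table shards have a placement in both
    SELECT shardid, groupid
    FROM pg_dist_placement
    WHERE groupid IN (2, 3)
    ORDER BY shardid, groupid;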

/*
 * Insert deferred cleanup records.
 * The shards will be dropped by the background cleaner later.

@@ -676,7 +1060,7 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetN

/*
 * LockColocatedRelationsForMove takes a list of relations, locks all of them
 * using ShareUpdateExclusiveLock
 * using RowExclusiveLock
 */
static void
LockColocatedRelationsForMove(List *colocatedTableList)

@@ -684,7 +1068,7 @@ LockColocatedRelationsForMove(List *colocatedTableList)
    Oid colocatedTableId = InvalidOid;
    foreach_declared_oid(colocatedTableId, colocatedTableList)
    {
        LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock);
        LockRelationOid(colocatedTableId, RowExclusiveLock);
    }
}

@@ -1333,7 +1717,7 @@ ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList)
static void
CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort,
                char *targetNodeName, int32 targetNodePort, bool useLogicalReplication,
                const char *operationName)
                const char *operationName, uint32 optionFlags)
{
    if (list_length(shardIntervalList) < 1)
    {

@@ -1343,16 +1727,22 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
    /* Start operation to prepare for generating cleanup records */
    RegisterOperationNeedingCleanup();

    if (useLogicalReplication)
    bool createRelationshipsOnly = optionFlags & SHARD_TRANSFER_CREATE_RELATIONSHIPS_ONLY;

    /*
     * If we're only going to create relationships, always use
     * CopyShardTablesViaBlockWrites.
     */
    if (useLogicalReplication && !createRelationshipsOnly)
    {
        CopyShardTablesViaLogicalReplication(shardIntervalList, sourceNodeName,
                                             sourceNodePort, targetNodeName,
                                             targetNodePort);
                                             targetNodePort, optionFlags);
    }
    else
    {
        CopyShardTablesViaBlockWrites(shardIntervalList, sourceNodeName, sourceNodePort,
                                      targetNodeName, targetNodePort);
                                      targetNodeName, targetNodePort, optionFlags);
    }

    /*

@@ -1369,7 +1759,7 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
static void
CopyShardTablesViaLogicalReplication(List *shardIntervalList, char *sourceNodeName,
                                     int32 sourceNodePort, char *targetNodeName,
                                     int32 targetNodePort)
                                     int32 targetNodePort, uint32 optionFlags)
{
    MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
                                                       "CopyShardTablesViaLogicalReplication",

@@ -1407,9 +1797,13 @@ CopyShardTablesViaLogicalReplication(List *shardIntervalList, char *sourceNodeNa

    MemoryContextSwitchTo(oldContext);

    bool skipRelationshipCreation = (optionFlags &
                                     SHARD_TRANSFER_SKIP_CREATE_RELATIONSHIPS);

    /* data copy is done separately when logical replication is used */
    LogicallyReplicateShards(shardIntervalList, sourceNodeName,
                             sourceNodePort, targetNodeName, targetNodePort);
                             sourceNodePort, targetNodeName, targetNodePort,
                             skipRelationshipCreation);
}


@@ -1437,7 +1831,7 @@ CreateShardCommandList(ShardInterval *shardInterval, List *ddlCommandList)
static void
CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName,
                              int32 sourceNodePort, char *targetNodeName,
                              int32 targetNodePort)
                              int32 targetNodePort, uint32 optionFlags)
{
    MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
                                                       "CopyShardTablesViaBlockWrites",

@@ -1446,127 +1840,150 @@ CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName,

    WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
    WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);

    /* iterate through the colocated shards and copy each */
    ShardInterval *shardInterval = NULL;
    foreach_declared_ptr(shardInterval, shardIntervalList)
    {
        /*
         * For each shard we first create the shard table in a separate
         * transaction and then we copy the data and create the indexes in a
         * second separate transaction. The reason we don't do both in a single
         * transaction is so we can see the size of the new shard growing
         * during the copy when we run get_rebalance_progress in another
         * session. If we wouldn't split these two phases up, then the table
         * wouldn't be visible in the session that get_rebalance_progress uses.
         * So get_rebalance_progress would always report its size as 0.
         */
        List *ddlCommandList = RecreateShardDDLCommandList(shardInterval, sourceNodeName,
                                                           sourceNodePort);
        char *tableOwner = TableOwner(shardInterval->relationId);

        /* drop the shard we created on the target, in case of failure */
        InsertCleanupRecordOutsideTransaction(CLEANUP_OBJECT_SHARD_PLACEMENT,
                                              ConstructQualifiedShardName(shardInterval),
                                              GroupForNode(targetNodeName,
                                                           targetNodePort),
                                              CLEANUP_ON_FAILURE);

        SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
                                                  tableOwner, ddlCommandList);
    }

    UpdatePlacementUpdateStatusForShardIntervalList(
        shardIntervalList,
        sourceNodeName,
        sourceNodePort,
        PLACEMENT_UPDATE_STATUS_COPYING_DATA);

    ConflictWithIsolationTestingBeforeCopy();
    CopyShardsToNode(sourceNode, targetNode, shardIntervalList, NULL);
    ConflictWithIsolationTestingAfterCopy();

    UpdatePlacementUpdateStatusForShardIntervalList(
        shardIntervalList,
        sourceNodeName,
        sourceNodePort,
        PLACEMENT_UPDATE_STATUS_CREATING_CONSTRAINTS);

    foreach_declared_ptr(shardInterval, shardIntervalList)
    {
        List *ddlCommandList =
            PostLoadShardCreationCommandList(shardInterval, sourceNodeName,
                                             sourceNodePort);
        char *tableOwner = TableOwner(shardInterval->relationId);
        SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
                                                  tableOwner, ddlCommandList);

        MemoryContextReset(localContext);
    }
    bool createRelationshipsOnly = optionFlags & SHARD_TRANSFER_CREATE_RELATIONSHIPS_ONLY;

    /*
     * Once all shards are copied, we can recreate relationships between shards.
     * Create DDL commands to attach child tables to their parents in a partitioning hierarchy.
     * If we're only asked to create the relationships, the shards are already
     * present and populated on the node. Skip the table-setup and data-loading
     * steps and proceed straight to creating the relationships.
     */
    List *shardIntervalWithDDCommandsList = NIL;
    foreach_declared_ptr(shardInterval, shardIntervalList)
    if (!createRelationshipsOnly)
    {
        if (PartitionTable(shardInterval->relationId))
        /* iterate through the colocated shards and copy each */
        foreach_declared_ptr(shardInterval, shardIntervalList)
        {
            char *attachPartitionCommand =
                GenerateAttachShardPartitionCommand(shardInterval);
            /*
             * For each shard we first create the shard table in a separate
             * transaction and then we copy the data and create the indexes in a
             * second separate transaction. The reason we don't do both in a single
             * transaction is so we can see the size of the new shard growing
             * during the copy when we run get_rebalance_progress in another
             * session. If we wouldn't split these two phases up, then the table
             * wouldn't be visible in the session that get_rebalance_progress uses.
             * So get_rebalance_progress would always report its size as 0.
             */
            List *ddlCommandList = RecreateShardDDLCommandList(shardInterval,
                                                               sourceNodeName,
                                                               sourceNodePort);
            char *tableOwner = TableOwner(shardInterval->relationId);

            ShardCommandList *shardCommandList = CreateShardCommandList(
                shardInterval,
                list_make1(attachPartitionCommand));
            shardIntervalWithDDCommandsList = lappend(shardIntervalWithDDCommandsList,
                                                      shardCommandList);
            /* drop the shard we created on the target, in case of failure */
            InsertCleanupRecordOutsideTransaction(CLEANUP_OBJECT_SHARD_PLACEMENT,
                                                  ConstructQualifiedShardName(
                                                      shardInterval),
                                                  GroupForNode(targetNodeName,
                                                               targetNodePort),
                                                  CLEANUP_ON_FAILURE);

            SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
                                                      tableOwner, ddlCommandList);
        }

        UpdatePlacementUpdateStatusForShardIntervalList(
            shardIntervalList,
            sourceNodeName,
            sourceNodePort,
            PLACEMENT_UPDATE_STATUS_COPYING_DATA);

        ConflictWithIsolationTestingBeforeCopy();
        CopyShardsToNode(sourceNode, targetNode, shardIntervalList, NULL);
        ConflictWithIsolationTestingAfterCopy();

        UpdatePlacementUpdateStatusForShardIntervalList(
            shardIntervalList,
            sourceNodeName,
            sourceNodePort,
            PLACEMENT_UPDATE_STATUS_CREATING_CONSTRAINTS);

        foreach_declared_ptr(shardInterval, shardIntervalList)
        {
            List *ddlCommandList =
                PostLoadShardCreationCommandList(shardInterval, sourceNodeName,
                                                 sourceNodePort);
            char *tableOwner = TableOwner(shardInterval->relationId);
            SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
                                                      tableOwner, ddlCommandList);

            MemoryContextReset(localContext);
        }
    }

    UpdatePlacementUpdateStatusForShardIntervalList(
        shardIntervalList,
        sourceNodeName,
        sourceNodePort,
        PLACEMENT_UPDATE_STATUS_CREATING_FOREIGN_KEYS);

    /*
     * Iterate through the colocated shards and create DDL commands
     * to create the foreign constraints.
     * Skip creating shard relationships if the caller has requested that they
     * not be created.
     */
    foreach_declared_ptr(shardInterval, shardIntervalList)
    bool skipRelationshipCreation = (optionFlags &
                                     SHARD_TRANSFER_SKIP_CREATE_RELATIONSHIPS);

    if (!skipRelationshipCreation)
    {
        List *shardForeignConstraintCommandList = NIL;
        List *referenceTableForeignConstraintList = NIL;
        /*
         * Once all shards are copied, we can recreate relationships between shards.
         * Create DDL commands to attach child tables to their parents in a partitioning hierarchy.
         */
        List *shardIntervalWithDDCommandsList = NIL;
        foreach_declared_ptr(shardInterval, shardIntervalList)
        {
            if (PartitionTable(shardInterval->relationId))
            {
                char *attachPartitionCommand =
                    GenerateAttachShardPartitionCommand(shardInterval);

        CopyShardForeignConstraintCommandListGrouped(shardInterval,
                                                     &shardForeignConstraintCommandList,
                                                     &referenceTableForeignConstraintList);
                ShardCommandList *shardCommandList = CreateShardCommandList(
                    shardInterval,
                    list_make1(attachPartitionCommand));
                shardIntervalWithDDCommandsList = lappend(shardIntervalWithDDCommandsList,
                                                          shardCommandList);
            }
        }

        ShardCommandList *shardCommandList = CreateShardCommandList(
            shardInterval,
            list_concat(shardForeignConstraintCommandList,
                        referenceTableForeignConstraintList));
        shardIntervalWithDDCommandsList = lappend(shardIntervalWithDDCommandsList,
                                                  shardCommandList);
        UpdatePlacementUpdateStatusForShardIntervalList(
            shardIntervalList,
            sourceNodeName,
            sourceNodePort,
            PLACEMENT_UPDATE_STATUS_CREATING_FOREIGN_KEYS);

        /*
         * Iterate through the colocated shards and create DDL commands
         * to create the foreign constraints.
         */
        foreach_declared_ptr(shardInterval, shardIntervalList)
        {
            List *shardForeignConstraintCommandList = NIL;
            List *referenceTableForeignConstraintList = NIL;

            CopyShardForeignConstraintCommandListGrouped(shardInterval,
                                                         &shardForeignConstraintCommandList,
                                                         &referenceTableForeignConstraintList);

            ShardCommandList *shardCommandList = CreateShardCommandList(
                shardInterval,
                list_concat(shardForeignConstraintCommandList,
                            referenceTableForeignConstraintList));
            shardIntervalWithDDCommandsList = lappend(shardIntervalWithDDCommandsList,
                                                      shardCommandList);
        }

        /* Now execute the partitioning & foreign constraint creation commands. */
        ShardCommandList *shardCommandList = NULL;
        foreach_declared_ptr(shardCommandList, shardIntervalWithDDCommandsList)
        {
            char *tableOwner = TableOwner(shardCommandList->shardInterval->relationId);
            SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
                                                      tableOwner,
                                                      shardCommandList->ddlCommandList);
        }

        UpdatePlacementUpdateStatusForShardIntervalList(
            shardIntervalList,
            sourceNodeName,
            sourceNodePort,
            PLACEMENT_UPDATE_STATUS_COMPLETING);
    }

    /* Now execute the partitioning & foreign constraint creation commands. */
    ShardCommandList *shardCommandList = NULL;
    foreach_declared_ptr(shardCommandList, shardIntervalWithDDCommandsList)
    {
        char *tableOwner = TableOwner(shardCommandList->shardInterval->relationId);
        SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
                                                  tableOwner,
                                                  shardCommandList->ddlCommandList);
    }

    UpdatePlacementUpdateStatusForShardIntervalList(
        shardIntervalList,
        sourceNodeName,
        sourceNodePort,
        PLACEMENT_UPDATE_STATUS_COMPLETING);

    MemoryContextReset(localContext);
    MemoryContextSwitchTo(oldContext);
}
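The two-transaction split described in the comment at the top of the copy loop
exists so that the growing target shard is visible to other sessions. One way
to observe it while a transfer is running, using Citus' own progress UDF (the
output is illustrative):

    SELECT table_name, shardid, source_shard_size, target_shard_size
    FROM get_rebalance_progress();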

@@ -1647,7 +2064,7 @@ CopyShardsToNode(WorkerNode *sourceNode, WorkerNode *targetNode, List *shardInte

    ExecuteTaskListOutsideTransaction(ROW_MODIFY_NONE, copyTaskList,
                                      MaxAdaptiveExecutorPoolSize,
                                      NULL /* jobIdList (ignored by API implementation) */);
                                      NULL /* jobIdList (ignored by API impl.) */);
}


@@ -2050,6 +2467,7 @@ UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId,
            "SELECT citus_internal.update_placement_metadata(%ld, %d, %d)",
            colocatedShard->shardId,
            sourceGroupId, targetGroupId);

        SendCommandToWorkersWithMetadata(updateCommand->data);
    }
}
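For a concrete sense of what this sends, the command built from the format
string above looks like the following (shard id and group ids made up):

    -- sent to every metadata-holding worker when shard 102008 moves
    -- from group 2 to group 3
    SELECT citus_internal.update_placement_metadata(102008, 2, 3);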


@@ -13,6 +13,7 @@

#include "postgres.h"

#include "executor/executor.h" /* for CreateExecutorState(), FreeExecutorState(), CreateExprContext(), etc. */
#include "utils/builtins.h"
#include "utils/lsyscache.h"


@@ -471,8 +471,8 @@ WriteLocalTuple(TupleTableSlot *slot, ShardCopyDestReceiver *copyDest)
    SetLocalExecutionStatus(LOCAL_EXECUTION_REQUIRED);

    bool isBinaryCopy = localCopyOutState->binary;
    bool shouldAddBinaryHeaders = (isBinaryCopy && localCopyOutState->fe_msgbuf->len ==
                                   0);
    bool shouldAddBinaryHeaders = (isBinaryCopy &&
                                   localCopyOutState->fe_msgbuf->len == 0);
    if (shouldAddBinaryHeaders)
    {
        AppendCopyBinaryHeaders(localCopyOutState);


@@ -71,8 +71,8 @@ worker_split_copy(PG_FUNCTION_ARGS)
    if (arrayHasNull)
    {
        ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                        errmsg(
                            "pg_catalog.split_copy_info array cannot contain null values")));
                        errmsg("pg_catalog.split_copy_info array "
                               "cannot contain null values")));
    }

    const int slice_ndim = 0;


@@ -16,6 +16,8 @@
#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_operator.h"
#include "lib/stringinfo.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"

@@ -38,6 +40,8 @@
#include "distributed/metadata_cache.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/multi_router_planner.h"
#include "distributed/query_utils.h"
#include "distributed/recursive_planning.h"
#include "distributed/shard_utils.h"
#include "distributed/stats/stat_tenants.h"
#include "distributed/version_compat.h"

@@ -204,6 +208,252 @@ UpdateTaskQueryString(Query *query, Task *task)
}


/*
 * CreateQualsForShardInterval creates the necessary qual conditions over the
 * given attnum and rtindex for the given shard interval.
 */
Node *
CreateQualsForShardInterval(RelationShard *relationShard, int attnum, int rtindex)
{
    uint64 shardId = relationShard->shardId;
    Oid relationId = relationShard->relationId;

    CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
    Var *partitionColumnVar = cacheEntry->partitionColumn;

    /*
     * Add constraints for the relation identified by rtindex, specifically on
     * its column at attnum. Create a Var node representing this column, which
     * will be used to compare against the bounds from the partition column of
     * the shard interval.
     */
    Var *outerTablePartitionColumnVar = makeVar(
        rtindex, attnum, partitionColumnVar->vartype,
        partitionColumnVar->vartypmod,
        partitionColumnVar->varcollid,
        0);

    bool isFirstShard = IsFirstShard(cacheEntry, shardId);

    /* load the interval for the shard and create constant nodes for the upper/lower bounds */
    ShardInterval *shardInterval = LoadShardInterval(shardId);
    Const *constNodeLowerBound = makeConst(INT4OID, -1, InvalidOid, sizeof(int32),
                                           shardInterval->minValue, false, true);
    Const *constNodeUpperBound = makeConst(INT4OID, -1, InvalidOid, sizeof(int32),
                                           shardInterval->maxValue, false, true);
    Const *constNodeZero = makeConst(INT4OID, -1, InvalidOid, sizeof(int32),
                                     Int32GetDatum(0), false, true);

    /* create a function expression node for the hash partition column */
    FuncExpr *hashFunction = makeNode(FuncExpr);
    hashFunction->funcid = cacheEntry->hashFunction->fn_oid;
    hashFunction->args = list_make1(outerTablePartitionColumnVar);
    hashFunction->funcresulttype = get_func_rettype(cacheEntry->hashFunction->fn_oid);
    hashFunction->funcretset = false;

    /* create a function expression for the lower bound of the shard interval */
    Oid resultTypeOid = get_func_rettype(
        cacheEntry->shardIntervalCompareFunction->fn_oid);
    FuncExpr *lowerBoundFuncExpr = makeNode(FuncExpr);
    lowerBoundFuncExpr->funcid = cacheEntry->shardIntervalCompareFunction->fn_oid;
    lowerBoundFuncExpr->args = list_make2((Node *) constNodeLowerBound,
                                          (Node *) hashFunction);
    lowerBoundFuncExpr->funcresulttype = resultTypeOid;
    lowerBoundFuncExpr->funcretset = false;

    Oid lessThan = GetSysCacheOid(OPERNAMENSP, Anum_pg_operator_oid, CStringGetDatum("<"),
                                  resultTypeOid, resultTypeOid,
                                  ObjectIdGetDatum(PG_CATALOG_NAMESPACE));

    /*
     * Finally, check if the comparison result is less than 0, i.e.,
     * shardInterval->minValue < hash(partitionColumn)
     * See SearchCachedShardInterval for the behavior at the boundaries.
     */
    Expr *lowerBoundExpr = make_opclause(lessThan, BOOLOID, false,
                                         (Expr *) lowerBoundFuncExpr,
                                         (Expr *) constNodeZero, InvalidOid, InvalidOid);

    /* create a function expression for the upper bound of the shard interval */
    FuncExpr *upperBoundFuncExpr = makeNode(FuncExpr);
    upperBoundFuncExpr->funcid = cacheEntry->shardIntervalCompareFunction->fn_oid;
    upperBoundFuncExpr->args = list_make2((Node *) hashFunction,
                                          (Expr *) constNodeUpperBound);
    upperBoundFuncExpr->funcresulttype = resultTypeOid;
    upperBoundFuncExpr->funcretset = false;

    Oid lessThanOrEqualTo = GetSysCacheOid(OPERNAMENSP, Anum_pg_operator_oid,
                                           CStringGetDatum("<="),
                                           resultTypeOid, resultTypeOid,
                                           ObjectIdGetDatum(PG_CATALOG_NAMESPACE));


    /*
     * Finally, check if the comparison result is less than or equal to 0, i.e.,
     * hash(partitionColumn) <= shardInterval->maxValue
     * See SearchCachedShardInterval for the behavior at the boundaries.
     */
    Expr *upperBoundExpr = make_opclause(lessThanOrEqualTo, BOOLOID, false,
                                         (Expr *) upperBoundFuncExpr,
                                         (Expr *) constNodeZero, InvalidOid, InvalidOid);


    /* create a node for both upper and lower bound */
    Node *shardIntervalBoundQuals = make_and_qual((Node *) lowerBoundExpr,
                                                  (Node *) upperBoundExpr);

    /*
     * Add a null test for the partition column for the first shard.
     * This is because we need to include the null values in exactly one of
     * the shard queries. The null test is added as an OR clause to the
     * existing AND clause.
     */
    if (isFirstShard)
    {
        /* null test for the first shard */
        NullTest *nullTest = makeNode(NullTest);
        nullTest->nulltesttype = IS_NULL; /* check for IS NULL */
        nullTest->arg = (Expr *) outerTablePartitionColumnVar; /* the variable to check */
        nullTest->argisrow = false;
        shardIntervalBoundQuals = (Node *) make_orclause(list_make2(nullTest,
                                                                    shardIntervalBoundQuals));
    }
    return shardIntervalBoundQuals;
}
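In SQL terms, the expression tree built here corresponds to a predicate of the
following shape on the constrained relation (hashint4 stands in for the
table's actual partition-column hash function, and the bound values are made
up for one shard):

    SELECT *
    FROM outer_table t
    WHERE (-2147483648 < hashint4(t.partition_col)
           AND hashint4(t.partition_col) <= -1073741825)
       OR t.partition_col IS NULL;  -- OR branch added only for the first shard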

/*
 * UpdateWhereClauseToPushdownRecurringOuterJoinWalker walks over the query tree and
 * updates the WHERE clause for outer joins satisfying feasibility conditions.
 */
bool
UpdateWhereClauseToPushdownRecurringOuterJoinWalker(Node *node, List *relationShardList)
{
    if (node == NULL)
    {
        return false;
    }

    if (IsA(node, Query))
    {
        UpdateWhereClauseToPushdownRecurringOuterJoin((Query *) node, relationShardList);
        return query_tree_walker((Query *) node,
                                 UpdateWhereClauseToPushdownRecurringOuterJoinWalker,
                                 relationShardList, QTW_EXAMINE_RTES_BEFORE);
    }

    if (!IsA(node, RangeTblEntry))
    {
        return expression_tree_walker(node,
                                      UpdateWhereClauseToPushdownRecurringOuterJoinWalker,
                                      relationShardList);
    }

    return false;
}


/*
 * UpdateWhereClauseToPushdownRecurringOuterJoin
 *
 * Inject shard interval predicates into the query WHERE clause for certain
 * outer joins to make the join semantically correct when distributed.
 *
 * Why this is needed:
 * When an inner side of an OUTER JOIN is a distributed table that has been
 * routed to a single shard, we cannot simply replace the RTE with the shard
 * name and rely on implicit pruning: the preserved (outer) side could still
 * produce rows whose join keys would hash to other shards. To keep results
 * consistent with the global execution semantics we restrict the preserved
 * (outer) side to only those partition key values that would route to the
 * chosen shard (plus NULLs, which are assigned to exactly one shard).
 *
 * What the function does:
 * 1. Iterate over the top-level jointree->fromlist.
 * 2. For each JoinExpr call CanPushdownRecurringOuterJoinExtended() which:
 *    - Verifies the shape / join type is eligible.
 *    - Returns:
 *      outerRtIndex : RT index whose column we will constrain,
 *      outerRte / innerRte,
 *      attnum : attribute number (partition column) on the outer side.
 *      This is compared to the partition column of innerRte.
 * 3. Find the RelationShard for the inner distributed table (innerRte->relid)
 *    in relationShardList; skip if absent (no fixed shard chosen).
 * 4. Build the shard qualification with CreateQualsForShardInterval():
 *    (minValue < hash(partcol) AND hash(partcol) <= maxValue)
 *    and, for the first shard only, OR (partcol IS NULL).
 *    The Var refers to (outerRtIndex, attnum) so the restriction applies to
 *    the preserved outer input.
 * 5. AND the new quals into jointree->quals (creating it if NULL).
 *
 * The function does not return anything; it modifies the query in place.
 */
void
UpdateWhereClauseToPushdownRecurringOuterJoin(Query *query, List *relationShardList)
{
    if (query == NULL)
    {
        return;
    }

    FromExpr *fromExpr = query->jointree;
    if (fromExpr == NULL || fromExpr->fromlist == NIL)
    {
        return;
    }

    ListCell *fromExprCell;
    foreach(fromExprCell, fromExpr->fromlist)
    {
        Node *fromItem = (Node *) lfirst(fromExprCell);
        if (!IsA(fromItem, JoinExpr))
        {
            continue;
        }
        JoinExpr *joinExpr = (JoinExpr *) fromItem;

        /*
         * We will check if we need to add constraints to the WHERE clause.
         */
        RangeTblEntry *innerRte = NULL;
        RangeTblEntry *outerRte = NULL;
        int outerRtIndex = -1;
        int attnum;
        if (!CanPushdownRecurringOuterJoinExtended(joinExpr, query, &outerRtIndex,
                                                   &outerRte, &innerRte, &attnum))
        {
            continue;
        }

        if (attnum == InvalidAttrNumber)
        {
            continue;
        }
        ereport(DEBUG5, (errmsg(
                             "Distributed table from the inner part of the outer join: %s.",
                             innerRte->eref->aliasname)));

        RelationShard *relationShard = FindRelationShard(innerRte->relid,
                                                         relationShardList);

        if (relationShard == NULL || relationShard->shardId == INVALID_SHARD_ID)
        {
            continue;
        }

        Node *shardIntervalBoundQuals = CreateQualsForShardInterval(relationShard, attnum,
                                                                    outerRtIndex);
        if (fromExpr->quals == NULL)
        {
            fromExpr->quals = (Node *) shardIntervalBoundQuals;
        }
        else
        {
            fromExpr->quals = make_and_qual(fromExpr->quals, shardIntervalBoundQuals);
        }
    }
}
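A sketch of the query shape this rewrite targets, assuming ref_table is a
recurring (reference) table and dist_table is hash-distributed on dist_col
(table and column names are illustrative):

    SELECT *
    FROM ref_table r
    LEFT JOIN dist_table d ON r.key = d.dist_col;

Per shard, the rewritten task gets the hash-range quals from
CreateQualsForShardInterval() ANDed into its WHERE clause, so the preserved
ref_table side only emits rows that belong to that shard (plus NULL keys in
exactly one shard).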

/*
 * UpdateRelationToShardNames walks over the query tree and appends shard ids to
 * relations. It uses a unique identity value to establish a connection between a

@@ -439,6 +689,27 @@ SetTaskQueryStringList(Task *task, List *queryStringList)
}


void
SetTaskQueryPlan(Task *task, Query *query, PlannedStmt *localPlan)
{
    Assert(localPlan != NULL);
    task->taskQuery.queryType = TASK_QUERY_LOCAL_PLAN;
    task->taskQuery.data.localCompiled = (LocalCompilation *) palloc0(
        sizeof(LocalCompilation));
    task->taskQuery.data.localCompiled->query = query;
    task->taskQuery.data.localCompiled->plan = localPlan;
    task->queryCount = 1;
}


PlannedStmt *
TaskQueryLocalPlan(Task *task)
{
    Assert(task->taskQuery.queryType == TASK_QUERY_LOCAL_PLAN);
    return task->taskQuery.data.localCompiled->plan;
}


/*
 * DeparseTaskQuery is a general way of deparsing a query based on a task.
 */

@@ -524,6 +795,26 @@ TaskQueryString(Task *task)
    {
        return task->taskQuery.data.queryStringLazy;
    }
    else if (taskQueryType == TASK_QUERY_LOCAL_PLAN)
    {
        Query *query = task->taskQuery.data.localCompiled->query;
        Assert(query != NULL);

        /*
         * Use the query of the local compilation to generate the
         * query string. For locally compiled tasks, the query is retained
         * for this purpose, which may be EXPLAIN ANALYZEing the task or
         * command logging. Generating the query string on the fly is
         * acceptable because the plan of the local compilation is used
         * for query execution.
         */
        MemoryContext previousContext = MemoryContextSwitchTo(GetMemoryChunkContext(
                                                                  query));
        UpdateRelationToShardNames((Node *) query, task->relationShardList);
        MemoryContextSwitchTo(previousContext);
        return AnnotateQuery(DeparseTaskQuery(task, query),
                             task->partitionKeyValue, task->colocationId);
    }

    Query *jobQueryReferenceForLazyDeparsing =
        task->taskQuery.data.jobQueryReferenceForLazyDeparsing;
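One situation where this on-demand deparsing matters is explaining a locally
executed task, e.g. (illustrative):

    EXPLAIN (ANALYZE) SELECT * FROM dist_table WHERE dist_col = 42;

The executor runs the retained local plan, while the query string shown in the
output is regenerated from the retained Query tree.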
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#include "postgres.h"
|
||||
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
|
||||
#include "access/htup_details.h"
|
||||
#include "access/xact.h"
|
||||
|
|
@ -75,17 +76,6 @@
|
|||
#endif
|
||||
|
||||
|
||||
/* RouterPlanType is used to determine the router plan to invoke */
|
||||
typedef enum RouterPlanType
|
||||
{
|
||||
INSERT_SELECT_INTO_CITUS_TABLE,
|
||||
INSERT_SELECT_INTO_LOCAL_TABLE,
|
||||
DML_QUERY,
|
||||
SELECT_QUERY,
|
||||
MERGE_QUERY,
|
||||
REPLAN_WITH_BOUND_PARAMETERS
|
||||
} RouterPlanType;
|
||||
|
||||
static List *plannerRestrictionContextList = NIL;
|
||||
int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */
|
||||
static uint64 NextPlanId = 1;
|
||||
|
|
@ -95,8 +85,8 @@ int PlannerLevel = 0;
|
|||
|
||||
static bool ListContainsDistributedTableRTE(List *rangeTableList,
|
||||
bool *maybeHasForeignDistributedTable);
|
||||
static PlannedStmt * CreateDistributedPlannedStmt(
|
||||
DistributedPlanningContext *planContext);
|
||||
static PlannedStmt * CreateDistributedPlannedStmt(DistributedPlanningContext *
|
||||
planContext);
|
||||
static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId,
|
||||
DistributedPlanningContext
|
||||
*planContext);
|
||||
|
|
@ -135,13 +125,15 @@ static void AdjustReadIntermediateResultsCostInternal(RelOptInfo *relOptInfo,
|
|||
Const *resultFormatConst);
|
||||
static List * OuterPlanParamsList(PlannerInfo *root);
|
||||
static List * CopyPlanParamList(List *originalPlanParamList);
|
||||
static PlannerRestrictionContext * CreateAndPushPlannerRestrictionContext(void);
|
||||
static void CreateAndPushPlannerRestrictionContext(DistributedPlanningContext *
|
||||
planContext,
|
||||
FastPathRestrictionContext *
|
||||
fastPathContext);
|
||||
static PlannerRestrictionContext * CurrentPlannerRestrictionContext(void);
|
||||
static void PopPlannerRestrictionContext(void);
|
||||
static void ResetPlannerRestrictionContext(
|
||||
PlannerRestrictionContext *plannerRestrictionContext);
|
||||
static PlannedStmt * PlanFastPathDistributedStmt(DistributedPlanningContext *planContext,
|
||||
Node *distributionKeyValue);
|
||||
static void ResetPlannerRestrictionContext(PlannerRestrictionContext *
|
||||
plannerRestrictionContext);
|
||||
static PlannedStmt * PlanFastPathDistributedStmt(DistributedPlanningContext *planContext);
|
||||
static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext,
|
||||
int rteIdCounter);
|
||||
static RTEListProperties * GetRTEListProperties(List *rangeTableList);
|
||||
|
|
@ -152,10 +144,12 @@ static RouterPlanType GetRouterPlanType(Query *query,
|
|||
bool hasUnresolvedParams);
|
||||
static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan,
|
||||
PlannedStmt *concatPlan);
|
||||
static bool CheckPostPlanDistribution(bool isDistributedQuery,
|
||||
Query *origQuery,
|
||||
List *rangeTableList,
|
||||
Query *plannedQuery);
|
||||
static bool CheckPostPlanDistribution(DistributedPlanningContext *planContext,
|
||||
bool isDistributedQuery,
|
||||
List *rangeTableList);
|
||||
#if PG_VERSION_NUM >= PG_VERSION_18
|
||||
static int DisableSelfJoinElimination(void);
|
||||
#endif
|
||||
|
||||
/* Distributed planner hook */
|
||||
PlannedStmt *
|
||||
|
|
@ -166,7 +160,10 @@ distributed_planner(Query *parse,
|
|||
{
|
||||
bool needsDistributedPlanning = false;
|
||||
bool fastPathRouterQuery = false;
|
||||
Node *distributionKeyValue = NULL;
|
||||
FastPathRestrictionContext fastPathContext = { 0 };
|
||||
#if PG_VERSION_NUM >= PG_VERSION_18
|
||||
int saveNestLevel = -1;
|
||||
#endif
|
||||
|
||||
List *rangeTableList = ExtractRangeTableEntryList(parse);
|
||||
|
||||
|
|
@ -191,8 +188,7 @@ distributed_planner(Query *parse,
|
|||
&maybeHasForeignDistributedTable);
|
||||
if (needsDistributedPlanning)
|
||||
{
|
||||
fastPathRouterQuery = FastPathRouterQuery(parse, &distributionKeyValue);
|
||||
|
||||
fastPathRouterQuery = FastPathRouterQuery(parse, &fastPathContext);
|
||||
if (maybeHasForeignDistributedTable)
|
||||
{
|
||||
WarnIfListHasForeignDistributedTable(rangeTableList);
|
||||
|
|
@ -231,6 +227,10 @@ distributed_planner(Query *parse,
|
|||
bool setPartitionedTablesInherited = false;
|
||||
AdjustPartitioningForDistributedPlanning(rangeTableList,
|
||||
setPartitionedTablesInherited);
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_18
|
||||
saveNestLevel = DisableSelfJoinElimination();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -247,8 +247,9 @@ distributed_planner(Query *parse,
|
|||
*/
|
||||
HideCitusDependentObjectsOnQueriesOfPgMetaTables((Node *) parse, NULL);
|
||||
|
||||
/* create a restriction context and put it at the end if context list */
|
||||
planContext.plannerRestrictionContext = CreateAndPushPlannerRestrictionContext();
|
||||
/* create a restriction context and put it at the end of our plan context's context list */
|
||||
CreateAndPushPlannerRestrictionContext(&planContext,
|
||||
&fastPathContext);
|
||||
|
||||
/*
|
||||
* We keep track of how many times we've recursed into the planner, primarily
|
||||
|
|
@ -264,7 +265,7 @@ distributed_planner(Query *parse,
|
|||
{
|
||||
if (fastPathRouterQuery)
|
||||
{
|
||||
result = PlanFastPathDistributedStmt(&planContext, distributionKeyValue);
|
||||
result = PlanFastPathDistributedStmt(&planContext);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -276,10 +277,19 @@ distributed_planner(Query *parse,
|
|||
planContext.plan = standard_planner(planContext.query, NULL,
|
||||
planContext.cursorOptions,
|
||||
planContext.boundParams);
|
||||
needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning,
|
||||
planContext.originalQuery,
|
||||
rangeTableList,
|
||||
planContext.query);
|
||||
#if PG_VERSION_NUM >= PG_VERSION_18
|
||||
if (needsDistributedPlanning)
|
||||
{
|
||||
Assert(saveNestLevel > 0);
|
||||
AtEOXact_GUC(true, saveNestLevel);
|
||||
}
|
||||
|
||||
/* Pop the plan context from the current restriction context */
|
||||
planContext.plannerRestrictionContext->planContext = NULL;
|
||||
#endif
|
||||
needsDistributedPlanning = CheckPostPlanDistribution(&planContext,
|
||||
needsDistributedPlanning,
|
||||
rangeTableList);
|
||||
|
||||
if (needsDistributedPlanning)
|
||||
{
|
||||
|
|
@@ -649,30 +659,21 @@ IsMultiTaskPlan(DistributedPlan *distributedPlan)
* the FastPathPlanner.
*/
static PlannedStmt *
PlanFastPathDistributedStmt(DistributedPlanningContext *planContext,
Node *distributionKeyValue)
PlanFastPathDistributedStmt(DistributedPlanningContext *planContext)
{
FastPathRestrictionContext *fastPathContext =
planContext->plannerRestrictionContext->fastPathRestrictionContext;
Assert(fastPathContext != NULL);
Assert(fastPathContext->fastPathRouterQuery);

planContext->plannerRestrictionContext->fastPathRestrictionContext->
fastPathRouterQuery = true;
FastPathPreprocessParseTree(planContext->query);

if (distributionKeyValue == NULL)
if (!fastPathContext->delayFastPathPlanning)
{
/* nothing to record */
planContext->plan = FastPathPlanner(planContext->originalQuery,
planContext->query,
planContext->boundParams);
}
else if (IsA(distributionKeyValue, Const))
{
fastPathContext->distributionKeyValue = (Const *) distributionKeyValue;
}
else if (IsA(distributionKeyValue, Param))
{
fastPathContext->distributionKeyHasParam = true;
}

planContext->plan = FastPathPlanner(planContext->originalQuery, planContext->query,
planContext->boundParams);

return CreateDistributedPlannedStmt(planContext);
}
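Because the old and new lines interleave above, the resulting control flow is easy to misread. A condensed sketch of the new function, assuming (as the later hunk shows) that CheckAndBuildDelayedFastPathPlan() finishes the job when planning is delayed:

    FastPathPreprocessParseTree(planContext->query);
    if (!fastPathContext->delayFastPathPlanning)
    {
        /* plan immediately, as before */
        planContext->plan = FastPathPlanner(planContext->originalQuery,
                                            planContext->query,
                                            planContext->boundParams);
    }
    /* else: CreateDistributedPlannedStmt() calls
     * CheckAndBuildDelayedFastPathPlan() once shard placement is known */
    return CreateDistributedPlannedStmt(planContext);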
@@ -803,6 +804,8 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
RaiseDeferredError(distributedPlan->planningError, ERROR);
}

CheckAndBuildDelayedFastPathPlan(planContext, distributedPlan);

/* remember the plan's identifier for identifying subplans */
distributedPlan->planId = planId;

@@ -1104,7 +1107,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
* set_plan_references>add_rtes_to_flat_rtable>add_rte_to_flat_rtable.
*/
List *subPlanList = GenerateSubplansForSubqueriesAndCTEs(planId, originalQuery,
plannerRestrictionContext);
plannerRestrictionContext,
routerPlan);

/*
* If subqueries were recursively planned then we need to replan the query

@@ -2034,6 +2038,32 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
lappend(relationRestrictionContext->relationRestrictionList, relationRestriction);

MemoryContextSwitchTo(oldMemoryContext);

#if PG_VERSION_NUM >= PG_VERSION_18
if (root->query_level == 1 && plannerRestrictionContext->planContext != NULL)
{
/* We're at the top query with a distributed context; see if Postgres
* has changed the query tree we passed to it in distributed_planner().
* This check was necessitated by PG commit 1e4351a, because in it the
* planner modifies a copy of the passed-in query tree, with the consequence
* that changes are not reflected back to the caller of standard_planner().
*/
Query *query = plannerRestrictionContext->planContext->query;
if (root->parse != query)
{
/*
* The Postgres planner has reconstructed the query tree, so the query
* tree our distributed context passed in to standard_planner() is
* updated to track the new query tree.
*/
ereport(DEBUG4, (errmsg(
"Detected query reconstruction by Postgres planner, updating "
"planContext to track it")));

plannerRestrictionContext->planContext->query = root->parse;
}
}
#endif
}
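The hunk above re-syncs the tracked Query pointer from inside multi_relation_restriction_hook(). A condensed sketch of that step, with the same names as the diff:

    #if PG_VERSION_NUM >= PG_VERSION_18
    if (root->query_level == 1 && planContext != NULL &&
        root->parse != planContext->query)
    {
        planContext->query = root->parse; /* follow the reconstructed tree */
    }
    #endif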
@@ -2407,13 +2437,17 @@ CopyPlanParamList(List *originalPlanParamList)

/*
* CreateAndPushPlannerRestrictionContext creates a new relation restriction context
* and a new join context, inserts it to the beginning of the
* plannerRestrictionContextList. Finally, the planner restriction context is
* inserted to the beginning of the plannerRestrictionContextList and it is returned.
* CreateAndPushPlannerRestrictionContext creates a new planner restriction
* context with an empty relation restriction context and an empty join and
* a copy of the given fast path restriction context (if present). Finally,
* the planner restriction context is inserted to the beginning of the
* global plannerRestrictionContextList and, in PG18+, given a reference to
* its distributed plan context.
*/
static PlannerRestrictionContext *
CreateAndPushPlannerRestrictionContext(void)
static void
CreateAndPushPlannerRestrictionContext(DistributedPlanningContext *planContext,
FastPathRestrictionContext *
fastPathRestrictionContext)
{
PlannerRestrictionContext *plannerRestrictionContext =
palloc0(sizeof(PlannerRestrictionContext));

@@ -2427,6 +2461,21 @@ CreateAndPushPlannerRestrictionContext(void)
plannerRestrictionContext->fastPathRestrictionContext =
palloc0(sizeof(FastPathRestrictionContext));

if (fastPathRestrictionContext != NULL)
{
/* copy the given fast path restriction context */
FastPathRestrictionContext *plannersFastPathCtx =
plannerRestrictionContext->fastPathRestrictionContext;
plannersFastPathCtx->fastPathRouterQuery =
fastPathRestrictionContext->fastPathRouterQuery;
plannersFastPathCtx->distributionKeyValue =
fastPathRestrictionContext->distributionKeyValue;
plannersFastPathCtx->distributionKeyHasParam =
fastPathRestrictionContext->distributionKeyHasParam;
plannersFastPathCtx->delayFastPathPlanning =
fastPathRestrictionContext->delayFastPathPlanning;
}

plannerRestrictionContext->memoryContext = CurrentMemoryContext;

/* we'll apply logical AND as we add tables */

@@ -2435,7 +2484,11 @@ CreateAndPushPlannerRestrictionContext(void)
plannerRestrictionContextList = lcons(plannerRestrictionContext,
plannerRestrictionContextList);

return plannerRestrictionContext;
planContext->plannerRestrictionContext = plannerRestrictionContext;

#if PG_VERSION_NUM >= PG_VERSION_18
plannerRestrictionContext->planContext = planContext;
#endif
}
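The restriction contexts behave as a stack, and on PG18 each pushed context carries a back-pointer to its plan context. A sketch of the push/pop discipline these hunks rely on (the pop side, including the error-path cleanup, appears in the PopPlannerRestrictionContext hunk below):

    /* push */
    plannerRestrictionContextList = lcons(plannerRestrictionContext,
                                          plannerRestrictionContextList);

    /* pop; also reached on error paths via PopPlannerRestrictionContext() */
    plannerRestrictionContext->planContext = NULL; /* PG18+ */
    plannerRestrictionContextList = list_delete_first(plannerRestrictionContextList);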

@@ -2496,6 +2549,18 @@ CurrentPlannerRestrictionContext(void)
static void
PopPlannerRestrictionContext(void)
{
#if PG_VERSION_NUM >= PG_VERSION_18

/*
* PG18+: Clear the restriction context's planContext pointer; this is done
* by distributed_planner() when popping the context, but in case of error
* during standard_planner() we want to clean up here also.
*/
PlannerRestrictionContext *plannerRestrictionContext =
(PlannerRestrictionContext *) linitial(plannerRestrictionContextList);
plannerRestrictionContext->planContext = NULL;
#endif

plannerRestrictionContextList = list_delete_first(plannerRestrictionContextList);
}

@@ -2740,12 +2805,13 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)

static bool
CheckPostPlanDistribution(bool isDistributedQuery,
Query *origQuery, List *rangeTableList,
Query *plannedQuery)
CheckPostPlanDistribution(DistributedPlanningContext *planContext, bool
isDistributedQuery, List *rangeTableList)
{
if (isDistributedQuery)
{
Query *origQuery = planContext->originalQuery;
Query *plannedQuery = planContext->query;
Node *origQuals = origQuery->jointree->quals;
Node *plannedQuals = plannedQuery->jointree->quals;

@@ -2764,6 +2830,23 @@ CheckPostPlanDistribution(bool isDistributedQuery,
*/
if (origQuals != NULL && plannedQuals == NULL)
{
bool planHasDistTable = ListContainsDistributedTableRTE(
planContext->plan->rtable, NULL);

/*
* If the Postgres plan has a distributed table, we know for sure that
* the query requires distributed planning.
*/
if (planHasDistTable)
{
return true;
}

/*
* Otherwise, if the query has fewer range table entries after Postgres
* planning, we should re-evaluate the distribution of the query. Postgres
* may have optimized away all citus tables, per issues 7782, 7783.
*/
List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery);
if (list_length(rtesPostPlan) < list_length(rangeTableList))
{

@@ -2775,3 +2858,27 @@ CheckPostPlanDistribution(bool isDistributedQuery,

return isDistributedQuery;
}


#if PG_VERSION_NUM >= PG_VERSION_18

/*
* DisableSelfJoinElimination is used to prevent self join elimination
* during distributed query planning to ensure shard queries are correctly
* generated. PG18's self join elimination (fc069a3a6) changes the Query
* in a way that can cause problems for queries with a mix of Citus and
* Postgres tables. Self join elimination is allowed on Postgres tables
* only so queries involving shards get the benefit of it.
*/
static int
DisableSelfJoinElimination(void)
{
int NestLevel = NewGUCNestLevel();
set_config_option("enable_self_join_elimination", "off",
(superuser() ? PGC_SUSET : PGC_USERSET), PGC_S_SESSION,
GUC_ACTION_LOCAL, true, 0, false);
return NestLevel;
}


#endif

@@ -43,8 +43,10 @@
#include "pg_version_constants.h"
|
||||
|
||||
#include "distributed/citus_clauses.h"
|
||||
#include "distributed/distributed_planner.h"
|
||||
#include "distributed/insert_select_planner.h"
|
||||
#include "distributed/local_executor.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/multi_physical_planner.h" /* only to use some utility functions */
|
||||
#include "distributed/multi_router_planner.h"
|
||||
|
|
@ -53,6 +55,7 @@
|
|||
#include "distributed/shardinterval_utils.h"
|
||||
|
||||
bool EnableFastPathRouterPlanner = true;
|
||||
bool EnableLocalFastPathQueryOptimization = true;
|
||||
|
||||
static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey);
|
||||
static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn,
|
||||
|
|
@ -61,6 +64,24 @@ static bool ConjunctionContainsColumnFilter(Node *node,
|
|||
Var *column,
|
||||
Node **distributionKeyValue);
|
||||
|
||||
/*
|
||||
* FastPathPreprocessParseTree is used to apply transformations on the parse tree
|
||||
* that are expected by the Postgres planner. This is called on both delayed FastPath
|
||||
* and non-delayed FastPath queries.
|
||||
*/
|
||||
void
|
||||
FastPathPreprocessParseTree(Query *parse)
|
||||
{
|
||||
/*
|
||||
* Citus planner relies on some of the transformations on constant
|
||||
* evaluation on the parse tree.
|
||||
*/
|
||||
parse->targetList =
|
||||
(List *) eval_const_expressions(NULL, (Node *) parse->targetList);
|
||||
parse->jointree->quals =
|
||||
(Node *) eval_const_expressions(NULL, (Node *) parse->jointree->quals);
|
||||
}
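Constant folding matters here because the fast-path check wants a plain Const on one side of the equality. A small illustration, assuming a qual whose right-hand side is a foldable expression:

    /* sketch: WHERE dist_key = 1 + 2 folds to WHERE dist_key = 3, after
     * which the distribution-key filter is a simple "Var = Const" */
    parse->jointree->quals =
        (Node *) eval_const_expressions(NULL, (Node *) parse->jointree->quals);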


/*
* FastPathPlanner is intended to be used instead of standard_planner() for trivial

@@ -73,15 +94,6 @@ static bool ConjunctionContainsColumnFilter(Node *node,
PlannedStmt *
FastPathPlanner(Query *originalQuery, Query *parse, ParamListInfo boundParams)
{
/*
* Citus planner relies on some of the transformations on constant
* evaluation on the parse tree.
*/
parse->targetList =
(List *) eval_const_expressions(NULL, (Node *) parse->targetList);
parse->jointree->quals =
(Node *) eval_const_expressions(NULL, (Node *) parse->jointree->quals);

PlannedStmt *result = GeneratePlaceHolderPlannedStmt(originalQuery);

return result;

@@ -112,9 +124,9 @@ GeneratePlaceHolderPlannedStmt(Query *parse)
Plan *plan = &scanNode->plan;
#endif

Node *distKey PG_USED_FOR_ASSERTS_ONLY = NULL;
FastPathRestrictionContext fprCtxt PG_USED_FOR_ASSERTS_ONLY = { 0 };

Assert(FastPathRouterQuery(parse, &distKey));
Assert(FastPathRouterQuery(parse, &fprCtxt));

/* there is only a single relation rte */
#if PG_VERSION_NUM >= PG_VERSION_16

@@ -150,27 +162,83 @@ GeneratePlaceHolderPlannedStmt(Query *parse)
}


/*
* InitializeFastPathContext - helper function to initialize a FastPath
* restriction context with the details that the FastPath code path needs.
*/
static void
InitializeFastPathContext(FastPathRestrictionContext *fastPathContext,
Node *distributionKeyValue,
bool canAvoidDeparse,
Query *query)
{
Assert(fastPathContext != NULL);
Assert(!fastPathContext->fastPathRouterQuery);
Assert(!fastPathContext->delayFastPathPlanning);

/*
* We're looking at a fast path query, so we can fill the
* fastPathContext with relevant details.
*/
fastPathContext->fastPathRouterQuery = true;
if (distributionKeyValue == NULL)
{
/* nothing to record */
}
else if (IsA(distributionKeyValue, Const))
{
fastPathContext->distributionKeyValue = (Const *) distributionKeyValue;
}
else if (IsA(distributionKeyValue, Param))
{
fastPathContext->distributionKeyHasParam = true;
}

/*
* If local execution and the fast path optimization to
* avoid deparse are enabled, and it is safe to do local
* execution..
*/
if (EnableLocalFastPathQueryOptimization &&
EnableLocalExecution &&
GetCurrentLocalExecutionStatus() != LOCAL_EXECUTION_DISABLED)
{
/*
* .. we can delay fast path planning until we know whether
* or not the shard is local. Make a final check for volatile
* functions in the query tree to determine if we should delay
* the fast path planning.
*/
fastPathContext->delayFastPathPlanning = canAvoidDeparse &&
!FindNodeMatchingCheckFunction(
(Node *) query,
CitusIsVolatileFunction);
}
}
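The delay decision combines three session-level switches with a per-query volatility check. A compressed sketch of the predicate, using only names that appear in this hunk:

    bool delay =
        EnableLocalFastPathQueryOptimization &&
        EnableLocalExecution &&
        GetCurrentLocalExecutionStatus() != LOCAL_EXECUTION_DISABLED &&
        canAvoidDeparse &&
        !FindNodeMatchingCheckFunction((Node *) query, CitusIsVolatileFunction);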


/*
* FastPathRouterQuery gets a query and returns true if the query is eligible for
* being a fast path router query.
* being a fast path router query. It also fills the given fastPathContext with
* details about the query such as the distribution key value (if available),
* whether the distribution key is a parameter, and the range table entry for the
* table being queried.
* The requirements for the fast path query can be listed below:
*
* - SELECT/UPDATE/DELETE query without CTES, sublinks-subqueries, set operations
* - The query should touch only a single hash distributed or reference table
* - The distribution with equality operator should be in the WHERE clause
* and it should be ANDed with any other filters. Also, the distribution
* key should only exists once in the WHERE clause. So basically,
* key should only exist once in the WHERE clause. So basically,
* SELECT ... FROM dist_table WHERE dist_key = X
* If the filter is a const, distributionKeyValue is set
* - All INSERT statements (including multi-row INSERTs) as long as the commands
* don't have any sublinks/CTEs etc
* -
*/
bool
FastPathRouterQuery(Query *query, Node **distributionKeyValue)
FastPathRouterQuery(Query *query, FastPathRestrictionContext *fastPathContext)
{
FromExpr *joinTree = query->jointree;
Node *quals = NULL;

if (!EnableFastPathRouterPlanner)
{
return false;

@@ -201,11 +269,20 @@ FastPathRouterQuery(Query *query, Node **distributionKeyValue)
else if (query->commandType == CMD_INSERT)
{
/* we don't need to do any further checks, all INSERTs are fast-path */
InitializeFastPathContext(fastPathContext, NULL, true, query);
return true;
}

/* make sure that the only range table in FROM clause */
if (list_length(query->rtable) != 1)
int numFromRels = list_length(query->rtable);

/* make sure that there is only one range table in FROM clause */
if ((numFromRels != 1)
#if PG_VERSION_NUM >= PG_VERSION_18

/* with a PG18+ twist for GROUP rte - if present make sure there's two range tables */
&& (!query->hasGroupRTE || numFromRels != 2)
#endif
)
{
return false;
}

@@ -225,6 +302,10 @@ FastPathRouterQuery(Query *query, Node **distributionKeyValue)
return false;
}

bool isFastPath = false;
bool canAvoidDeparse = false;
Node *distributionKeyValue = NULL;

/*
* If the table doesn't have a distribution column, we don't need to
* check anything further.

@@ -232,45 +313,62 @@ FastPathRouterQuery(Query *query, Node **distributionKeyValue)
Var *distributionKey = PartitionColumn(distributedTableId, 1);
if (!distributionKey)
{
return true;
/*
* Local execution may avoid a deparse on single shard distributed tables or
* citus local tables. We don't yet support reference tables in this code-path
* because modifications on reference tables are complicated to support here.
*/
canAvoidDeparse = IsCitusTableTypeCacheEntry(cacheEntry,
SINGLE_SHARD_DISTRIBUTED) ||
IsCitusTableTypeCacheEntry(cacheEntry, CITUS_LOCAL_TABLE);
isFastPath = true;
}

/* WHERE clause should not be empty for distributed tables */
if (joinTree == NULL ||
(IsCitusTableTypeCacheEntry(cacheEntry, DISTRIBUTED_TABLE) && joinTree->quals ==
NULL))
else
{
return false;
FromExpr *joinTree = query->jointree;
Node *quals = NULL;

canAvoidDeparse = IsCitusTableTypeCacheEntry(cacheEntry, DISTRIBUTED_TABLE);

if (joinTree == NULL ||
(joinTree->quals == NULL && canAvoidDeparse))
{
/* no quals, not a fast path query */
return false;
}

quals = joinTree->quals;
if (quals != NULL && IsA(quals, List))
{
quals = (Node *) make_ands_explicit((List *) quals);
}

/*
* Distribution column must be used in a simple equality match check and it must be
* place at top level conjunction operator. In simple words, we should have
* WHERE dist_key = VALUE [AND ....];
*
* We're also not allowing any other appearances of the distribution key in the quals.
*
* Overall the logic might sound fuzzy since it involves two individual checks:
* (a) Check for top level AND operator with one side being "dist_key = const"
* (b) Only allow single appearance of "dist_key" in the quals
*
* This is to simplify both of the individual checks and omit various edge cases
* that might arise with multiple distribution keys in the quals.
*/
isFastPath = (ConjunctionContainsColumnFilter(quals, distributionKey,
&distributionKeyValue) &&
!ColumnAppearsMultipleTimes(quals, distributionKey));
}

/* convert list of expressions into expression tree for further processing */
quals = joinTree->quals;
if (quals != NULL && IsA(quals, List))
if (isFastPath)
{
quals = (Node *) make_ands_explicit((List *) quals);
InitializeFastPathContext(fastPathContext, distributionKeyValue, canAvoidDeparse,
query);
}

/*
* Distribution column must be used in a simple equality match check and it must be
* place at top level conjunction operator. In simple words, we should have
* WHERE dist_key = VALUE [AND ....];
*
* We're also not allowing any other appearances of the distribution key in the quals.
*
* Overall the logic might sound fuzzy since it involves two individual checks:
* (a) Check for top level AND operator with one side being "dist_key = const"
* (b) Only allow single appearance of "dist_key" in the quals
*
* This is to simplify both of the individual checks and omit various edge cases
* that might arise with multiple distribution keys in the quals.
*/
if (ConjunctionContainsColumnFilter(quals, distributionKey, distributionKeyValue) &&
!ColumnAppearsMultipleTimes(quals, distributionKey))
{
return true;
}

return false;
return isFastPath;
}
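A usage sketch matching the new signature, as called from distributed_planner() earlier in this diff; the zero-initialized stack context is an assumption about the caller and is not shown verbatim in these hunks:

    FastPathRestrictionContext fastPathContext = { 0 };
    if (FastPathRouterQuery(parse, &fastPathContext))
    {
        /* fastPathContext.distributionKeyValue, .distributionKeyHasParam and
         * .delayFastPathPlanning now describe the fast-path query */
    }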

@@ -828,12 +828,13 @@ IsShardKeyValueAllowed(Const *shardKey, uint32 colocationId)
Assert(AllowedDistributionColumnValue.isActive);
Assert(ExecutorLevel > AllowedDistributionColumnValue.executorLevel);

ereport(DEBUG4, errmsg("Comparing saved:%s with Shard key: %s colocationid:%d:%d",
pretty_format_node_dump(
nodeToString(
AllowedDistributionColumnValue.distributionColumnValue)),
pretty_format_node_dump(nodeToString(shardKey)),
AllowedDistributionColumnValue.colocationId, colocationId));
ereport(DEBUG4, errmsg(
"Comparing saved:%s with Shard key: %s colocationid:%d:%d",
pretty_format_node_dump(
nodeToString(AllowedDistributionColumnValue.
distributionColumnValue)),
pretty_format_node_dump(nodeToString(shardKey)),
AllowedDistributionColumnValue.colocationId, colocationId));

return (equal(AllowedDistributionColumnValue.distributionColumnValue, shardKey) &&
(AllowedDistributionColumnValue.colocationId == colocationId));

@@ -66,7 +66,8 @@ static bool InsertSelectHasRouterSelect(Query *originalQuery,
PlannerRestrictionContext *
plannerRestrictionContext);
static Task * RouterModifyTaskForShardInterval(Query *originalQuery,
CitusTableCacheEntry *targetTableCacheEntry,
CitusTableCacheEntry *
targetTableCacheEntry,
ShardInterval *shardInterval,
PlannerRestrictionContext *
plannerRestrictionContext,

@@ -428,11 +429,10 @@ CreateInsertSelectIntoLocalTablePlan(uint64 planId, Query *insertSelectQuery,
ParamListInfo boundParams, bool hasUnresolvedParams,
PlannerRestrictionContext *plannerRestrictionContext)
{
RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);

PrepareInsertSelectForCitusPlanner(insertSelectQuery);

/* get the SELECT query (may have changed after PrepareInsertSelectForCitusPlanner) */
RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);
Query *selectQuery = selectRte->subquery;

bool allowRecursivePlanning = true;

@@ -513,6 +513,13 @@ PrepareInsertSelectForCitusPlanner(Query *insertSelectQuery)

bool isWrapped = false;

/*
* PG18 is stricter about GroupRTE/GroupVar. For INSERT ... SELECT with a GROUP BY,
* flatten the SELECT's targetList and havingQual so Vars point to base RTEs and
* avoid "Unrecognized range table id" errors.
*/
FlattenGroupExprs(selectRte->subquery);

if (selectRte->subquery->setOperations != NULL)
{
/*

@@ -766,7 +773,8 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
{
/* first apply toplevel pushdown checks to SELECT query */
error =
DeferErrorIfUnsupportedSubqueryPushdown(subquery, plannerRestrictionContext);
DeferErrorIfUnsupportedSubqueryPushdown(subquery, plannerRestrictionContext,
true);
if (error)
{
return error;

@@ -1145,10 +1153,11 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
exprTypmod((Node *) newSubqueryTargetEntry->expr),
exprCollation((Node *) newSubqueryTargetEntry->expr),
0);
TargetEntry *newInsertTargetEntry = makeTargetEntry((Expr *) newInsertVar,
originalAttrNo,
oldInsertTargetEntry->resname,
oldInsertTargetEntry->resjunk);
TargetEntry *newInsertTargetEntry = makeTargetEntry(
(Expr *) newInsertVar,
originalAttrNo,
oldInsertTargetEntry->resname,
oldInsertTargetEntry->resjunk);

newInsertTargetlist = lappend(newInsertTargetlist, newInsertTargetEntry);
resno++;

@@ -1430,11 +1439,6 @@ static DistributedPlan *
CreateNonPushableInsertSelectPlan(uint64 planId, Query *parse, ParamListInfo boundParams)
{
Query *insertSelectQuery = copyObject(parse);

RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);
RangeTblEntry *insertRte = ExtractResultRelationRTEOrError(insertSelectQuery);
Oid targetRelationId = insertRte->relid;

DistributedPlan *distributedPlan = CitusMakeNode(DistributedPlan);
distributedPlan->modLevel = RowModifyLevelForQuery(insertSelectQuery);

@@ -1449,6 +1453,7 @@ CreateNonPushableInsertSelectPlan(uint64 planId, Query *parse, ParamListInfo bou
PrepareInsertSelectForCitusPlanner(insertSelectQuery);

/* get the SELECT query (may have changed after PrepareInsertSelectForCitusPlanner) */
RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);
Query *selectQuery = selectRte->subquery;

/*

@@ -1471,6 +1476,9 @@ CreateNonPushableInsertSelectPlan(uint64 planId, Query *parse, ParamListInfo bou
PlannedStmt *selectPlan = pg_plan_query(selectQueryCopy, NULL, cursorOptions,
boundParams);

/* decide whether we can repartition the results */
RangeTblEntry *insertRte = ExtractResultRelationRTEOrError(insertSelectQuery);
Oid targetRelationId = insertRte->relid;
bool repartitioned = IsRedistributablePlan(selectPlan->planTree) &&
IsSupportedRedistributionTarget(targetRelationId);

@@ -41,6 +41,7 @@
static int SourceResultPartitionColumnIndex(Query *mergeQuery,
List *sourceTargetList,
CitusTableCacheEntry *targetRelation);
static int FindTargetListEntryWithVarExprAttno(List *targetList, AttrNumber varattno);
static Var * ValidateAndReturnVarIfSupported(Node *entryExpr);
static DeferredErrorMessage * DeferErrorIfTargetHasFalseClause(Oid targetRelationId,
PlannerRestrictionContext *

@@ -66,7 +67,8 @@ static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid
Query *query,
Node *quals,
List *targetList,
CmdType commandType);
CmdType
commandType);

static DistributedPlan * CreateRouterMergePlan(Oid targetRelationId, Query *originalQuery,
Query *query,

@@ -422,6 +424,9 @@ ErrorIfMergeHasUnsupportedTables(Oid targetRelationId, List *rangeTableList)
case RTE_VALUES:
case RTE_JOIN:
case RTE_CTE:
#if PG_VERSION_NUM >= PG_VERSION_18
case RTE_GROUP:
#endif
{
/* Skip them as base table(s) will be checked */
continue;

@@ -570,8 +575,8 @@ IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool
Var *distributionColumn = DistPartitionKey(relationId);

/* not all distributed tables have partition column */
if (distributionColumn != NULL && column->varattno ==
distributionColumn->varattno)
if (distributionColumn != NULL &&
column->varattno == distributionColumn->varattno)
{
isDistributionColumn = true;
}

@@ -628,6 +633,22 @@ MergeQualAndTargetListFunctionsSupported(Oid resultRelationId, Query *query,
}
}

/*
* joinTree->quals, retrieved by GetMergeJoinTree() - either from
* mergeJoinCondition (PG >= 17) or jointree->quals (PG < 17) -
* only contains the quals that are present in the "ON (..)" clause. Action
* quals that can be specified for each specific action, as in
* "WHEN <match condition> AND <action quals> THEN <action>", are
* saved into the "qual" field of the corresponding action's entry in
* mergeActionList, see
* https://github.com/postgres/postgres/blob/e6da68a6e1d60a037b63a9c9ed36e5ef0a996769/src/backend/parser/parse_merge.c#L285-L293.
*
* For this reason, even if TargetEntryChangesValue() could prove that
* an action's quals ensure that the action cannot change the distribution
* key, this is not the case as we don't provide action quals to
* TargetEntryChangesValue(), but just joinTree, which only contains
* the "ON (..)" clause quals.
*/
if (targetEntryDistributionColumn &&
TargetEntryChangesValue(targetEntry, distributionColumn, joinTree))
{

@@ -1025,8 +1046,9 @@ DeferErrorIfTargetHasFalseClause(Oid targetRelationId,
PlannerRestrictionContext *plannerRestrictionContext)
{
ListCell *restrictionCell = NULL;
foreach(restrictionCell,
plannerRestrictionContext->relationRestrictionContext->relationRestrictionList)
foreach(
restrictionCell,
plannerRestrictionContext->relationRestrictionContext->relationRestrictionList)
{
RelationRestriction *relationRestriction =
(RelationRestriction *) lfirst(restrictionCell);

@@ -1058,7 +1080,8 @@ DeferErrorIfTargetHasFalseClause(Oid targetRelationId,
*/
static DeferredErrorMessage *
DeferErrorIfRoutableMergeNotSupported(Query *query, List *rangeTableList,
PlannerRestrictionContext *plannerRestrictionContext,
PlannerRestrictionContext *
plannerRestrictionContext,
Oid targetRelationId)
{
List *distTablesList = NIL;

@@ -1095,8 +1118,8 @@ DeferErrorIfRoutableMergeNotSupported(Query *query, List *rangeTableList,

if (list_length(distTablesList) > 0 && list_length(localTablesList) > 0)
{
ereport(DEBUG1, (errmsg(
"A mix of distributed and local table, try repartitioning")));
ereport(DEBUG1, (errmsg("A mix of distributed and local table, "
"try repartitioning")));
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"A mix of distributed and citus-local table, "
"routable query is not possible", NULL, NULL);

@@ -1149,7 +1172,8 @@ DeferErrorIfRoutableMergeNotSupported(Query *query, List *rangeTableList,
{
deferredError =
DeferErrorIfUnsupportedSubqueryPushdown(query,
plannerRestrictionContext);
plannerRestrictionContext,
true);
if (deferredError)
{
ereport(DEBUG1, (errmsg("Sub-query is not pushable, try repartitioning")));

@@ -1410,7 +1434,8 @@ SourceResultPartitionColumnIndex(Query *mergeQuery, List *sourceTargetList,
Assert(sourceRepartitionVar);

int sourceResultRepartitionColumnIndex =
DistributionColumnIndex(sourceTargetList, sourceRepartitionVar);
FindTargetListEntryWithVarExprAttno(sourceTargetList,
sourceRepartitionVar->varattno);

if (sourceResultRepartitionColumnIndex == -1)
{

@@ -1561,6 +1586,33 @@ FetchAndValidateInsertVarIfExists(Oid targetRelationId, Query *query)
}

/*
* FindTargetListEntryWithVarExprAttno finds the index of the target
* entry whose expr is a Var that points to input varattno.
*
* If no such target entry is found, it returns -1.
*/
static int
FindTargetListEntryWithVarExprAttno(List *targetList, AttrNumber varattno)
{
int targetEntryIndex = 0;

TargetEntry *targetEntry = NULL;
foreach_declared_ptr(targetEntry, targetList)
{
if (IsA(targetEntry->expr, Var) &&
((Var *) targetEntry->expr)->varattno == varattno)
{
return targetEntryIndex;
}

targetEntryIndex++;
}

return -1;
}
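As used in SourceResultPartitionColumnIndex() above, the lookup is by attribute number rather than by Var equality. A minimal usage sketch with the names from this diff:

    int columnIndex =
        FindTargetListEntryWithVarExprAttno(sourceTargetList,
                                            sourceRepartitionVar->varattno);
    if (columnIndex == -1)
    {
        /* the repartition column is not projected by the source query */
    }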

/*
* IsLocalTableModification returns true if the table modified is a Postgres table.
* We do not support recursive planning for MERGE yet, so we could have a join

@@ -26,6 +26,7 @@
#include "commands/tablecmds.h"
#include "executor/tstoreReceiver.h"
#include "lib/stringinfo.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
#include "nodes/primnodes.h"
#include "nodes/print.h"

@@ -44,6 +45,11 @@
#include "utils/snapmgr.h"

#include "pg_version_constants.h"
#if PG_VERSION_NUM >= PG_VERSION_18
#include "commands/explain_dr.h" /* CreateExplainSerializeDestReceiver() */
#include "commands/explain_format.h"
#endif

#include "distributed/citus_depended_object.h"
#include "distributed/citus_nodefuncs.h"

@@ -68,6 +74,7 @@
#include "distributed/placement_connection.h"
#include "distributed/recursive_planning.h"
#include "distributed/remote_commands.h"
#include "distributed/subplan_execution.h"
#include "distributed/tuple_destination.h"
#include "distributed/tuplestore.h"
#include "distributed/version_compat.h"

@@ -78,6 +85,7 @@
bool ExplainDistributedQueries = true;
bool ExplainAllTasks = false;
int ExplainAnalyzeSortMethod = EXPLAIN_ANALYZE_SORT_BY_TIME;
extern MemoryContext SubPlanExplainAnalyzeContext;

/*
* If enabled, EXPLAIN ANALYZE output & other statistics of last worker task

@@ -85,6 +93,11 @@ int ExplainAnalyzeSortMethod = EXPLAIN_ANALYZE_SORT_BY_TIME;
*/
static char *SavedExplainPlan = NULL;
static double SavedExecutionDurationMillisec = 0.0;
static double SavedExplainPlanNtuples = 0;
static double SavedExplainPlanNloops = 0;
extern SubPlanExplainOutputData *SubPlanExplainOutput;
uint8 TotalExplainOutputCapacity = 0;
uint8 NumTasksOutput = 0;

/* struct to save explain flags */
typedef struct

@@ -134,14 +147,7 @@ typedef struct ExplainAnalyzeDestination
TupleDesc lastSavedExplainAnalyzeTupDesc;
} ExplainAnalyzeDestination;

#if PG_VERSION_NUM >= PG_VERSION_17

/*
* Various places within need to convert bytes to kilobytes. Round these up
* to the next whole kilobyte.
* copied from explain.c
*/
#define BYTES_TO_KILOBYTES(b) (((b) + 1023) / 1024)
#if PG_VERSION_NUM >= PG_VERSION_17 && PG_VERSION_NUM < PG_VERSION_18

/* copied from explain.c */
/* Instrumentation data for SERIALIZE option */

@@ -153,13 +159,7 @@ typedef struct SerializeMetrics
} SerializeMetrics;

/* copied from explain.c */
static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage);
static void show_buffer_usage(ExplainState *es, const BufferUsage *usage);
static void show_memory_counters(ExplainState *es,
const MemoryContextCounters *mem_counters);
static void ExplainIndentText(ExplainState *es);
static void ExplainPrintSerialize(ExplainState *es,
SerializeMetrics *metrics);
static SerializeMetrics GetSerializationMetrics(DestReceiver *dest);

/*

@@ -187,6 +187,23 @@ typedef struct SerializeDestReceiver
} SerializeDestReceiver;
#endif

#if PG_VERSION_NUM >= PG_VERSION_17

/*
* Various places within need to convert bytes to kilobytes. Round these up
* to the next whole kilobyte.
* copied from explain.c
*/
#define BYTES_TO_KILOBYTES(b) (((b) + 1023) / 1024)

/* copied from explain.c */
static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage);
static void show_buffer_usage(ExplainState *es, const BufferUsage *usage);
static void show_memory_counters(ExplainState *es,
const MemoryContextCounters *mem_counters);
static void ExplainPrintSerialize(ExplainState *es,
SerializeMetrics *metrics);
#endif

/* Explain functions for distributed queries */
static void ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es);

@@ -210,7 +227,8 @@ static const char * ExplainFormatStr(ExplainFormat format);
#if PG_VERSION_NUM >= PG_VERSION_17
static const char * ExplainSerializeStr(ExplainSerializeOption serializeOption);
#endif
static void ExplainWorkerPlan(PlannedStmt *plannedStmt, DestReceiver *dest,
static void ExplainWorkerPlan(PlannedStmt *plannedStmt, DistributedSubPlan *subPlan,
DestReceiver *dest,
ExplainState *es,
const char *queryString, ParamListInfo params,
QueryEnvironment *queryEnv,

@@ -219,7 +237,9 @@ static void ExplainWorkerPlan(PlannedStmt *plannedStmt, DestReceiver *dest,
const BufferUsage *bufusage,
const MemoryContextCounters *mem_counters,
#endif
double *executionDurationMillisec);
double *executionDurationMillisec,
double *executionTuples,
double *executionLoops);
static ExplainFormat ExtractFieldExplainFormat(Datum jsonbDoc, const char *fieldName,
ExplainFormat defaultValue);
#if PG_VERSION_NUM >= PG_VERSION_17

@@ -251,7 +271,8 @@ static double elapsed_time(instr_time *starttime);
static void ExplainPropertyBytes(const char *qlabel, int64 bytes, ExplainState *es);
static uint64 TaskReceivedTupleData(Task *task);
static bool ShowReceivedTupleData(CitusScanState *scanState, ExplainState *es);

static bool PlanStateAnalyzeWalker(PlanState *planState, void *ctx);
static void ExtractAnalyzeStats(DistributedSubPlan *subPlan, PlanState *planState);

/* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(worker_last_saved_explain_analyze);

@@ -427,6 +448,84 @@ NonPushableMergeCommandExplainScan(CustomScanState *node, List *ancestors,
}


/*
* ExtractAnalyzeStats parses the EXPLAIN ANALYZE output of the pre-executed
* subplans and injects the parsed statistics into queryDesc->planstate->instrument.
*/
static void
ExtractAnalyzeStats(DistributedSubPlan *subPlan, PlanState *planState)
{
if (!planState)
{
return;
}

Instrumentation *instr = planState->instrument;
if (!IsA(planState, CustomScanState))
{
instr->ntuples = subPlan->ntuples;
instr->nloops = 1; /* subplan nodes are executed only once */
return;
}

Assert(IsA(planState, CustomScanState));

if (subPlan->numTasksOutput <= 0)
{
return;
}

ListCell *lc;
int tasksOutput = 0;
double tasksNtuples = 0;
double tasksNloops = 0;
memset(instr, 0, sizeof(Instrumentation));
DistributedPlan *newdistributedPlan =
((CitusScanState *) planState)->distributedPlan;

/*
* Inject the earlier executed results, extracted from the workers' EXPLAIN
* output, into the newly created tasks.
*/
foreach(lc, newdistributedPlan->workerJob->taskList)
{
Task *task = (Task *) lfirst(lc);
uint32 taskId = task->taskId;

if (tasksOutput > subPlan->numTasksOutput)
{
break;
}

if (!subPlan->totalExplainOutput[taskId].explainOutput)
{
continue;
}

/*
* Now feed the earlier saved output, which will be used
* by RemoteExplain() when printing tasks
*/
MemoryContext taskContext = GetMemoryChunkContext(task);
task->totalReceivedTupleData =
subPlan->totalExplainOutput[taskId].totalReceivedTupleData;
task->fetchedExplainAnalyzeExecutionDuration =
subPlan->totalExplainOutput[taskId].executionDuration;
task->fetchedExplainAnalyzePlan =
MemoryContextStrdup(taskContext,
subPlan->totalExplainOutput[taskId].explainOutput);
tasksNtuples += subPlan->totalExplainOutput[taskId].executionNtuples;
tasksNloops = subPlan->totalExplainOutput[taskId].executionNloops;

subPlan->totalExplainOutput[taskId].explainOutput = NULL;
tasksOutput++;
}

instr->ntuples = tasksNtuples;
instr->nloops = tasksNloops;
}
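These saved statistics are later spread over the whole PlanState tree via a walker (see PlanStateAnalyzeWalker further below). For reference, a sketch of the usual PostgreSQL walker idiom, in which the walker recurses by calling planstate_tree_walker() on itself; whether a walker recurses or only visits direct children is a per-caller design choice:

    static bool
    StatsWalker(PlanState *planState, void *context)
    {
        ExtractAnalyzeStats((DistributedSubPlan *) context, planState);
        return planstate_tree_walker(planState, StatsWalker, context);
    }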


/*
* ExplainSubPlans generates EXPLAIN output for subplans for CTEs
* and complex subqueries. Because the planning for these queries

@@ -445,7 +544,6 @@ ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es)
{
DistributedSubPlan *subPlan = (DistributedSubPlan *) lfirst(subPlanCell);
PlannedStmt *plan = subPlan->plan;
IntoClause *into = NULL;
ParamListInfo params = NULL;

/*

@@ -529,6 +627,12 @@ ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es)

ExplainOpenGroup("PlannedStmt", "PlannedStmt", false, es);

DestReceiver *dest = None_Receiver; /* No query execution */
double executionDurationMillisec = 0.0;
double executionTuples = 0;
double executionLoops = 0;

/* Capture memory stats on PG17+ */
#if PG_VERSION_NUM >= PG_VERSION_17
if (es->memory)
{

@@ -536,12 +640,20 @@ ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es)
MemoryContextMemConsumed(planner_ctx, &mem_counters);
}

ExplainOnePlan(plan, into, es, queryString, params, NULL, &planduration,
(es->buffers ? &bufusage : NULL),
(es->memory ? &mem_counters : NULL));
/* Execute EXPLAIN without ANALYZE */
ExplainWorkerPlan(plan, subPlan, dest, es, queryString, params, NULL,
&planduration,
(es->buffers ? &bufusage : NULL),
(es->memory ? &mem_counters : NULL),
&executionDurationMillisec,
&executionTuples,
&executionLoops);
#else
ExplainOnePlan(plan, into, es, queryString, params, NULL, &planduration,
(es->buffers ? &bufusage : NULL));

/* Execute EXPLAIN without ANALYZE */
ExplainWorkerPlan(plan, subPlan, dest, es, queryString, params, NULL,
&planduration, &executionDurationMillisec,
&executionTuples, &executionLoops);
#endif

ExplainCloseGroup("PlannedStmt", "PlannedStmt", false, es);

@@ -1212,17 +1324,19 @@ worker_last_saved_explain_analyze(PG_FUNCTION_ARGS)
if (SavedExplainPlan != NULL)
{
int columnCount = tupleDescriptor->natts;
if (columnCount != 2)
if (columnCount != 4)
{
ereport(ERROR, (errmsg("expected 3 output columns in definition of "
ereport(ERROR, (errmsg("expected 4 output columns in definition of "
"worker_last_saved_explain_analyze, but got %d",
columnCount)));
}

bool columnNulls[2] = { false };
Datum columnValues[2] = {
bool columnNulls[4] = { false };
Datum columnValues[4] = {
CStringGetTextDatum(SavedExplainPlan),
Float8GetDatum(SavedExecutionDurationMillisec)
Float8GetDatum(SavedExecutionDurationMillisec),
Float8GetDatum(SavedExplainPlanNtuples),
Float8GetDatum(SavedExplainPlanNloops)
};

tuplestore_putvalues(tupleStore, tupleDescriptor, columnValues, columnNulls);

@@ -1243,6 +1357,8 @@ worker_save_query_explain_analyze(PG_FUNCTION_ARGS)
text *queryText = PG_GETARG_TEXT_P(0);
char *queryString = text_to_cstring(queryText);
double executionDurationMillisec = 0.0;
double executionTuples = 0;
double executionLoops = 0;

Datum explainOptions = PG_GETARG_DATUM(1);
ExplainState *es = NewExplainState();

@@ -1359,16 +1475,19 @@ worker_save_query_explain_analyze(PG_FUNCTION_ARGS)
}

/* do the actual EXPLAIN ANALYZE */
ExplainWorkerPlan(plan, tupleStoreDest, es, queryString, boundParams, NULL,
ExplainWorkerPlan(plan, NULL, tupleStoreDest, es, queryString, boundParams, NULL,
&planDuration,
(es->buffers ? &bufusage : NULL),
(es->memory ? &mem_counters : NULL),
&executionDurationMillisec);
&executionDurationMillisec,
&executionTuples,
&executionLoops);
#else

/* do the actual EXPLAIN ANALYZE */
ExplainWorkerPlan(plan, tupleStoreDest, es, queryString, boundParams, NULL,
&planDuration, &executionDurationMillisec);
ExplainWorkerPlan(plan, NULL, tupleStoreDest, es, queryString, boundParams, NULL,
&planDuration, &executionDurationMillisec,
&executionTuples, &executionLoops);
#endif

ExplainEndOutput(es);

@@ -1379,6 +1498,8 @@ worker_save_query_explain_analyze(PG_FUNCTION_ARGS)

SavedExplainPlan = pstrdup(es->str->data);
SavedExecutionDurationMillisec = executionDurationMillisec;
SavedExplainPlanNtuples = executionTuples;
SavedExplainPlanNloops = executionLoops;

MemoryContextSwitchTo(oldContext);

@@ -1558,22 +1679,40 @@ CitusExplainOneQuery(Query *query, int cursorOptions, IntoClause *into,
BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
}

/* capture memory stats on PG17+ */
#if PG_VERSION_NUM >= PG_VERSION_17
if (es->memory)
{
MemoryContextSwitchTo(saved_ctx);
MemoryContextMemConsumed(planner_ctx, &mem_counters);
}
#endif

/* run it (if needed) and produce output */
ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
&planduration, (es->buffers ? &bufusage : NULL),
(es->memory ? &mem_counters : NULL));
#if PG_VERSION_NUM >= PG_VERSION_17

/* PostgreSQL 17 signature (9 args: includes mem_counters) */
ExplainOnePlan(
plan,
into,
es,
queryString,
params,
queryEnv,
&planduration,
(es->buffers ? &bufusage : NULL),
(es->memory ? &mem_counters : NULL)
);
#else

/* run it (if needed) and produce output */
ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
&planduration, (es->buffers ? &bufusage : NULL));
ExplainOnePlan(
plan,
into,
es,
queryString,
params,
queryEnv,
&planduration,
(es->buffers ? &bufusage : NULL)
);
#endif
}

@@ -1590,11 +1729,13 @@ CreateExplainAnlyzeDestination(Task *task, TupleDestination *taskDest)
tupleDestination->originalTask = task;
tupleDestination->originalTaskDestination = taskDest;

TupleDesc lastSavedExplainAnalyzeTupDesc = CreateTemplateTupleDesc(2);
TupleDesc lastSavedExplainAnalyzeTupDesc = CreateTemplateTupleDesc(4);

TupleDescInitEntry(lastSavedExplainAnalyzeTupDesc, 1, "explain analyze", TEXTOID, 0,
0);
TupleDescInitEntry(lastSavedExplainAnalyzeTupDesc, 2, "duration", FLOAT8OID, 0, 0);
TupleDescInitEntry(lastSavedExplainAnalyzeTupDesc, 3, "ntuples", FLOAT8OID, 0, 0);
TupleDescInitEntry(lastSavedExplainAnalyzeTupDesc, 4, "nloops", FLOAT8OID, 0, 0);

tupleDestination->lastSavedExplainAnalyzeTupDesc = lastSavedExplainAnalyzeTupDesc;

@@ -1605,6 +1746,51 @@ CreateExplainAnlyzeDestination(Task *task, TupleDestination *taskDest)
}


/*
* EnsureExplainOutputCapacity is to ensure capacity for new entries. Input
* parameter requiredSize is minimum number of elements needed.
*/
static void
EnsureExplainOutputCapacity(int requiredSize)
{
if (requiredSize < TotalExplainOutputCapacity)
{
return;
}

int newCapacity =
(TotalExplainOutputCapacity == 0) ? 32 : TotalExplainOutputCapacity * 2;

while (newCapacity <= requiredSize)
{
newCapacity *= 2;
}

if (SubPlanExplainOutput == NULL)
{
SubPlanExplainOutput =
(SubPlanExplainOutputData *) MemoryContextAllocZero(
SubPlanExplainAnalyzeContext,
newCapacity *
sizeof(SubPlanExplainOutputData));
}
else
{
/* Use repalloc and manually zero the new memory */
int oldSize = TotalExplainOutputCapacity * sizeof(SubPlanExplainOutputData);
int newSize = newCapacity * sizeof(SubPlanExplainOutputData);

SubPlanExplainOutput =
(SubPlanExplainOutputData *) repalloc(SubPlanExplainOutput, newSize);

/* Zero out the newly allocated memory */
MemSet((char *) SubPlanExplainOutput + oldSize, 0, newSize - oldSize);
}

TotalExplainOutputCapacity = newCapacity;
}


/*
* ExplainAnalyzeDestPutTuple implements TupleDestination->putTuple
* for ExplainAnalyzeDestination.

@@ -1614,6 +1800,8 @@ ExplainAnalyzeDestPutTuple(TupleDestination *self, Task *task,
int placementIndex, int queryNumber,
HeapTuple heapTuple, uint64 tupleLibpqSize)
{
uint32 taskId = task->taskId;

ExplainAnalyzeDestination *tupleDestination = (ExplainAnalyzeDestination *) self;
if (queryNumber == 0)
{

@@ -1621,6 +1809,13 @@ ExplainAnalyzeDestPutTuple(TupleDestination *self, Task *task,
originalTupDest->putTuple(originalTupDest, task, placementIndex, 0, heapTuple,
tupleLibpqSize);
tupleDestination->originalTask->totalReceivedTupleData += tupleLibpqSize;

if (SubPlanExplainAnalyzeContext)
{
EnsureExplainOutputCapacity(taskId + 1);
SubPlanExplainOutput[taskId].totalReceivedTupleData =
tupleDestination->originalTask->totalReceivedTupleData;
}
}
else if (queryNumber == 1)
{

@@ -1636,6 +1831,8 @@ ExplainAnalyzeDestPutTuple(TupleDestination *self, Task *task,
}

Datum executionDuration = heap_getattr(heapTuple, 2, tupDesc, &isNull);
Datum executionTuples = heap_getattr(heapTuple, 3, tupDesc, &isNull);
Datum executionLoops = heap_getattr(heapTuple, 4, tupDesc, &isNull);

if (isNull)
{

@@ -1645,6 +1842,8 @@ ExplainAnalyzeDestPutTuple(TupleDestination *self, Task *task,

char *fetchedExplainAnalyzePlan = TextDatumGetCString(explainAnalyze);
double fetchedExplainAnalyzeExecutionDuration = DatumGetFloat8(executionDuration);
double fetchedExplainAnalyzeTuples = DatumGetFloat8(executionTuples);
double fetchedExplainAnalyzeLoops = DatumGetFloat8(executionLoops);

/*
* Allocate fetchedExplainAnalyzePlan in the same context as the Task, since we are

@@ -1670,6 +1869,20 @@ ExplainAnalyzeDestPutTuple(TupleDestination *self, Task *task,
placementIndex;
tupleDestination->originalTask->fetchedExplainAnalyzeExecutionDuration =
fetchedExplainAnalyzeExecutionDuration;

/* We should build tupleDestination in subPlan similar to the above */
if (SubPlanExplainAnalyzeContext)
{
EnsureExplainOutputCapacity(taskId + 1);
SubPlanExplainOutput[taskId].explainOutput =
MemoryContextStrdup(SubPlanExplainAnalyzeContext,
fetchedExplainAnalyzePlan);
SubPlanExplainOutput[taskId].executionDuration =
fetchedExplainAnalyzeExecutionDuration;
SubPlanExplainOutput[taskId].executionNtuples = fetchedExplainAnalyzeTuples;
SubPlanExplainOutput[taskId].executionNloops = fetchedExplainAnalyzeLoops;
NumTasksOutput++;
}
}
else
{

@@ -1732,7 +1945,14 @@ ExplainAnalyzeDestTupleDescForQuery(TupleDestination *self, int queryNumber)
bool
RequestedForExplainAnalyze(CitusScanState *node)
{
return (node->customScanState.ss.ps.state->es_instrument != 0);
/*
* When running a distributed plan - either the root plan or a subplan's
* distributed fragment - we need to know if we're under EXPLAIN ANALYZE.
* Subplans can't receive the EXPLAIN ANALYZE flag directly, so we use
* SubPlanExplainAnalyzeContext as a flag to indicate that context.
*/
return (node->customScanState.ss.ps.state->es_instrument != 0) ||
(SubPlanLevel > 0 && SubPlanExplainAnalyzeContext);
}
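Because subplans are executed before the outer EXPLAIN machinery runs, the usual es_instrument flag is not visible to them, so the memory context doubles as a marker. A sketch of the check, using only names from this hunk:

    /* true under plain EXPLAIN ANALYZE, or while re-explaining a subplan */
    bool underExplainAnalyze =
        (node->customScanState.ss.ps.state->es_instrument != 0) ||
        (SubPlanLevel > 0 && SubPlanExplainAnalyzeContext != NULL);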
|
||||
|
||||
|
||||
|
|
@ -1805,7 +2025,7 @@ WrapQueryForExplainAnalyze(const char *queryString, TupleDesc tupleDesc,
|
|||
appendStringInfoString(columnDef, ", ");
|
||||
}
|
||||
|
||||
Form_pg_attribute attr = &tupleDesc->attrs[columnIndex];
|
||||
Form_pg_attribute attr = TupleDescAttr(tupleDesc, columnIndex);
|
||||
char *attrType = format_type_extended(attr->atttypid, attr->atttypmod,
|
||||
FORMAT_TYPE_TYPEMOD_GIVEN |
|
||||
FORMAT_TYPE_FORCE_QUALIFY);
|
||||
|
|
@ -1891,7 +2111,8 @@ FetchPlanQueryForExplainAnalyze(const char *queryString, ParamListInfo params)
|
|||
}
|
||||
|
||||
appendStringInfoString(fetchQuery,
|
||||
"SELECT explain_analyze_output, execution_duration "
|
||||
"SELECT explain_analyze_output, execution_duration, "
|
||||
"execution_ntuples, execution_nloops "
|
||||
"FROM worker_last_saved_explain_analyze()");
|
||||
|
||||
return fetchQuery->data;
|
||||
|
|
@ -2026,25 +2247,57 @@ ExplainOneQuery(Query *query, int cursorOptions,
|
|||
BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
|
||||
}
|
||||
|
||||
/* 1) Capture memory counters on PG17+ only once: */
|
||||
#if PG_VERSION_NUM >= PG_VERSION_17
|
||||
if (es->memory)
|
||||
{
|
||||
MemoryContextSwitchTo(saved_ctx);
|
||||
MemoryContextMemConsumed(planner_ctx, &mem_counters);
|
||||
}
|
||||
/* run it (if needed) and produce output */
|
||||
ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
|
||||
&planduration, (es->buffers ? &bufusage : NULL),
|
||||
(es->memory ? &mem_counters : NULL));
|
||||
#endif
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_17
|
||||
ExplainOnePlan(
|
||||
plan,
|
||||
into,
|
||||
es,
|
||||
queryString,
|
||||
params,
|
||||
queryEnv,
|
||||
&planduration,
|
||||
(es->buffers ? &bufusage : NULL),
|
||||
(es->memory ? &mem_counters: NULL)
|
||||
);
|
||||
#else
|
||||
/* run it (if needed) and produce output */
|
||||
ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
|
||||
&planduration, (es->buffers ? &bufusage : NULL));
|
||||
ExplainOnePlan(
|
||||
plan,
|
||||
into,
|
||||
es,
|
||||
queryString,
|
||||
params,
|
||||
queryEnv,
|
||||
&planduration,
|
||||
(es->buffers ? &bufusage : NULL)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PlanStateAnalyzeWalker Tree walker callback that visits each PlanState node in the
|
||||
* plan tree and extracts analyze statistics from CustomScanState tasks using
|
||||
* ExtractAnalyzeStats. Always returns false to recurse into all children.
|
||||
*/
|
||||
static bool
|
||||
PlanStateAnalyzeWalker(PlanState *planState, void *ctx)
|
||||
{
|
||||
DistributedSubPlan *subplan = (DistributedSubPlan *) ctx;
|
||||
ExtractAnalyzeStats(subplan, planState);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ExplainWorkerPlan produces explain output into es. If es->analyze, it also executes
|
||||
* the given plannedStmt and sends the results to dest. It puts total time to execute in
|
||||
|
|
@ -2059,20 +2312,25 @@ ExplainOneQuery(Query *query, int cursorOptions,
|
|||
* destination.
|
||||
*/
|
||||
static void
|
||||
ExplainWorkerPlan(PlannedStmt *plannedstmt, DestReceiver *dest, ExplainState *es,
|
||||
ExplainWorkerPlan(PlannedStmt *plannedstmt, DistributedSubPlan *subPlan, DestReceiver *dest, ExplainState *es,
|
||||
const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv,
|
||||
const instr_time *planduration,
|
||||
#if PG_VERSION_NUM >= PG_VERSION_17
|
||||
const BufferUsage *bufusage,
|
||||
const MemoryContextCounters *mem_counters,
|
||||
#endif
|
||||
double *executionDurationMillisec)
|
||||
double *executionDurationMillisec,
|
||||
double *executionTuples,
|
||||
double *executionLoops)
|
||||
{
|
||||
QueryDesc *queryDesc;
|
||||
instr_time starttime;
|
||||
double totaltime = 0;
|
||||
int eflags;
|
||||
int instrument_option = 0;
|
||||
/* Sub-plan already executed; skipping execution */
|
||||
bool executeQuery = (es->analyze && !subPlan);
|
||||
bool executeSubplan = (es->analyze && subPlan);
|
||||
|
||||
Assert(plannedstmt->commandType != CMD_UTILITY);
|
||||
|
||||
|
|
@ -2102,12 +2360,19 @@ ExplainWorkerPlan(PlannedStmt *plannedstmt, DestReceiver *dest, ExplainState *es
    UpdateActiveSnapshotCommandId();

    /* Create a QueryDesc for the query */
    queryDesc = CreateQueryDesc(plannedstmt, queryString,
                                GetActiveSnapshot(), InvalidSnapshot,
                                dest, params, queryEnv, instrument_option);
    queryDesc = CreateQueryDesc(
        plannedstmt,         /* PlannedStmt *plannedstmt */
        queryString,         /* const char *sourceText */
        GetActiveSnapshot(), /* Snapshot snapshot */
        InvalidSnapshot,     /* Snapshot crosscheck_snapshot */
        dest,                /* DestReceiver *dest */
        params,              /* ParamListInfo params */
        queryEnv,            /* QueryEnvironment *queryEnv */
        instrument_option    /* int instrument_options */
    );

    /* Select execution options */
    if (es->analyze)
    if (executeQuery)
        eflags = 0;  /* default run-to-completion flags */
    else
        eflags = EXEC_FLAG_EXPLAIN_ONLY;
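The eflags choice above is what makes plain EXPLAIN side-effect free: EXEC_FLAG_EXPLAIN_ONLY tells ExecutorStart to build the PlanState tree without running it. A sketch of the lifecycle as driven by that flag, under the pre-PG18 ExecutorRun signature:

    ExecutorStart(queryDesc, eflags);           /* builds the PlanState tree */

    if ((eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
    {
        ExecutorRun(queryDesc, ForwardScanDirection, 0L, true); /* PG <= 17 form */
        ExecutorFinish(queryDesc);              /* fires AFTER triggers etc. */
    }

    ExecutorEnd(queryDesc);                     /* shutdown happens either way */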
@ -2116,12 +2381,19 @@ ExplainWorkerPlan(PlannedStmt *plannedstmt, DestReceiver *dest, ExplainState *es
    ExecutorStart(queryDesc, eflags);

    /* Execute the plan for statistics if asked for */
    if (es->analyze)
    if (executeQuery)
    {
        ScanDirection dir = ForwardScanDirection;

        /* run the plan */
        ExecutorRun(queryDesc, dir, 0L, true);
        /* run the plan: count = 0 (fetch all rows) */
#if PG_VERSION_NUM >= PG_VERSION_18
        /* PG 18+ dropped the "execute_once" boolean */
        ExecutorRun(queryDesc, dir, 0L);
#else
        /* PG 17 and below still expects the fourth "once" argument */
        ExecutorRun(queryDesc, dir, 0L, true);
#endif

        /* run cleanup too */
        ExecutorFinish(queryDesc);
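If this pattern recurs, the signature difference could be hidden once behind a compatibility macro; the name ExecutorRunCompat is hypothetical, not part of this commit:

    #if PG_VERSION_NUM >= PG_VERSION_18
    #define ExecutorRunCompat(queryDesc, direction, count) \
        ExecutorRun((queryDesc), (direction), (count))
    #else
    #define ExecutorRunCompat(queryDesc, direction, count) \
        ExecutorRun((queryDesc), (direction), (count), true)
    #endif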
@ -2132,6 +2404,12 @@ ExplainWorkerPlan(PlannedStmt *plannedstmt, DestReceiver *dest, ExplainState *es

    ExplainOpenGroup("Query", NULL, true, es);

    if (executeSubplan)
    {
        ExtractAnalyzeStats(subPlan, queryDesc->planstate);
        planstate_tree_walker(queryDesc->planstate, PlanStateAnalyzeWalker, (void *) subPlan);
    }

    /* Create textual dump of plan tree */
    ExplainPrintPlan(es, queryDesc);
@ -2204,6 +2482,13 @@ ExplainWorkerPlan(PlannedStmt *plannedstmt, DestReceiver *dest, ExplainState *es
     */
    INSTR_TIME_SET_CURRENT(starttime);

    if (executeQuery)
    {
        Instrumentation *instr = queryDesc->planstate->instrument;

        *executionTuples = instr->ntuples;
        *executionLoops = instr->nloops;
    }

    ExecutorEnd(queryDesc);

    FreeQueryDesc(queryDesc);
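The root PlanState's Instrumentation node is only allocated when instrument_option requested it, and any in-progress cycle is folded in by InstrEndLoop; a defensive sketch (not the committed code) of the read-out above:

    Instrumentation *instr = queryDesc->planstate->instrument;

    if (instr != NULL)
    {
        InstrEndLoop(instr);    /* close any still-open loop before reading */
        elog(DEBUG1, "root node: %.0f tuples over %.0f loops",
             instr->ntuples, instr->nloops);
    }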
@ -2211,7 +2496,7 @@ ExplainWorkerPlan(PlannedStmt *plannedstmt, DestReceiver *dest, ExplainState *es
    PopActiveSnapshot();

    /* We need a CCI just in case query expanded to multiple plans */
    if (es->analyze)
    if (executeQuery)
        CommandCounterIncrement();

    totaltime += elapsed_time(&starttime);
@ -2248,6 +2533,50 @@ elapsed_time(instr_time *starttime)
}


#if PG_VERSION_NUM >= PG_VERSION_17 && PG_VERSION_NUM < PG_VERSION_18
/*
 * Indent a text-format line.
 *
 * We indent by two spaces per indentation level. However, when emitting
 * data for a parallel worker there might already be data on the current line
 * (cf. ExplainOpenWorker); in that case, don't indent any more.
 *
 * Copied from explain.c.
 */
static void
ExplainIndentText(ExplainState *es)
{
    Assert(es->format == EXPLAIN_FORMAT_TEXT);
    if (es->str->len == 0 || es->str->data[es->str->len - 1] == '\n')
        appendStringInfoSpaces(es->str, es->indent * 2);
}


/*
 * GetSerializationMetrics - collect metrics
 *
 * We have to be careful here since the receiver could be an IntoRel
 * receiver if the subject statement is CREATE TABLE AS. In that
 * case, return all-zeroes stats.
 *
 * Copied from explain.c.
 */
static SerializeMetrics
GetSerializationMetrics(DestReceiver *dest)
{
    SerializeMetrics empty;

    if (dest->mydest == DestExplainSerialize)
        return ((SerializeDestReceiver *) dest)->metrics;

    memset(&empty, 0, sizeof(SerializeMetrics));
    INSTR_TIME_SET_ZERO(empty.timeSpent);

    return empty;
}
#endif


#if PG_VERSION_NUM >= PG_VERSION_17
/*
 * Return whether show_buffer_usage would have anything to print, if given
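These static copies are fenced to PG17 only; a sketch, mirroring the upstream explain.c flow rather than this commit, of how the two helpers combine when EXPLAIN (SERIALIZE) output is requested:

    if (es->serialize != EXPLAIN_SERIALIZE_NONE)
    {
        /* returns all-zero metrics when dest is not a serialize receiver */
        SerializeMetrics metrics = GetSerializationMetrics(dest);

        ExplainPrintSerialize(es, &metrics);
    }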
@ -2466,24 +2795,6 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage)
}


/*
 * Indent a text-format line.
 *
 * We indent by two spaces per indentation level. However, when emitting
 * data for a parallel worker there might already be data on the current line
 * (cf. ExplainOpenWorker); in that case, don't indent any more.
 *
 * Copied from explain.c.
 */
static void
ExplainIndentText(ExplainState *es)
{
    Assert(es->format == EXPLAIN_FORMAT_TEXT);
    if (es->str->len == 0 || es->str->data[es->str->len - 1] == '\n')
        appendStringInfoSpaces(es->str, es->indent * 2);
}


/*
 * Show memory usage details.
 *
@ -2560,7 +2871,7 @@ ExplainPrintSerialize(ExplainState *es, SerializeMetrics *metrics)
    ExplainPropertyFloat("Time", "ms",
                         1000.0 * INSTR_TIME_GET_DOUBLE(metrics->timeSpent),
                         3, es);
    ExplainPropertyUInteger("Output Volume", "kB",
    ExplainPropertyInteger("Output Volume", "kB",
                            BYTES_TO_KILOBYTES(metrics->bytesSent), es);
    ExplainPropertyText("Format", format, es);
    if (es->buffers)
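The one-line change above swaps the unsigned helper for the signed one; their upstream declarations (from PostgreSQL's explain.h) differ only in the value type:

    extern void ExplainPropertyInteger(const char *qlabel, const char *unit,
                                       int64 value, ExplainState *es);
    extern void ExplainPropertyUInteger(const char *qlabel, const char *unit,
                                        uint64 value, ExplainState *es);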
@ -2569,28 +2880,4 @@ ExplainPrintSerialize(ExplainState *es, SerializeMetrics *metrics)

    ExplainCloseGroup("Serialization", "Serialization", true, es);
}


/*
 * GetSerializationMetrics - collect metrics
 *
 * We have to be careful here since the receiver could be an IntoRel
 * receiver if the subject statement is CREATE TABLE AS. In that
 * case, return all-zeroes stats.
 *
 * Copied from explain.c.
 */
static SerializeMetrics
GetSerializationMetrics(DestReceiver *dest)
{
    SerializeMetrics empty;

    if (dest->mydest == DestExplainSerialize)
        return ((SerializeDestReceiver *) dest)->metrics;

    memset(&empty, 0, sizeof(SerializeMetrics));
    INSTR_TIME_SET_ZERO(empty.timeSpent);

    return empty;
}
#endif
Some files were not shown because too many files have changed in this diff.