From 3c9f179356dfcc5a672deb7b328c3898ced8bead Mon Sep 17 00:00:00 2001 From: Mehmet Yilmaz Date: Wed, 16 Jul 2025 12:38:55 +0000 Subject: [PATCH 01/11] Pg18 beta conf file updated Pg18 beta conf file updated (cherry picked from commit c36410c7798bb4728368a6d1ff5a669430a9af9d) Update image suffix in build and test workflow Update image suffix in build configuration Update image suffix in build configuration Update image suffix in build configuration (cherry picked from commit 7dbb94606a0ae6d185b201d18843d9ae3fa5acd1) Update image suffix in build_and_test.yml to reflect latest development version Update PostgreSQL version to 18beta3 in Dockerfile and CI workflow --- .devcontainer/Dockerfile | 15 ++++++- .github/workflows/build_and_test.yml | 42 +++++++++++++++---- .../workflows/packaging-test-pipelines.yml | 2 +- configure | 2 +- configure.ac | 2 +- 5 files changed, 52 insertions(+), 11 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6579c52d9..54b2df65e 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -108,6 +108,18 @@ RUN mkdir .pgenv-staging/ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf +FROM base AS pg18 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 18beta3 +RUN rm .pgenv/src/*.tar* +RUN make -C .pgenv/src/postgresql-*/ clean +RUN make -C .pgenv/src/postgresql-*/src/include install + +# Stage the pgenv artifacts for PG18 +RUN mkdir .pgenv-staging/ +RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ +RUN rm .pgenv-staging/config/default.conf + + FROM base AS uncrustify-builder RUN sudo apt update && sudo apt install -y cmake tree @@ -201,6 +213,7 @@ COPY --link --from=uncrustify-builder /uncrustify/usr/ /usr/ COPY --link --from=pg15 /home/citus/.pgenv-staging/ /home/citus/.pgenv/ COPY --link --from=pg16 /home/citus/.pgenv-staging/ /home/citus/.pgenv/ COPY --link --from=pg17 /home/citus/.pgenv-staging/ /home/citus/.pgenv/ +COPY --link 
--from=pg18 /home/citus/.pgenv-staging/ /home/citus/.pgenv/ COPY --link --from=pipenv /home/citus/.local/share/virtualenvs/ /home/citus/.local/share/virtualenvs/ @@ -216,7 +229,7 @@ COPY --chown=citus:citus .psqlrc . RUN sudo chown --from=root:root citus:citus -R ~ # sets default pg version -RUN pgenv switch 17.6 +RUN pgenv switch 18beta3 # make connecting to the coordinator easy ENV PGPORT=9700 diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5a99abef1..764a904d6 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -31,12 +31,13 @@ jobs: pgupgrade_image_name: "ghcr.io/citusdata/pgupgradetester" style_checker_image_name: "ghcr.io/citusdata/stylechecker" style_checker_tools_version: "0.8.18" - sql_snapshot_pg_version: "17.6" - image_suffix: "-va20872f" - pg15_version: '{ "major": "15", "full": "15.14" }' - pg16_version: '{ "major": "16", "full": "16.10" }' - pg17_version: '{ "major": "17", "full": "17.6" }' - upgrade_pg_versions: "15.14-16.10-17.6" + sql_snapshot_pg_version: "17.5" + image_suffix: "-dev-8e2a1ce" + pg15_version: '{ "major": "15", "full": "15.13" }' + pg16_version: '{ "major": "16", "full": "16.9" }' + pg17_version: '{ "major": "17", "full": "17.5" }' + pg18_version: '{ "major": "18", "full": "18beta3" }' + upgrade_pg_versions: "15.13-16.9-17.5-18beta3" steps: # Since GHA jobs need at least one step we use a noop step here. 
- name: Set up parameters @@ -113,6 +114,7 @@ jobs: - ${{ needs.params.outputs.pg15_version }} - ${{ needs.params.outputs.pg16_version }} - ${{ needs.params.outputs.pg17_version }} + - ${{ needs.params.outputs.pg18_version }} runs-on: ubuntu-latest container: image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ matrix.image_suffix }}" @@ -144,6 +146,7 @@ jobs: - ${{ needs.params.outputs.pg15_version }} - ${{ needs.params.outputs.pg16_version }} - ${{ needs.params.outputs.pg17_version }} + - ${{ needs.params.outputs.pg18_version }} make: - check-split - check-multi @@ -174,6 +177,10 @@ jobs: pg_version: ${{ needs.params.outputs.pg17_version }} suite: regress image_name: ${{ needs.params.outputs.fail_test_image_name }} + - make: check-failure + pg_version: ${{ needs.params.outputs.pg18_version }} + suite: regress + image_name: ${{ needs.params.outputs.fail_test_image_name }} - make: check-enterprise-failure pg_version: ${{ needs.params.outputs.pg15_version }} suite: regress @@ -186,6 +193,10 @@ jobs: pg_version: ${{ needs.params.outputs.pg17_version }} suite: regress image_name: ${{ needs.params.outputs.fail_test_image_name }} + - make: check-enterprise-failure + pg_version: ${{ needs.params.outputs.pg18_version }} + suite: regress + image_name: ${{ needs.params.outputs.fail_test_image_name }} - make: check-pytest pg_version: ${{ needs.params.outputs.pg15_version }} suite: regress @@ -198,6 +209,10 @@ jobs: pg_version: ${{ needs.params.outputs.pg17_version }} suite: regress image_name: ${{ needs.params.outputs.fail_test_image_name }} + - make: check-pytest + pg_version: ${{ needs.params.outputs.pg18_version }} + suite: regress + image_name: ${{ needs.params.outputs.fail_test_image_name }} - make: installcheck suite: cdc image_name: ${{ needs.params.outputs.test_image_name }} @@ -210,6 +225,10 @@ jobs: suite: cdc image_name: ${{ needs.params.outputs.test_image_name }} pg_version: ${{ needs.params.outputs.pg17_version }} + - make: installcheck + 
suite: cdc + image_name: ${{ needs.params.outputs.test_image_name }} + pg_version: ${{ needs.params.outputs.pg18_version }} - make: check-query-generator pg_version: ${{ needs.params.outputs.pg15_version }} suite: regress @@ -222,6 +241,10 @@ jobs: pg_version: ${{ needs.params.outputs.pg17_version }} suite: regress image_name: ${{ needs.params.outputs.fail_test_image_name }} + - make: check-query-generator + pg_version: ${{ needs.params.outputs.pg18_version }} + suite: regress + image_name: ${{ needs.params.outputs.fail_test_image_name }} runs-on: ubuntu-latest container: image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ needs.params.outputs.image_suffix }}" @@ -265,6 +288,7 @@ jobs: - ${{ needs.params.outputs.pg15_version }} - ${{ needs.params.outputs.pg16_version }} - ${{ needs.params.outputs.pg17_version }} + - ${{ needs.params.outputs.pg18_version }} parallel: [0,1,2,3,4,5] # workaround for running 6 parallel jobs steps: - uses: actions/checkout@v4 @@ -315,6 +339,10 @@ jobs: new_pg_major: 17 - old_pg_major: 15 new_pg_major: 17 + - old_pg_major: 17 + new_pg_major: 18 + - old_pg_major: 16 + new_pg_major: 18 env: old_pg_major: ${{ matrix.old_pg_major }} new_pg_major: ${{ matrix.new_pg_major }} @@ -509,7 +537,7 @@ jobs: name: Test flakyness runs-on: ubuntu-latest container: - image: ${{ needs.params.outputs.fail_test_image_name }}:${{ fromJson(needs.params.outputs.pg17_version).full }}${{ needs.params.outputs.image_suffix }} + image: ${{ needs.params.outputs.fail_test_image_name }}:${{ fromJson(needs.params.outputs.pg18_version).full }}${{ needs.params.outputs.image_suffix }} options: --user root env: runs: 8 diff --git a/.github/workflows/packaging-test-pipelines.yml b/.github/workflows/packaging-test-pipelines.yml index db0fd08ef..e6c379f7b 100644 --- a/.github/workflows/packaging-test-pipelines.yml +++ b/.github/workflows/packaging-test-pipelines.yml @@ -29,7 +29,7 @@ jobs: # Postgres versions are stored in 
.github/workflows/build_and_test.yml # file in json strings with major and full keys. # Below command extracts the versions and get the unique values. - pg_versions=$(cat .github/workflows/build_and_test.yml | grep -oE '"major": "[0-9]+", "full": "[0-9.]+"' | sed -E 's/"major": "([0-9]+)", "full": "([0-9.]+)"/\1/g' | sort | uniq | tr '\n', ',') + pg_versions=$(cat .github/workflows/build_and_test.yml | grep -oE '"major": "[0-9]+", "full": "[^"]+"' | sed -E 's/.*"major": "([0-9]+)".*/\1/' | sort -n | uniq | tr '\n' ',') pg_versions_array="[ ${pg_versions} ]" echo "Supported PG Versions: ${pg_versions_array}" # Below line is needed to set the output variable to be used in the next job diff --git a/configure b/configure index cdaf0e78b..e7d65e685 100755 --- a/configure +++ b/configure @@ -2588,7 +2588,7 @@ fi if test "$with_pg_version_check" = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: building against PostgreSQL $version_num (skipped compatibility check)" >&5 $as_echo "$as_me: building against PostgreSQL $version_num (skipped compatibility check)" >&6;} -elif test "$version_num" != '15' -a "$version_num" != '16' -a "$version_num" != '17'; then +elif test "$version_num" != '15' -a "$version_num" != '16' -a "$version_num" != '17' -a "$version_num" != '18'; then as_fn_error $? "Citus is not compatible with the detected PostgreSQL version ${version_num}." 
"$LINENO" 5 else { $as_echo "$as_me:${as_lineno-$LINENO}: building against PostgreSQL $version_num" >&5 diff --git a/configure.ac b/configure.ac index c7b5ba1de..c4eb15653 100644 --- a/configure.ac +++ b/configure.ac @@ -80,7 +80,7 @@ AC_SUBST(with_pg_version_check) if test "$with_pg_version_check" = no; then AC_MSG_NOTICE([building against PostgreSQL $version_num (skipped compatibility check)]) -elif test "$version_num" != '15' -a "$version_num" != '16' -a "$version_num" != '17'; then +elif test "$version_num" != '15' -a "$version_num" != '16' -a "$version_num" != '17' -a "$version_num" != '18'; then AC_MSG_ERROR([Citus is not compatible with the detected PostgreSQL version ${version_num}.]) else AC_MSG_NOTICE([building against PostgreSQL $version_num]) From 9ac30643ae432ff9b6905193dbe11734a9b8de63 Mon Sep 17 00:00:00 2001 From: Mehmet YILMAZ Date: Wed, 27 Aug 2025 15:58:53 +0300 Subject: [PATCH 02/11] =?UTF-8?q?Stabilize=20multi=5Finsert=5Fselect=20exp?= =?UTF-8?q?ected:=20accept=20unqualified=20columns=20in=20WHERE=20?= =?UTF-8?q?=E2=80=A6=20IS=20NOT=20NULL=20(PG15=E2=80=93PG18)=20(#8139)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DESCRIPTION: Stabilize multi_insert_select expected: accept unqualified columns in WHERE … IS NOT NULL fixes #8133 **Context** * With PG18, ruleutils adds a GROUP RTE and improves column-name dedup. As a side-effect, Vars that point at the GROUP RTE print as unqualified column names even when `varprefix` is true. * In Citus’ vendored `ruleutils_18.c` we already flattened GROUP Vars in `targetList` and `havingQual`, but not in `jointree->quals`. * For queries like `INSERT … SELECT … GROUP BY …`, Citus injects an implicit null-guard on the group key in the WHERE clause. Because that Var was still referencing the GROUP RTE, the deparser emitted `WHERE (user_id IS NOT NULL)` instead of `WHERE (raw_events_first.user_id IS NOT NULL)`, causing regress diffs only in grouped SELECTs. 
* Related upstream change: PostgreSQL commit `52c707483ce4d0161127e4958d981d1b5655865e` (ruleutils column-name de-dup / GROUP RTE exposure). **What changed** * Added an alternative expected file `src/test/regress/expected/multi_insert_select_0.out` to keep CI green across mixed environments where the qualified form may still be produced. --- .../expected/multi_insert_select_0.out | 3812 +++++++++++++++++ 1 file changed, 3812 insertions(+) create mode 100644 src/test/regress/expected/multi_insert_select_0.out diff --git a/src/test/regress/expected/multi_insert_select_0.out b/src/test/regress/expected/multi_insert_select_0.out new file mode 100644 index 000000000..f1ee0b7ba --- /dev/null +++ b/src/test/regress/expected/multi_insert_select_0.out @@ -0,0 +1,3812 @@ +-- +-- MULTI_INSERT_SELECT +-- +CREATE SCHEMA multi_insert_select; +SET search_path = multi_insert_select,public; +SET citus.next_shard_id TO 13300000; +SET citus.next_placement_id TO 13300000; +-- create co-located tables +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; +-- order of execution might change in parallel executions +-- and the error details might contain the worker node +-- so be less verbose with \set VERBOSITY TERSE when necessary +CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); +SELECT create_distributed_table('raw_events_first', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); +SELECT create_distributed_table('raw_events_second', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, 
agg_time timestamp, UNIQUE(user_id, value_1_agg)); +SELECT create_distributed_table('agg_events', 'user_id');; + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- create the reference table as well +CREATE TABLE reference_table (user_id int); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE insert_select_varchar_test (key varchar, value int); +SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- set back to the defaults +SET citus.shard_count = DEFAULT; +SET citus.shard_replication_factor = DEFAULT; +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (1, now(), 10, 100, 1000.1, 10000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (2, now(), 20, 200, 2000.1, 20000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (3, now(), 30, 300, 3000.1, 30000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (4, now(), 40, 400, 4000.1, 40000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (5, now(), 50, 500, 5000.1, 50000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (6, now(), 60, 600, 6000.1, 60000); +SET client_min_messages TO DEBUG2; +-- raw table to raw table +INSERT INTO raw_events_second SELECT * FROM raw_events_first; +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, 
raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +-- see that our first multi shard INSERT...SELECT works expected +SET client_min_messages TO INFO; +SELECT + raw_events_first.user_id +FROM + raw_events_first, raw_events_second +WHERE + raw_events_first.user_id = raw_events_second.user_id +ORDER BY + user_id DESC; + user_id +--------------------------------------------------------------------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- see that we get unique vialitons +\set VERBOSITY TERSE +INSERT INTO raw_events_second SELECT * FROM raw_events_first; +ERROR: 
duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300004" +\set VERBOSITY DEFAULT +-- stable functions should be allowed +INSERT INTO raw_events_second (user_id, time) +SELECT + user_id, now() +FROM + raw_events_first +WHERE + user_id < 0; +INSERT INTO raw_events_second (user_id) +SELECT + user_id +FROM + raw_events_first +WHERE + time > now() + interval '1 day'; +-- hide version-dependent PL/pgSQL context messages +\set VERBOSITY terse +-- make sure we evaluate stable functions on the master, once +CREATE OR REPLACE FUNCTION evaluate_on_master() +RETURNS int LANGUAGE plpgsql STABLE +AS $function$ +BEGIN + RAISE NOTICE 'evaluating on master'; + RETURN 0; +END; +$function$; +INSERT INTO raw_events_second (user_id, value_1) +SELECT + user_id, evaluate_on_master() +FROM + raw_events_first +WHERE + user_id < 0; +NOTICE: evaluating on master +-- make sure we don't evaluate stable functions with column arguments +SET citus.enable_metadata_sync TO OFF; +CREATE OR REPLACE FUNCTION evaluate_on_master(x int) +RETURNS int LANGUAGE plpgsql STABLE +AS $function$ +BEGIN + RAISE NOTICE 'evaluating on master'; + RETURN x; +END; +$function$; +RESET citus.enable_metadata_sync; +INSERT INTO raw_events_second (user_id, value_1) +SELECT + user_id, evaluate_on_master(value_1) +FROM + raw_events_first +WHERE + user_id = 0; +ERROR: function multi_insert_select.evaluate_on_master(integer) does not exist +-- add one more row +INSERT INTO raw_events_first (user_id, time) VALUES + (7, now()); +-- try a single shard query +SET client_min_messages TO DEBUG2; +INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7; +DEBUG: Creating router plan +DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time") SELECT raw_events_first.user_id, 
raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 7) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away +SET client_min_messages TO INFO; +-- add one more row +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (8, now(), 80, 800, 8000, 80000); +-- reorder columns +SET client_min_messages TO DEBUG2; +INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + user_id = 8; +DEBUG: Creating router plan +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 8) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away +-- a zero shard select +INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + false; +DEBUG: Creating router plan +DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away +DEBUG: 
Skipping target shard interval 13300006 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away +-- another zero shard select +INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + 0 != 0; +DEBUG: Creating router plan +DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away +-- add one more row +SET client_min_messages TO INFO; +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (9, now(), 90, 900, 9000, 90000); +-- show that RETURNING also works +SET client_min_messages TO DEBUG2; +INSERT INTO raw_events_second (user_id, value_1, value_3) +SELECT + user_id, value_1, value_3 +FROM + raw_events_first +WHERE + value_3 = 9000 +RETURNING *; +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM 
multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 + user_id | time | value_1 | value_2 | value_3 | value_4 +--------------------------------------------------------------------- + 9 | | 90 | | 9000 | +(1 row) + +-- hits two shards +\set VERBOSITY TERSE +INSERT INTO raw_events_second (user_id, value_1, value_3) +SELECT + user_id, value_1, value_3 +FROM + raw_events_first +WHERE + user_id = 9 OR user_id = 16 +RETURNING *; +DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away 
+DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300007" +-- now do some aggregations +INSERT INTO agg_events +SELECT + user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4) +FROM + raw_events_first +GROUP BY + user_id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM 
multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +-- group by column not exists on the SELECT target list +INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id) +SELECT + sum(value_3), count(value_4), sum(value_1), user_id +FROM + raw_events_first +GROUP BY + value_2, user_id +RETURNING *; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT 
raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS 
sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time +ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" +-- some subquery tests +INSERT INTO agg_events + (value_1_agg, + user_id) +SELECT SUM(value_1), + id +FROM (SELECT raw_events_second.user_id AS id, + raw_events_second.value_1 + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo +GROUP BY id +ORDER BY id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, 
value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id +ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" +-- subquery one more level depth +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id) AS foo +ORDER BY id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT 
NULL) ORDER BY foo.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id +ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" +\set VERBOSITY DEFAULT +-- join between subqueries +INSERT INTO agg_events + (user_id) +SELECT f2.id FROM 
+(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT 
sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, 
sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) +-- add one more level subqueris on top of subquery JOINs +INSERT INTO agg_events + (user_id, value_4_agg) +SELECT + outer_most.id, max(outer_most.value) +FROM +( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id)) as outer_most +GROUP BY + outer_most.id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 
raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, 
foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id +-- subqueries in WHERE clause +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN (SELECT user_id + FROM raw_events_second + WHERE user_id = 2); +DEBUG: Creating router plan +DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away +DEBUG: Skipping target 
shard interval 13300005 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) 2))) AND (raw_events_first.user_id IS NOT NULL)) +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN (SELECT user_id + FROM raw_events_second + WHERE user_id != 2 AND value_1 = 2000) +ON conflict (user_id, value_1) DO NOTHING; +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS 
NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN (SELECT user_id + FROM raw_events_second WHERE false); +DEBUG: Creating router plan +DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN (SELECT user_id + FROM raw_events_second + WHERE value_1 = 1000 OR value_1 = 2000 OR value_1 = 3000); +DEBUG: distributed 
statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM 
multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) +-- lets mix subqueries in FROM clause and subqueries in WHERE +INSERT INTO agg_events + (user_id) +SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 1000) AS foo2 ) as f2 +ON (f.id = f2.id) +WHERE f.id IN (SELECT user_id + FROM raw_events_second); +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY 
raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)) AND (f2.id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)) AND (f2.id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, 
raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)) AND (f2.id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)) AND (f2.id IS NOT NULL)) +-- some UPSERTS +INSERT INTO agg_events AS ae + ( + user_id, + value_1_agg, + agg_time + ) +SELECT user_id, + value_1, + time +FROM raw_events_first +ON conflict (user_id, value_1_agg) +DO UPDATE + SET agg_time = EXCLUDED.agg_time + WHERE ae.agg_time < EXCLUDED.agg_time; +DEBUG: distributed 
statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) +-- upserts with returning +INSERT INTO agg_events AS ae + ( + user_id, + value_1_agg, + agg_time + ) +SELECT user_id, + value_1, + time +FROM raw_events_first +ON conflict (user_id, value_1_agg) 
+DO UPDATE + SET agg_time = EXCLUDED.agg_time + WHERE ae.agg_time < EXCLUDED.agg_time +RETURNING user_id, value_1_agg; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE 
(ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg + user_id | value_1_agg +--------------------------------------------------------------------- + 7 | +(1 row) + +INSERT INTO agg_events (user_id, value_1_agg) +SELECT + user_id, sum(value_1 + value_2) +FROM + raw_events_first GROUP BY user_id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +-- FILTER CLAUSE +INSERT INTO agg_events (user_id, value_1_agg) +SELECT + user_id, sum(value_1 + value_2) FILTER (where value_3 = 15) +FROM + 
raw_events_first GROUP BY user_id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id +-- a test with reference table JOINs +INSERT INTO + agg_events (user_id, value_1_agg) +SELECT + 
raw_events_first.user_id, sum(value_1) +FROM + reference_table, raw_events_first +WHERE + raw_events_first.user_id = reference_table.user_id +GROUP BY + raw_events_first.user_id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND 
(user_id IS NOT NULL)) GROUP BY raw_events_first.user_id +-- a note on the outer joins is that +-- we filter out outer join results +-- where partition column returns +-- NULL. Thus, we could INSERT less rows +-- than we expect from subquery result. +-- see the following tests +SET client_min_messages TO INFO; +-- we don't want to see constraint violations, so truncate first +TRUNCATE agg_events; +-- add a row to first table to make table contents different +INSERT INTO raw_events_second (user_id, time, value_1, value_2, value_3, value_4) VALUES + (10, now(), 100, 10000, 10000, 100000); +DELETE FROM raw_events_second WHERE user_id = 2; +-- we select 11 rows +SELECT t1.user_id AS col1, + t2.user_id AS col2 + FROM raw_events_first t1 + FULL JOIN raw_events_second t2 + ON t1.user_id = t2.user_id + ORDER BY t1.user_id, + t2.user_id; + col1 | col2 +--------------------------------------------------------------------- + 1 | 1 + 2 | + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + | 10 +(10 rows) + +SET client_min_messages TO DEBUG2; +-- we insert 10 rows since we filtered out +-- NULL partition column values +INSERT INTO agg_events (user_id, value_1_agg) +SELECT t1.user_id AS col1, + t2.user_id AS col2 +FROM raw_events_first t1 + FULL JOIN raw_events_second t2 + ON t1.user_id = t2.user_id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300000 t1 FULL JOIN multi_insert_select.raw_events_second_13300004 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300001 t1 FULL JOIN multi_insert_select.raw_events_second_13300005 t2 ON ((t1.user_id 
OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300002 t1 FULL JOIN multi_insert_select.raw_events_second_13300006 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300003 t1 FULL JOIN multi_insert_select.raw_events_second_13300007 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) +SET client_min_messages TO INFO; +-- see that the results are different from the SELECT query +SELECT + user_id, value_1_agg +FROM + agg_events +ORDER BY + user_id, value_1_agg; + user_id | value_1_agg +--------------------------------------------------------------------- + 1 | 1 + 2 | + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(9 rows) + +-- we don't want to see constraint violations, so truncate first +SET client_min_messages TO INFO; +TRUNCATE agg_events; +SET client_min_messages TO DEBUG2; +-- DISTINCT clause +INSERT INTO agg_events (value_1_agg, user_id) + SELECT + DISTINCT value_1, user_id + FROM + raw_events_first; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM 
multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +-- we don't want to see constraint violations, so truncate first +SET client_min_messages TO INFO; +truncate agg_events; +SET client_min_messages TO DEBUG2; +-- DISTINCT ON clauses are supported +-- distinct on(non-partition column) +-- values are pulled to master +INSERT INTO agg_events (value_1_agg, user_id) + SELECT + DISTINCT ON (value_1) value_1, user_id + FROM + raw_events_first; +DEBUG: cannot push down this subquery +DETAIL: Distinct on columns without partition column is currently unsupported +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Collecting INSERT ... 
SELECT results on coordinator +SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; +DEBUG: Router planner cannot handle multi-shard select queries + user_id | value_1_agg +--------------------------------------------------------------------- + 1 | 10 + 2 | 20 + 3 | 30 + 4 | 40 + 5 | 50 + 6 | 60 + 7 | + 8 | 80 + 9 | 90 +(9 rows) + +-- we don't want to see constraint violations, so truncate first +SET client_min_messages TO INFO; +truncate agg_events; +SET client_min_messages TO DEBUG2; +-- distinct on(partition column) +-- queries are forwared to workers +INSERT INTO agg_events (value_1_agg, user_id) + SELECT + DISTINCT ON (user_id) value_1, user_id + FROM + raw_events_first; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE 
(raw_events_first.user_id IS NOT NULL) +SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; +DEBUG: Router planner cannot handle multi-shard select queries + user_id | value_1_agg +--------------------------------------------------------------------- + 1 | 10 + 2 | 20 + 3 | 30 + 4 | 40 + 5 | 50 + 6 | 60 + 7 | + 8 | 80 + 9 | 90 +(9 rows) + +-- We support CTEs +BEGIN; +WITH fist_table_agg AS MATERIALIZED + (SELECT max(value_1)+1 as v1_agg, user_id FROM raw_events_first GROUP BY user_id) +INSERT INTO agg_events + (value_1_agg, user_id) + SELECT + v1_agg, user_id + FROM + fist_table_agg; +DEBUG: distributed INSERT ... SELECT can only select from distributed tables +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE fist_table_agg: SELECT (max(value_1) OPERATOR(pg_catalog.+) 1) AS v1_agg, user_id FROM multi_insert_select.raw_events_first GROUP BY user_id +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_agg AS value_1_agg FROM (SELECT fist_table_agg.user_id, fist_table_agg.v1_agg FROM (SELECT intermediate_result.v1_agg, intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v1_agg integer, user_id integer)) fist_table_agg) citus_insert_select_subquery +DEBUG: Creating router plan +DEBUG: Collecting INSERT ... SELECT results on coordinator +ROLLBACK; +-- We do support CTEs that are referenced in the target list +INSERT INTO agg_events + WITH sub_cte AS (SELECT 1) + SELECT + raw_events_first.user_id, (SELECT * FROM sub_cte) + FROM + raw_events_first; +DEBUG: CTE sub_cte is going to be inlined via distributed planning +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" 
FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" 
FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) +-- We support set operations +BEGIN; +INSERT INTO + raw_events_first(user_id) +SELECT + user_id +FROM + ((SELECT user_id FROM raw_events_first) UNION + (SELECT user_id FROM raw_events_second)) as foo; +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) +ROLLBACK; +-- We do support set operations through recursive planning +BEGIN; +SET 
LOCAL client_min_messages TO DEBUG; +INSERT INTO + raw_events_first(user_id) + (SELECT user_id FROM raw_events_first) INTERSECT + (SELECT user_id FROM raw_events_first); +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_first +DEBUG: Creating router plan +DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) citus_insert_select_subquery +DEBUG: Creating router plan +DEBUG: Collecting INSERT ... 
SELECT results on coordinator +ROLLBACK; +-- If the query is router plannable then it is executed via the coordinator +INSERT INTO + raw_events_first(user_id) +SELECT + user_id +FROM + ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT + (SELECT user_id FROM raw_events_second where user_id = 17)) as foo; +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +DEBUG: router planner does not support queries that reference non-colocated distributed tables +DEBUG: Distributed planning for a fast-path router query +DEBUG: Creating router plan +DEBUG: query has a single distribution column value: 15 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first WHERE (user_id OPERATOR(pg_catalog.=) 15) +DEBUG: Distributed planning for a fast-path router query +DEBUG: Creating router plan +DEBUG: query has a single distribution column value: 17 +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE (user_id OPERATOR(pg_catalog.=) 17) +DEBUG: Creating router plan +DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) EXCEPT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo +DEBUG: Creating router plan +DEBUG: Collecting INSERT ... 
SELECT results on coordinator +-- some supported LEFT joins + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) + SET client_min_messages to debug3; + INSERT INTO agg_events (user_id) + SELECT + 
raw_events_second.user_id + FROM + reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id; +DEBUG: no shard pruning constraints on raw_events_second found +DEBUG: shard count after pruning for raw_events_second: 4 +DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table +DEBUG: no shard pruning constraints on raw_events_second found +DEBUG: shard count after pruning for raw_events_second: 4 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: a push down safe left join with recurring left side +DEBUG: no shard pruning constraints on raw_events_second found +DEBUG: shard count after pruning for raw_events_second: 4 +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'user_id' + SET client_min_messages to debug2; + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id + WHERE raw_events_first.user_id = 10; +DEBUG: Creating router plan +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 10) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away +DEBUG: Skipping target shard 
interval 13300011 since SELECT query for it pruned away + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id + WHERE raw_events_second.user_id = 10 OR raw_events_second.user_id = 11; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS 
value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id + WHERE raw_events_first.user_id = 10 AND raw_events_first.user_id = 20; +DEBUG: Creating router plan +DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id + WHERE raw_events_first.user_id = 10 AND raw_events_second.user_id = 20; +DEBUG: Creating router plan +DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 
13300010 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id + WHERE raw_events_first.user_id IN (19, 20, 21); +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT 
raw_events_first.user_id FROM ((SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_first(user_id, "time", value_1, value_2, value_3, value_4) LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id + WHERE raw_events_second.user_id IN (19, 20, 21); +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM 
(multi_insert_select.raw_events_first_13300002 raw_events_first JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) +SET client_min_messages TO WARNING; + -- following query should use repartitioned joins and results should + -- be routed via coordinator + SET citus.enable_repartition_joins TO true; + INSERT INTO agg_events + (user_id) + SELECT raw_events_first.user_id + FROM raw_events_first, + raw_events_second + WHERE raw_events_second.user_id = raw_events_first.value_1 + AND raw_events_first.value_1 = 12; + -- some unsupported LEFT/INNER JOINs + -- JOIN on one table with partition column other is not + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns + -- same as the above with INNER JOIN + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first INNER JOIN 
raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; + -- a not meaningful query + INSERT INTO agg_events + (user_id) + SELECT raw_events_second.user_id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_first.value_1; +ERROR: cannot perform distributed planning on this query +DETAIL: Cartesian products are currently unsupported + -- both tables joined on non-partition columns + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns + -- same as the above with INNER JOIN + -- we support this with route to coordinator + SELECT coordinator_plan($Q$ + EXPLAIN (costs off) + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(4 rows) + +-- EXPLAIN ANALYZE is not supported for INSERT ... SELECT via coordinator +EXPLAIN (costs off, analyze on, BUFFERS OFF) + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; +ERROR: EXPLAIN ANALYZE is currently not supported for INSERT ... 
SELECT commands via coordinator +-- even if there is a filter on the partition key, since the join is not on the partition key we reject +-- this query +INSERT INTO agg_events (user_id) +SELECT + raw_events_first.user_id +FROM + raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 +WHERE + raw_events_first.user_id = 10; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns + -- same as the above with INNER JOIN + -- we support this with route to coordinator + SELECT coordinator_plan($Q$ + EXPLAIN (costs off) + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 + WHERE raw_events_first.user_id = 10; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(4 rows) + + -- make things a bit more complicate with IN clauses + -- we support this with route to coordinator + SELECT coordinator_plan($Q$ + EXPLAIN (costs off) + INSERT INTO agg_events (user_id) + SELECT + raw_events_first.user_id + FROM + raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 + WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(4 rows) + + -- implicit join on non partition column should also not be pushed down, + -- so we fall back to route via coordinator + SELECT coordinator_plan($Q$ + EXPLAIN (costs off) + INSERT INTO agg_events + (user_id) + SELECT raw_events_first.user_id + FROM raw_events_first, + raw_events_second + WHERE raw_events_second.user_id = raw_events_first.value_1; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(4 rows) + +RESET client_min_messages; + -- The following is again a tricky query for Citus. If the given filter was + -- on value_1 as shown in the above, Citus could push it down and use + -- distributed INSERT/SELECT. But we instead fall back to route via coordinator. + SELECT coordinator_plan($Q$ + EXPLAIN (costs off) + INSERT INTO agg_events + (user_id) + SELECT raw_events_first.user_id + FROM raw_events_first, + raw_events_second + WHERE raw_events_second.user_id = raw_events_first.value_1 + AND raw_events_first.value_2 = 12; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(4 rows) + + -- foo is not joined on the partition key so the query is not + -- pushed down. So instead we route via coordinator. 
+ SELECT coordinator_plan($Q$ + EXPLAIN (costs off) + INSERT INTO agg_events + (user_id, value_4_agg) + SELECT + outer_most.id, max(outer_most.value) + FROM + ( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first LEFT JOIN + reference_table + ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 + ON (f.id = f2.id)) as outer_most + GROUP BY + outer_most.id; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> HashAggregate + Group Key: remote_scan.user_id + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(8 rows) + + -- if the given filter was on value_1 as shown in the above, Citus could + -- push it down. But here the query falls back to route via coordinator. + SELECT coordinator_plan($Q$ + EXPLAIN (costs off) + INSERT INTO agg_events + (user_id) + SELECT raw_events_first.user_id + FROM raw_events_first, + raw_events_second + WHERE raw_events_second.user_id = raw_events_first.value_1 + AND raw_events_first.value_2 = 12; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(4 rows) + + -- foo is not joined on the partition key so the query is not + -- pushed down, and it falls back to route via coordinator +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) + INSERT INTO agg_events + (user_id, value_4_agg) + SELECT + outer_most.id, max(outer_most.value) + FROM + ( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first LEFT JOIN + reference_table + ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 + ON (f.id = f2.id)) as outer_most + GROUP BY + outer_most.id; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> HashAggregate + Group Key: remote_scan.user_id + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(8 rows) + +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id != raw_events_second.user_id + GROUP BY raw_events_second.user_id) AS foo; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +SET client_min_messages TO DEBUG2; +-- INSERT returns NULL partition key value via coordinator +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.value_3 AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_3) AS foo; +DEBUG: cannot push down this subquery +DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: join prunable 
for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] +DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(id) AS user_id, int4(v1) AS value_1_agg, int8(v4) AS value_4_agg FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 bigint, id double precision)) foo +DEBUG: Creating router plan +DEBUG: Collecting INSERT ... SELECT results on coordinator +ERROR: the partition column of table multi_insert_select.agg_events cannot be NULL +-- error cases +-- no part column at all +INSERT INTO raw_events_second + (value_1) +SELECT value_1 +FROM raw_events_first; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: the query doesn't include the target table's partition column +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... 
SELECT +ERROR: the partition column of table multi_insert_select.raw_events_second should have a value +INSERT INTO raw_events_second + (value_1) +SELECT user_id +FROM raw_events_first; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: the query doesn't include the target table's partition column +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +ERROR: the partition column of table multi_insert_select.raw_events_second should have a value +INSERT INTO raw_events_second + (user_id) +SELECT value_1 +FROM raw_events_first; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'user_id' +ERROR: the partition column value cannot be NULL +CONTEXT: while executing command on localhost:xxxxx +INSERT INTO raw_events_second + (user_id) +SELECT user_id * 2 +FROM raw_events_first; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an operator in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... 
SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'user_id' +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0,repartitioned_results_xxxxx_from_13300001_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_1,repartitioned_results_xxxxx_from_13300001_to_1,repartitioned_results_xxxxx_from_13300003_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_3,repartitioned_results_xxxxx_from_13300002_to_3,repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +INSERT INTO raw_events_second + (user_id) +SELECT user_id :: bigint +FROM raw_events_first; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an explicit cast in the same position as the target table's partition column. 
+HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'user_id' +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) +INSERT INTO agg_events + (value_3_agg, + value_4_agg, + value_1_agg, + value_2_agg, + user_id) +SELECT SUM(value_3), + Count(value_4), + user_id, + SUM(value_1), + Avg(value_2) +FROM raw_events_first +GROUP BY user_id; +DEBUG: cannot perform distributed INSERT INTO ... 
SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an aggregation in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'user_id' +ERROR: the partition column value cannot be NULL +CONTEXT: while executing command on localhost:xxxxx +INSERT INTO agg_events + (value_3_agg, + value_4_agg, + value_1_agg, + value_2_agg, + user_id) +SELECT SUM(value_3), + Count(value_4), + user_id, + SUM(value_1), + value_2 +FROM raw_events_first +GROUP BY user_id, + value_2; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'user_id' +ERROR: the partition column value cannot be NULL +CONTEXT: while executing command on localhost:xxxxx +-- tables should be co-located +INSERT INTO agg_events (user_id) +SELECT + user_id +FROM + reference_table; +DEBUG: Creating router plan +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. +DEBUG: Distributed planning for a fast-path router query +DEBUG: Creating router plan +DEBUG: Collecting INSERT ... 
SELECT results on coordinator +-- foo2 is recursively planned and INSERT...SELECT is done via coordinator +INSERT INTO agg_events + (user_id) +SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + raw_events_second.value_1 AS v1, + SUM(raw_events_second.user_id) AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_1 + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] +DEBUG: generating subplan XXX_1 for subquery 
SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(f2.id) AS user_id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... 
SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'user_id' +-- the second part of the query is not routable since +-- GROUP BY not on the partition column (i.e., value_1) and thus join +-- on f.id = f2.id is not on the partition key (instead on the sum of partition key) +-- but we still recursively plan foo2 and run the query +INSERT INTO agg_events + (user_id) +SELECT f.id FROM +(SELECT + id +FROM (SELECT raw_events_first.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + raw_events_second.value_1 AS v1, + SUM(raw_events_second.user_id) AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_1 + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] 
and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] +DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT f.id AS user_id FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... 
SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'user_id' +SET client_min_messages TO WARNING; +-- cannot pushdown the query since the JOIN is not equi JOIN +-- falls back to route via coordinator +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO agg_events + (user_id, value_4_agg) +SELECT +outer_most.id, max(outer_most.value) + FROM +( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id != f2.id)) as outer_most +GROUP BY outer_most.id; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> HashAggregate + Group Key: remote_scan.user_id + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(8 rows) + +-- cannot pushdown since foo2 is not join on partition key +-- falls back to route via coordinator +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO agg_events + (user_id, value_4_agg) +SELECT + outer_most.id, max(outer_most.value) +FROM +( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.value_1 + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id)) as outer_most +GROUP BY + outer_most.id; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> HashAggregate + Group Key: remote_scan.user_id + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> HashAggregate + Group Key: remote_scan.id + Filter: (pg_catalog.sum(remote_scan.worker_column_4) > '10'::numeric) + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(11 rows) + +-- cannot push down since foo doesn't have en equi join +-- falls back to route via coordinator +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO agg_events + (user_id, value_4_agg) +SELECT + outer_most.id, max(outer_most.value) +FROM +( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id != reference_table.user_id ) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id)) as outer_most +GROUP BY + outer_most.id; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> HashAggregate + Group Key: remote_scan.user_id + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(8 rows) + +-- some unsupported LATERAL JOINs +-- join on averages is not on the partition key +-- should fall back to route via coordinator +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO agg_events (user_id, value_4_agg) +SELECT + averages.user_id, avg(averages.value_4) +FROM + (SELECT + raw_events_second.user_id + FROM + reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) + ) reference_ids + JOIN LATERAL + (SELECT + user_id, value_4 + FROM + raw_events_first WHERE + value_4 = reference_ids.user_id) as averages ON true + GROUP BY averages.user_id; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> HashAggregate + Group Key: remote_scan.user_id + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(6 rows) + +-- join among reference_ids and averages is not on the partition key +-- should fall back to route via coordinator +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO agg_events (user_id, value_4_agg) +SELECT + averages.user_id, avg(averages.value_4) +FROM + (SELECT + raw_events_second.user_id + FROM + reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) + ) reference_ids + JOIN LATERAL + (SELECT + user_id, value_4 + FROM + raw_events_first) as averages ON averages.value_4 = reference_ids.user_id + GROUP BY averages.user_id; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> HashAggregate + Group Key: remote_scan.user_id + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(8 rows) + +-- join among the agg_ids and averages is not on the partition key +-- should fall back to route via coordinator +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO agg_events (user_id, value_4_agg) +SELECT + averages.user_id, avg(averages.value_4) +FROM + (SELECT + raw_events_second.user_id + FROM + reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) + ) reference_ids + JOIN LATERAL + (SELECT + user_id, value_4 + FROM + raw_events_first) as averages ON averages.user_id = reference_ids.user_id +JOIN LATERAL + (SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id) + GROUP BY averages.user_id; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(4 rows) + +-- Selected value in the WHERE is not partition key, so we cannot use distributed +-- INSERT/SELECT and falls back route via coordinator +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN (SELECT value_1 + FROM raw_events_second); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +-- same as above but slightly more complex +-- since it also includes subquery in FROM as well +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO agg_events + (user_id) +SELECT f2.id FROM + +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id) +WHERE f.id IN (SELECT value_1 + FROM raw_events_second); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +-- some more semi-anti join tests +SET client_min_messages TO DEBUG2; +-- join in where +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN (SELECT raw_events_second.user_id + FROM raw_events_second, raw_events_first + WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200); +DEBUG: Creating router plan +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second, multi_insert_select.raw_events_first_13300000 raw_events_first_1 WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first_1.user_id) AND (raw_events_first_1.user_id OPERATOR(pg_catalog.=) 200)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away +RESET client_min_messages; +-- we cannot push this down since it is NOT IN +-- we use repartition insert/select instead +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id NOT IN (SELECT raw_events_second.user_id + FROM raw_events_second, raw_events_first + WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200); +$Q$); + coordinator_plan 
+--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 1 +(6 rows) + +SET client_min_messages TO DEBUG2; +-- safe to push down +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE EXISTS (SELECT 1 + FROM raw_events_second + WHERE raw_events_second.user_id =raw_events_first.user_id); +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT 
raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) +-- we cannot push down +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE NOT EXISTS (SELECT 1 + FROM raw_events_second + WHERE raw_events_second.user_id =raw_events_first.user_id); +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias 
(user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) +-- more complex LEFT JOINs + INSERT INTO agg_events + (user_id, value_4_agg) + SELECT + outer_most.id, max(outer_most.value) + FROM + ( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT raw_events_first.user_id AS id + FROM raw_events_first LEFT JOIN + reference_table + ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f + LEFT JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 + ON (f.id = f2.id)) as outer_most + GROUP BY + outer_most.id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) 
raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, 
raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id +DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id +RESET client_min_messages; +-- cannot push down since the f.id IN is matched with value_1 +-- we use repartition insert/select instead +SELECT coordinator_plan($Q$ +EXPLAIN (costs off) +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN ( +SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE 
raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id) +WHERE f.id IN (SELECT value_1 + FROM raw_events_second)); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +SET client_min_messages TO DEBUG2; +-- same as above, but this time is it safe to push down since +-- f.id IN is matched with user_id +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN ( +SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id) +WHERE f.id IN (SELECT user_id + FROM raw_events_second)); +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT 
foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY 
raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT 
reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) +RESET client_min_messages; +-- cannot push down since top level user_id is matched with NOT IN +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id NOT IN ( +SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id) +WHERE f.id IN (SELECT user_id + FROM raw_events_second)); +-- cannot push down since join is not equi join (f.id > f2.id) +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN ( 
+SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id > f2.id) +WHERE f.id IN (SELECT user_id + FROM raw_events_second)); +-- we currently not support grouping sets +INSERT INTO agg_events + (user_id, + value_1_agg, + value_2_agg) +SELECT user_id, + Sum(value_1) AS sum_val1, + Sum(value_2) AS sum_val2 +FROM raw_events_second +GROUP BY grouping sets ( ( user_id ), ( value_1 ), ( user_id, value_1 ), ( ) ); +ERROR: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP +HINT: Consider using an equality filter on the distributed table's partition column. +-- set back to INFO +SET client_min_messages TO INFO; +-- avoid constraint violations +TRUNCATE raw_events_first; +-- we don't support LIMIT for subquery pushdown, but +-- we recursively plan the query and run it via coordinator +INSERT INTO agg_events(user_id) +SELECT user_id +FROM users_table +WHERE user_id + IN (SELECT + user_id + FROM ( + ( + SELECT + user_id + FROM + ( + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE + e1.user_id = u1.user_id LIMIT 3 + ) as f_inner + ) + ) AS f2); +-- Altering a table and selecting from it using a multi-shard statement +-- in the same transaction is allowed because we will use the same +-- connections for all co-located placements. 
+BEGIN; +ALTER TABLE raw_events_second DROP COLUMN value_4; +INSERT INTO raw_events_first SELECT * FROM raw_events_second; +ROLLBACK; +-- Alterating a table and selecting from it using a single-shard statement +-- in the same transaction is disallowed because we will use a different +-- connection. +BEGIN; +ALTER TABLE raw_events_second DROP COLUMN value_4; +INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; +ROLLBACK; +-- Altering a reference table and then performing an INSERT ... SELECT which +-- joins with the reference table is allowed, since the INSERT ... SELECT +-- would read from the reference table over the same connections with the ones +-- that performed the parallel DDL. +BEGIN; +ALTER TABLE reference_table ADD COLUMN z int; +INSERT INTO raw_events_first (user_id) +SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id); +ROLLBACK; +-- the same test with sequential DDL should work fine +BEGIN; +SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; +ALTER TABLE reference_table ADD COLUMN z int; +INSERT INTO raw_events_first (user_id) +SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id); +ROLLBACK; +-- Insert after copy is allowed +BEGIN; +COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ','; +INSERT INTO raw_events_first SELECT * FROM raw_events_second; +ROLLBACK; +-- Insert after copy is currently allowed for single-shard operation. +-- Both insert and copy are rolled back successfully. 
+BEGIN; +COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ','; +INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 101; +SELECT user_id FROM raw_events_first WHERE user_id = 101; + user_id +--------------------------------------------------------------------- + 101 +(1 row) + +ROLLBACK; +BEGIN; +INSERT INTO raw_events_first SELECT * FROM raw_events_second; +COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; +ROLLBACK; +BEGIN; +INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; +COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; +ROLLBACK; +-- Similarly, multi-row INSERTs will take part in transactions and reuse connections... +BEGIN; +INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; +COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; +INSERT INTO raw_events_first (user_id, value_1) VALUES (105, 105), (106, 106); +ROLLBACK; +-- selecting from views works +CREATE VIEW test_view AS SELECT * FROM raw_events_first; +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (16, now(), 60, 600, 6000.1, 60000); +SELECT count(*) FROM raw_events_second; + count +--------------------------------------------------------------------- + 45 +(1 row) + +INSERT INTO raw_events_second SELECT * FROM test_view; +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (17, now(), 60, 600, 6000.1, 60000); +INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP BY 1,2,3,4,5,6; +SELECT count(*) FROM raw_events_second; + count +--------------------------------------------------------------------- + 47 +(1 row) + +-- intermediate results (CTEs) should be allowed when doing INSERT...SELECT within a CTE +WITH series AS ( + SELECT s AS val FROM generate_series(60,70) s +), +inserts AS ( + INSERT INTO raw_events_second (user_id) + SELECT + user_id + FROM + 
raw_events_first JOIN series ON (value_1 = val) + RETURNING + NULL +) +SELECT count(*) FROM inserts; + count +--------------------------------------------------------------------- + 2 +(1 row) + +-- we need this in our next test +truncate raw_events_first; +SET client_min_messages TO DEBUG2; +-- first show that the query works now +INSERT INTO raw_events_first SELECT * FROM raw_events_second; +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, 
raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) +SET client_min_messages TO INFO; +truncate raw_events_first; +SET client_min_messages TO DEBUG2; +-- now show that it works for a single shard query as well +INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5; +DEBUG: Creating router plan +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) 5) AND (raw_events_second.user_id IS NOT NULL)) +DEBUG: Skipping target shard interval 13300001 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300002 since SELECT query for it pruned away +DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away +SET client_min_messages TO INFO; +-- now do some tests with varchars +INSERT INTO insert_select_varchar_test VALUES ('test_1', 10); +INSERT INTO insert_select_varchar_test VALUES ('test_2', 30); +INSERT INTO insert_select_varchar_test (key, value) +SELECT *, 100 +FROM (SELECT f1.key + FROM (SELECT key + FROM insert_select_varchar_test + GROUP BY 1 + HAVING Count(key) < 3) AS f1, + (SELECT key + FROM insert_select_varchar_test + GROUP BY 1 + HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) > + 20.0) + AS f2 + WHERE f1.key = f2.key + GROUP BY 1) AS foo; +SELECT * FROM insert_select_varchar_test ORDER BY 1 DESC, 2 DESC; + key | value +--------------------------------------------------------------------- + test_2 | 100 + test_2 | 30 + test_1 | 10 +(3 
rows) + +-- some tests with DEFAULT columns and constant values +-- this test is mostly importantly intended for deparsing the query correctly +-- but still it is preferable to have this test here instead of multi_deparse_shard_query +CREATE TABLE table_with_defaults +( + store_id int, + first_name text, + default_1 int DEFAULT 1, + last_name text, + default_2 text DEFAULT '2' +); +-- we don't need many shards +SET citus.shard_count = 2; +SELECT create_distributed_table('table_with_defaults', 'store_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- let's see the queries +SET client_min_messages TO DEBUG2; +-- a very simple query +INSERT INTO table_with_defaults SELECT * FROM table_with_defaults; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +-- see that defaults are filled +INSERT INTO table_with_defaults (store_id, first_name) +SELECT + store_id, first_name +FROM + table_with_defaults; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT 
table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +-- shuffle one of the defaults and skip the other +INSERT INTO table_with_defaults (default_2, store_id, first_name) +SELECT + default_2, store_id, first_name +FROM + table_with_defaults; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +-- shuffle both defaults +INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name) +SELECT + default_2, store_id, default_1, first_name +FROM + table_with_defaults; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 
table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +-- use constants instead of non-default column +INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name) +SELECT + default_2, 'Freund', store_id, 'Andres' +FROM + table_with_defaults; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +-- use constants instead of non-default column and skip both defauls +INSERT INTO table_with_defaults (last_name, store_id, first_name) +SELECT + 'Freund', store_id, 'Andres' +FROM + table_with_defaults; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS 
citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +-- use constants instead of default columns +INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1) +SELECT + 20, last_name, store_id, first_name, 10 +FROM + table_with_defaults; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +-- use constants instead of both default columns and non-default columns +INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1) +SELECT + 20, 'Freund', store_id, 'Andres', 10 +FROM + 
table_with_defaults; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) +-- some of the ultimate queries where we have constants, +-- defaults and group by entry is not on the target entry +INSERT INTO table_with_defaults (default_2, store_id, first_name) +SELECT + '2000', store_id, 'Andres' +FROM + table_with_defaults +GROUP BY + last_name, store_id; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id +INSERT INTO 
table_with_defaults (default_1, store_id, first_name, default_2) +SELECT + 1000, store_id, 'Andres', '2000' +FROM + table_with_defaults +GROUP BY + last_name, store_id, first_name; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name +INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2) +SELECT + 1000, store_id, 'Andres', '2000' +FROM + table_with_defaults +GROUP BY + last_name, store_id, first_name, default_2; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 
'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 +INSERT INTO table_with_defaults (default_1, store_id, first_name) +SELECT + 1000, store_id, 'Andres' +FROM + table_with_defaults +GROUP BY + last_name, store_id, first_name, default_2; +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 +DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 +RESET client_min_messages; +-- Stable function in default should be allowed +ALTER TABLE table_with_defaults ADD COLUMN t timestamptz DEFAULT now(); +INSERT INTO table_with_defaults (store_id, first_name, last_name) +SELECT + store_id, 'first '||store_id, 'last '||store_id +FROM + table_with_defaults +GROUP BY + store_id, first_name, last_name; +-- Volatile function in default should be disallowed - SERIAL pseudo-types +CREATE TABLE table_with_serial ( + store_id int, + s bigserial +); +SELECT 
create_distributed_table('table_with_serial', 'store_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table_with_serial (store_id) +SELECT + store_id +FROM + table_with_defaults +GROUP BY + store_id; +-- Volatile function in default should be disallowed - user-defined sequence +CREATE SEQUENCE user_defined_sequence; +CREATE TABLE table_with_user_sequence ( + store_id int, + s bigint default nextval('user_defined_sequence') +); +SELECT create_distributed_table('table_with_user_sequence', 'store_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table_with_user_sequence (store_id) +SELECT + store_id +FROM + table_with_defaults +GROUP BY + store_id; +-- do some more error/error message checks +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +CREATE TABLE text_table (part_col text, val int); +CREATE TABLE char_table (part_col char[], val int); +create table table_with_starts_with_defaults (a int DEFAULT 5, b int, c int); +SELECT create_distributed_table('text_table', 'part_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('char_table','part_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('table_with_starts_with_defaults', 'c'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SET client_min_messages TO DEBUG; +INSERT INTO text_table (part_col) + SELECT + CASE WHEN part_col = 'onder' THEN 'marco' + END +FROM text_table ; +DEBUG: cannot perform distributed INSERT INTO ... 
SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains a case expression in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT COALESCE(part_col, 'onder') FROM text_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains a coalesce expression in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT GREATEST(part_col, 'jason') FROM text_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... 
SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT LEAST(part_col, 'andres') FROM text_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT NULLIF(part_col, 'metin') FROM text_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT part_col isnull FROM text_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. 
+DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT part_col::text from char_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT (part_col = 'burak') is true FROM text_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT val FROM text_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery. 
+DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +INSERT INTO text_table (part_col) SELECT val::text FROM text_table; +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: performing repartitioned INSERT ... SELECT +DEBUG: partitioning SELECT query by column index 0 with name 'part_col' +RESET client_min_messages; +insert into table_with_starts_with_defaults (b,c) select b,c FROM table_with_starts_with_defaults; +-- Test on partition column without native hash function +CREATE TABLE raw_table +( + id BIGINT, + time DATE +); +CREATE TABLE summary_table +( + time DATE, + count BIGINT +); +SELECT create_distributed_table('raw_table', 'time'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('summary_table', 'time'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO raw_table VALUES(1, '11-11-1980'); +INSERT INTO summary_table SELECT time, COUNT(*) FROM raw_table GROUP BY time; +SELECT * FROM summary_table; + time | count +--------------------------------------------------------------------- + 11-11-1980 | 1 +(1 row) + +-- Test INSERT ... 
SELECT via coordinator +-- Select from constants +TRUNCATE raw_events_first; +INSERT INTO raw_events_first (user_id, value_1) +SELECT * FROM (VALUES (1,2), (3,4), (5,6)) AS v(int,int); +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 2 + 3 | 4 + 5 | 6 +(3 rows) + +-- Select from local functions +TRUNCATE raw_events_first; +CREATE SEQUENCE insert_select_test_seq; +SET client_min_messages TO DEBUG; +INSERT INTO raw_events_first (user_id, value_1, value_2) +SELECT + s, nextval('insert_select_test_seq'), (random()*10)::int +FROM + generate_series(1, 5) s; +DEBUG: Creating router plan +DEBUG: distributed INSERT ... SELECT can only select from distributed tables +DEBUG: Collecting INSERT ... SELECT results on coordinator +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; +DEBUG: Router planner cannot handle multi-shard select queries + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +-- ON CONFLICT is supported +INSERT INTO raw_events_first (user_id, value_1) +SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s +ON CONFLICT DO NOTHING; +DEBUG: Creating router plan +DEBUG: distributed INSERT ... SELECT can only select from distributed tables +DEBUG: Collecting INSERT ... 
SELECT results on coordinator +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING +-- RETURNING is supported +INSERT INTO raw_events_first (user_id, value_1) +SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s +RETURNING *; +DEBUG: Creating router plan +DEBUG: distributed INSERT ... SELECT can only select from distributed tables +DEBUG: Collecting INSERT ... 
SELECT results on coordinator +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, 
citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 + user_id | time | value_1 | value_2 | value_3 | value_4 +--------------------------------------------------------------------- + 1 | | 11 | | | + 2 | | 12 | | | + 3 | | 13 | | | + 4 | | 14 | | | + 5 | | 15 | | | +(5 rows) + +RESET client_min_messages; +-- INSERT ... SELECT and multi-shard SELECT in the same transaction is supported +TRUNCATE raw_events_first; +BEGIN; +INSERT INTO raw_events_first (user_id, value_1) +SELECT s, s FROM generate_series(1, 5) s; +SELECT user_id, value_1 FROM raw_events_first ORDER BY 1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +ROLLBACK; +-- INSERT ... SELECT and single-shard SELECT in the same transaction is supported +TRUNCATE raw_events_first; +BEGIN; +INSERT INTO raw_events_first (user_id, value_1) +SELECT s, s FROM generate_series(1, 5) s; +SELECT user_id, value_1 FROM raw_events_first WHERE user_id = 1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +COMMIT; +-- Select from local table +TRUNCATE raw_events_first; +CREATE TEMPORARY TABLE raw_events_first_local AS +SELECT s AS u, 2*s AS v FROM generate_series(1, 5) s; +INSERT INTO raw_events_first (user_id, value_1) +SELECT u, v FROM raw_events_first_local; +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +-- Use columns in opposite order +TRUNCATE raw_events_first; +INSERT INTO raw_events_first (value_1, user_id) +SELECT u, v FROM raw_events_first_local; +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 2 | 1 + 4 | 2 + 6 | 3 + 8 | 4 + 10 | 5 +(5 rows) + +-- 
Set operations can work with opposite column order +TRUNCATE raw_events_first; +INSERT INTO raw_events_first (value_3, user_id) +( SELECT v, u::bigint FROM raw_events_first_local ) +UNION ALL +( SELECT v, u FROM raw_events_first_local ); +SELECT user_id, value_3 FROM raw_events_first ORDER BY user_id, value_3; + user_id | value_3 +--------------------------------------------------------------------- + 1 | 2 + 1 | 2 + 2 | 4 + 2 | 4 + 3 | 6 + 3 | 6 + 4 | 8 + 4 | 8 + 5 | 10 + 5 | 10 +(10 rows) + +-- Select from other distributed table with limit +TRUNCATE raw_events_first; +TRUNCATE raw_events_second; +INSERT INTO raw_events_second (user_id, value_4) +SELECT s, 3*s FROM generate_series (1,5) s; +INSERT INTO raw_events_first (user_id, value_1) +SELECT user_id, value_4 FROM raw_events_second LIMIT 5; +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 3 + 2 | 6 + 3 | 9 + 4 | 12 + 5 | 15 +(5 rows) + +-- CTEs are supported in local queries +TRUNCATE raw_events_first; +WITH removed_rows AS ( + DELETE FROM raw_events_first_local RETURNING u +) +INSERT INTO raw_events_first (user_id, value_1) +WITH value AS (SELECT 1) +SELECT * FROM removed_rows, value; +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +-- nested CTEs are also supported +TRUNCATE raw_events_first; +INSERT INTO raw_events_first_local SELECT s, 2*s FROM generate_series(0, 10) s; +WITH rows_to_remove AS ( + SELECT u FROM raw_events_first_local WHERE u > 0 +), +removed_rows AS ( + DELETE FROM raw_events_first_local + WHERE u IN (SELECT * FROM rows_to_remove) + RETURNING u, v +) +INSERT INTO raw_events_first (user_id, value_1) +WITH ultra_rows AS ( + WITH numbers AS ( + SELECT s FROM generate_series(1,10) s + ), + super_rows AS ( + 
SELECT u, v FROM removed_rows JOIN numbers ON (u = s) + ) + SELECT * FROM super_rows LIMIT 5 +) +SELECT u, v FROM ultra_rows; +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +-- CTEs with duplicate names are also supported +TRUNCATE raw_events_first; +WITH super_rows AS ( + SELECT u FROM raw_events_first_local +) +INSERT INTO raw_events_first (user_id, value_1) +WITH super_rows AS ( + SELECT * FROM super_rows GROUP BY u +) +SELECT u, 5 FROM super_rows; +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 0 | 5 +(1 row) + +-- CTEs are supported in router queries +TRUNCATE raw_events_first; +WITH user_two AS ( + SELECT user_id, value_4 FROM raw_events_second WHERE user_id = 2 +) +INSERT INTO raw_events_first (user_id, value_1) +SELECT * FROM user_two; +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 2 | 6 +(1 row) + +-- CTEs are supported when there are name collisions +WITH numbers AS ( + SELECT s FROM generate_series(1,10) s +) +INSERT INTO raw_events_first(user_id, value_1) +WITH numbers AS ( + SELECT s, s FROM generate_series(1,5) s +) +SELECT * FROM numbers; +-- Select into distributed table with a sequence +CREATE TABLE "CaseSensitiveTable" ("UserID" int, "Value1" int); +SELECT create_distributed_table('"CaseSensitiveTable"', 'UserID'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO "CaseSensitiveTable" +SELECT s, s FROM generate_series(1,10) s; +SELECT * FROM "CaseSensitiveTable" ORDER BY "UserID"; + UserID | Value1 +--------------------------------------------------------------------- + 1 
| 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +DROP TABLE "CaseSensitiveTable"; +-- Select into distributed table with a sequence +CREATE TABLE dist_table_with_sequence (user_id serial, value_1 serial); +SELECT create_distributed_table('dist_table_with_sequence', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- from local query +INSERT INTO dist_table_with_sequence (value_1) +SELECT s FROM generate_series(1,5) s; +SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +-- from a distributed query +INSERT INTO dist_table_with_sequence (value_1) +SELECT value_1 FROM dist_table_with_sequence ORDER BY value_1; +SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 1 + 7 | 2 + 8 | 3 + 9 | 4 + 10 | 5 +(10 rows) + +TRUNCATE dist_table_with_sequence; +INSERT INTO dist_table_with_sequence (user_id) +SELECT user_id FROM raw_events_second ORDER BY user_id; +SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +WITH top10 AS ( + SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 +) +INSERT INTO dist_table_with_sequence (value_1) +SELECT * FROM top10; +SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +-- router queries become logical planner queries when there is a nextval call +INSERT INTO 
dist_table_with_sequence (user_id) +SELECT user_id FROM dist_table_with_sequence WHERE user_id = 1; +SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 1 | 6 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(6 rows) + +DROP TABLE dist_table_with_sequence; +-- Select into distributed table with a user-defined sequence +CREATE SEQUENCE seq1; +CREATE SEQUENCE seq2; +CREATE TABLE dist_table_with_user_sequence (user_id int default nextval('seq1'), value_1 bigint default nextval('seq2')); +SELECT create_distributed_table('dist_table_with_user_sequence', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- from local query +INSERT INTO dist_table_with_user_sequence (value_1) +SELECT s FROM generate_series(1,5) s; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +-- from a distributed query +INSERT INTO dist_table_with_user_sequence (value_1) +SELECT value_1 FROM dist_table_with_user_sequence ORDER BY value_1; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 1 + 7 | 2 + 8 | 3 + 9 | 4 + 10 | 5 +(10 rows) + +TRUNCATE dist_table_with_user_sequence; +INSERT INTO dist_table_with_user_sequence (user_id) +SELECT user_id FROM raw_events_second ORDER BY user_id; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +WITH top10 AS ( + SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 +) +INSERT 
INTO dist_table_with_user_sequence (value_1) +SELECT * FROM top10; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +-- router queries become logical planner queries when there is a nextval call +INSERT INTO dist_table_with_user_sequence (user_id) +SELECT user_id FROM dist_table_with_user_sequence WHERE user_id = 1; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 1 | 6 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(6 rows) + +DROP TABLE dist_table_with_user_sequence; +DROP SEQUENCE seq1, seq2; +-- Select from distributed table into reference table +CREATE TABLE ref_table (user_id serial, value_1 int); +SELECT create_reference_table('ref_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO ref_table +SELECT user_id, value_1 FROM raw_events_second; +SELECT * FROM ref_table ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +INSERT INTO ref_table (value_1) +SELECT value_1 FROM raw_events_second ORDER BY value_1; +SELECT * FROM ref_table ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 1 | + 2 | + 2 | + 3 | + 3 | + 4 | + 4 | + 5 | + 5 | +(10 rows) + +INSERT INTO ref_table SELECT * FROM ref_table; +SELECT * FROM ref_table ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 1 | + 1 | + 1 | + 2 | + 2 | + 2 | + 2 | + 3 | + 3 | + 3 | + 3 | + 4 | + 4 | + 4 | + 4 | + 5 | + 5 | + 5 | + 5 | +(20 rows) + +DROP TABLE ref_table; +-- Select from distributed table into reference 
table with user-defined sequence +CREATE SEQUENCE seq1; +CREATE TABLE ref_table_with_user_sequence (user_id int default nextval('seq1'), value_1 int); +SELECT create_reference_table('ref_table_with_user_sequence'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO ref_table_with_user_sequence +SELECT user_id, value_1 FROM raw_events_second; +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +INSERT INTO ref_table_with_user_sequence (value_1) +SELECT value_1 FROM raw_events_second ORDER BY value_1; +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 1 | + 2 | + 2 | + 3 | + 3 | + 4 | + 4 | + 5 | + 5 | +(10 rows) + +INSERT INTO ref_table_with_user_sequence SELECT * FROM ref_table_with_user_sequence; +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 1 | + 1 | + 1 | + 2 | + 2 | + 2 | + 2 | + 3 | + 3 | + 3 | + 3 | + 4 | + 4 | + 4 | + 4 | + 5 | + 5 | + 5 | + 5 | +(20 rows) + +DROP TABLE ref_table_with_user_sequence; +DROP SEQUENCE seq1; +-- Select from reference table into reference table +CREATE TABLE ref1 (d timestamptz); +SELECT create_reference_table('ref1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE ref2 (d date); +SELECT create_reference_table('ref2'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO ref2 VALUES ('2017-10-31'); +INSERT INTO ref1 SELECT * FROM ref2; +SELECT count(*) from ref1; + count 
+--------------------------------------------------------------------- + 1 +(1 row) + +-- also test with now() +INSERT INTO ref1 SELECT now() FROM ref2; +SELECT count(*) from ref1; + count +--------------------------------------------------------------------- + 2 +(1 row) + +DROP TABLE ref1; +DROP TABLE ref2; +-- Select into an append-partitioned table is not supported +CREATE TABLE insert_append_table (user_id int, value_4 bigint); +SELECT create_distributed_table('insert_append_table', 'user_id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO insert_append_table (user_id, value_4) +SELECT user_id, 1 FROM raw_events_second LIMIT 5; +ERROR: INSERT ... SELECT into an append-distributed table is not supported +DROP TABLE insert_append_table; +-- Insert from other distributed table as prepared statement +TRUNCATE raw_events_first; +PREPARE insert_prep(int) AS +INSERT INTO raw_events_first (user_id, value_1) +SELECT $1, value_4 FROM raw_events_second ORDER BY value_4 LIMIT 1; +EXECUTE insert_prep(1); +EXECUTE insert_prep(2); +EXECUTE insert_prep(3); +EXECUTE insert_prep(4); +EXECUTE insert_prep(5); +EXECUTE insert_prep(6); +SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 3 + 2 | 3 + 3 | 3 + 4 | 3 + 5 | 3 + 6 | 3 +(6 rows) + +-- Inserting into views is handled via coordinator +TRUNCATE raw_events_first; +INSERT INTO test_view +SELECT * FROM raw_events_second; +SELECT user_id, value_4 FROM test_view ORDER BY user_id, value_4; + user_id | value_4 +--------------------------------------------------------------------- + 1 | 3 + 2 | 6 + 3 | 9 + 4 | 12 + 5 | 15 +(5 rows) + +-- Drop the view now, because the column we are about to drop depends on it +DROP VIEW test_view; +-- Make sure we handle dropped columns correctly +CREATE TABLE drop_col_table (col1 text, col2 text, 
col3 text); +SELECT create_distributed_table('drop_col_table', 'col2'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE drop_col_table DROP COLUMN col1; +INSERT INTO drop_col_table (col3, col2) +SELECT value_4, user_id FROM raw_events_second LIMIT 5; +SELECT * FROM drop_col_table ORDER BY col2, col3; + col2 | col3 +--------------------------------------------------------------------- + 1 | 3 + 2 | 6 + 3 | 9 + 4 | 12 + 5 | 15 +(5 rows) + +-- make sure the tuple went to the right shard +SELECT * FROM drop_col_table WHERE col2 = '1'; + col2 | col3 +--------------------------------------------------------------------- + 1 | 3 +(1 row) + +RESET client_min_messages; +-- make sure casts are handled correctly +CREATE TABLE coerce_events(user_id int, time timestamp, value_1 numeric); +SELECT create_distributed_table('coerce_events', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE coerce_agg (user_id int, value_1_agg int); +SELECT create_distributed_table('coerce_agg', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO coerce_events(user_id, value_1) VALUES (1, 1), (2, 2), (10, 10); +-- numeric -> int (straight function) +INSERT INTO coerce_agg(user_id, value_1_agg) +SELECT * +FROM ( + SELECT user_id, value_1 + FROM coerce_events +) AS ftop +ORDER BY 2 DESC, 1 DESC +LIMIT 5; +-- int -> text +ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE text; +INSERT INTO coerce_agg(user_id, value_1_agg) +SELECT * +FROM ( + SELECT user_id, value_1 + FROM coerce_events +) AS ftop +LIMIT 5; +SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; + user_id | value_1_agg +--------------------------------------------------------------------- + 10 | 10 + 10 | 10 + 2 | 2 + 2 | 2 + 1 | 1 + 1 | 1 +(6 rows) + +TRUNCATE coerce_agg; +-- int -> char(1) 
+ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(1); +INSERT INTO coerce_agg(user_id, value_1_agg) +SELECT * +FROM ( + SELECT user_id, value_1 + FROM coerce_events +) AS ftop +LIMIT 5; +ERROR: value too long for type character(1) +SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; + user_id | value_1_agg +--------------------------------------------------------------------- +(0 rows) + +TRUNCATE coerce_agg; +TRUNCATE coerce_events; +-- char(5) -> char(1) +ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(5); +INSERT INTO coerce_events(user_id, value_1) VALUES (1, 'aaaaa'), (2, 'bbbbb'); +INSERT INTO coerce_agg(user_id, value_1_agg) +SELECT * +FROM ( + SELECT user_id, value_1 + FROM coerce_events +) AS ftop +LIMIT 5; +ERROR: value too long for type character(1) +-- char(1) -> char(5) +ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(1) USING value_1::char(1); +ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(5); +INSERT INTO coerce_agg(user_id, value_1_agg) +SELECT * +FROM ( + SELECT user_id, value_1 + FROM coerce_events +) AS ftop +LIMIT 5; +SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; + user_id | value_1_agg +--------------------------------------------------------------------- + 2 | b + 1 | a +(2 rows) + +TRUNCATE coerce_agg; +TRUNCATE coerce_events; +-- integer -> integer (check VALUE < 5) +ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer USING NULL; +ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE integer USING NULL; +ALTER TABLE coerce_agg ADD CONSTRAINT small_number CHECK (value_1_agg < 5); +INSERT INTO coerce_events (user_id, value_1) VALUES (1, 1), (10, 10); +\set VERBOSITY TERSE +INSERT INTO coerce_agg(user_id, value_1_agg) +SELECT * +FROM ( + SELECT user_id, value_1 + FROM coerce_events +) AS ftop; +ERROR: new row for relation "coerce_agg_13300067" violates check constraint "small_number_13300067" +\set VERBOSITY DEFAULT +SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; + user_id | value_1_agg 
+--------------------------------------------------------------------- +(0 rows) + +-- integer[3] -> text[3] +TRUNCATE coerce_events; +ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer[3] USING NULL; +INSERT INTO coerce_events(user_id, value_1) VALUES (1, '{1,1,1}'), (2, '{2,2,2}'); +ALTER TABLE coerce_agg DROP COLUMN value_1_agg; +ALTER TABLE coerce_agg ADD COLUMN value_1_agg text[3]; +INSERT INTO coerce_agg(user_id, value_1_agg) +SELECT * +FROM ( + SELECT user_id, value_1 + FROM coerce_events +) AS ftop +LIMIT 5; +SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; + user_id | value_1_agg +--------------------------------------------------------------------- + 2 | {2,2,2} + 1 | {1,1,1} +(2 rows) + +-- INSERT..SELECT + prepared statements + recursive planning +BEGIN; +PREPARE prepared_recursive_insert_select AS +INSERT INTO users_table +SELECT * FROM users_table +WHERE value_1 IN (SELECT value_2 FROM events_table OFFSET 0); +EXECUTE prepared_recursive_insert_select; +EXECUTE prepared_recursive_insert_select; +EXECUTE prepared_recursive_insert_select; +EXECUTE prepared_recursive_insert_select; +EXECUTE prepared_recursive_insert_select; +EXECUTE prepared_recursive_insert_select; +ROLLBACK; +-- upsert with on conflict update distribution column is unsupported +INSERT INTO agg_events AS ae + ( + user_id, + value_1_agg, + agg_time + ) +SELECT user_id, + value_1, + time +FROM raw_events_first +ON conflict (user_id, value_1_agg) +DO UPDATE + SET user_id = 42 +RETURNING user_id, value_1_agg; +ERROR: modifying the partition value of rows is not allowed +-- test a small citus.remote_copy_flush_threshold +BEGIN; +SET LOCAL citus.remote_copy_flush_threshold TO 1; +INSERT INTO raw_events_first +SELECT * FROM raw_events_first OFFSET 0 +ON CONFLICT DO NOTHING; +ABORT; +-- test fix for issue https://github.com/citusdata/citus/issues/5891 +CREATE TABLE dist_table_1( +dist_col integer, +int_col integer, +text_col_1 text, +text_col_2 text +); +SELECT 
create_distributed_table('dist_table_1', 'dist_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist_table_1 VALUES (1, 1, 'string', 'string'); +CREATE TABLE dist_table_2( +dist_col integer, +int_col integer +); +SELECT create_distributed_table('dist_table_2', 'dist_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist_table_2 VALUES (1, 1); +with a as (select random()) INSERT INTO dist_table_1 +SELECT +t1.dist_col, +1, +'string', +'string' +FROM a, dist_table_1 t1 +join dist_table_2 t2 using (dist_col) +limit 1 +returning text_col_1; + text_col_1 +--------------------------------------------------------------------- + string +(1 row) + +CREATE TABLE dist_table_3( +dist_col bigint, +int_col integer +); +SELECT create_distributed_table('dist_table_3', 'dist_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to +-- different types for distribution columns. Citus would not be able to handle this complex insert select. +INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col); +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +CREATE TABLE dist_table_4( +dist_col integer, +int_col integer +); +SELECT create_distributed_table('dist_table_4', 'dist_col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4 +-- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 6 +(4 rows) + +-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query. +-- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- For INSERT SELECT, when push-down is NOT possible when limit clause exists in a subquery at SELECT part of INSERT SELECT. +-- It is because the subquery with limit needs to be merged at coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Limit + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +CREATE TABLE dist_table_5(id int, id2 int); +SELECT create_distributed_table('dist_table_5','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_table_6(id int, id2 int); +SELECT create_distributed_table('dist_table_6','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- verify that insert select with sublink can be pushed down when tables are colocated. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +CREATE TABLE ref_table_1(id int); +SELECT create_reference_table('ref_table_1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation. 
+INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1; +ERROR: correlated subqueries are not supported when the FROM clause contains a reference table +-- verify that insert select cannot be pushed down when we have recurring range table in from clause. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 1 +(4 rows) + +-- verify that insert select cannot be pushed down when we have reference table in outside of outer join in a chained-join. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +-- verify that insert select can be pushed down when we have reference table in outside of outer join. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +-- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-partition column. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT ref_table_1.id FROM ref_table_1 LEFT JOIN dist_table_5 ON ref_table_1.id = dist_table_5.id2; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +CREATE TABLE loc_table_1(id int); +-- verify that insert select cannot be pushed down when it contains join between local and distributed tables. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Seq Scan on loc_table_1 + Task Count: 4 +(6 rows) + +CREATE VIEW view_1 AS + SELECT id FROM dist_table_6; +CREATE MATERIALIZED VIEW view_2 AS + SELECT id FROM dist_table_6; +-- verify that insert select cannot be pushed down when it contains view. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 +(2 rows) + +-- verify that insert select cannot be pushed down when it contains materialized view. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Seq Scan on view_2 +(3 rows) + +CREATE TABLE append_table(id integer, data text, int_data int); +SELECT create_distributed_table('append_table', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_empty_shard('append_table'); + master_create_empty_shard +--------------------------------------------------------------------- + 13300096 +(1 row) + +-- verify that insert select push down for append tables are not supported. +INSERT INTO append_table SELECT * FROM append_table; +ERROR: INSERT ... SELECT into an append-distributed table is not supported +-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner +-- and handled by pull to coordinator. +SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id > 5) + INSERT INTO dist_table_5 + SELECT id FROM dist_table_5 JOIN cte_1 USING(id) OFFSET 5; +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Limit + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner. 
+SELECT coordinator_plan($$ + EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 + WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) + SELECT id FROM dist_table_5 JOIN cte_1 USING(id); +$$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 1 +(2 rows) + +--------------------------------------------------------------------- +-- Regression Test Script for Issue #7784 +-- This script tests INSERT ... SELECT with a CTE for: +-- 1. Schema based sharding. +-- 2. A distributed table. +--------------------------------------------------------------------- +-- Enable schema-based sharding +SET citus.enable_schema_based_sharding TO ON; +-- Create a table for schema based sharding +CREATE TABLE version_sch_based ( + id bigserial NOT NULL, + description varchar(255), + PRIMARY KEY (id) +); +-- Insert an initial row. +INSERT INTO version_sch_based (description) VALUES ('Version 1'); +-- Duplicate the row using a CTE and INSERT ... SELECT. +WITH v AS ( + SELECT * FROM version_sch_based WHERE description = 'Version 1' +) +INSERT INTO version_sch_based (description) +SELECT description FROM v; +-- Expected output: +-- id | description +-- ----+------------- +-- 1 | Version 1 +-- 2 | Version 1 +-- Query the table and order by id for consistency. +SELECT * FROM version_sch_based ORDER BY id; + id | description +--------------------------------------------------------------------- + 1 | Version 1 + 2 | Version 1 +(2 rows) + +--------------------------------------------------------------------- +-- Case 2: Distributed Table Scenario +--------------------------------------------------------------------- +SET citus.enable_schema_based_sharding TO OFF; +-- Create a table for the distributed test. +CREATE TABLE version_dist ( + id bigserial NOT NULL, + description varchar(255), + PRIMARY KEY (id) +); +-- Register the table as distributed using the 'id' column as the distribution key. 
+SELECT create_distributed_table('version_dist', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Insert an initial row. +INSERT INTO version_dist (description) VALUES ('Version 1'); +-- Duplicate the row using a CTE and INSERT ... SELECT. +WITH v AS ( + SELECT * FROM version_dist WHERE description = 'Version 1' +) +INSERT INTO version_dist (description) +SELECT description FROM v; +-- Expected output: +-- id | description +-- ----+------------- +-- 1 | Version 1 +-- 2 | Version 1 +-- Query the table and order by id for consistency. +SELECT * FROM version_dist ORDER BY id; + id | description +--------------------------------------------------------------------- + 1 | Version 1 + 2 | Version 1 +(2 rows) + +--------------------------------------------------------------------- +-- Case 3: Distributed INSERT … SELECT with nextval() +-- Verifies that nextval() is evaluated on the coordinator only. +--------------------------------------------------------------------- +-- A fresh sequence for clarity +CREATE SEQUENCE seq_nextval_test START 100; +-- Table with DEFAULT nextval() +CREATE TABLE version_dist_seq ( + id bigint DEFAULT nextval('seq_nextval_test'), + description text, + PRIMARY KEY (id) +); +SELECT create_distributed_table('version_dist_seq', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Seed one row (id = 100) +INSERT INTO version_dist_seq (description) VALUES ('row‑0'); +-- CTE duplication – should produce **exactly one** new sequence value (id = 101) +WITH v AS ( + SELECT * FROM version_dist_seq WHERE description = 'row‑0' +) +INSERT INTO version_dist_seq (description) +SELECT description FROM v; +-- Expected: ids are 100 and 101 (no gaps, no duplicates) +SELECT id, description FROM version_dist_seq ORDER BY id; + id | description +--------------------------------------------------------------------- + 100 | 
row‑0 + 101 | row‑0 +(2 rows) + +--------------------------------------------------------------------- +-- Case 4: UNION ALL + nextval() in distributed INSERT … SELECT +--------------------------------------------------------------------- +CREATE SEQUENCE seq_union_test START 200; +CREATE TABLE version_dist_union ( + id bigint DEFAULT nextval('seq_union_test'), + val int, + PRIMARY KEY (id) +); +SELECT create_distributed_table('version_dist_union', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Seed rows +INSERT INTO version_dist_union (val) VALUES (1), (2); +-- UNION ALL duplication; each leg returns two rows -> four inserts total +WITH src AS ( + SELECT val FROM version_dist_union + UNION ALL + SELECT val FROM version_dist_union +) +INSERT INTO version_dist_union(val) +SELECT val FROM src; +-- Expected IDs: 200,201,202,203,204,205 +SELECT id, val FROM version_dist_union ORDER BY id; + id | val +--------------------------------------------------------------------- + 200 | 1 + 201 | 2 + 202 | 1 + 203 | 2 + 204 | 1 + 205 | 2 +(6 rows) + +-- End of Issue #7784 +-- PR #8106 — CTE traversal works when following outer Vars +-- This script exercises three shapes: +-- T1) CTE referenced inside a correlated subquery (one level down) +-- T2) CTE referenced inside a nested subquery (two levels down) +-- T3) Subquery targetlist uses a scalar sublink into the outer CTE +CREATE SCHEMA pr8106_cte_outervar; +SET search_path = pr8106_cte_outervar, public; +-- Base tables for the tests +DROP TABLE IF EXISTS raw_events_first CASCADE; +NOTICE: table "raw_events_first" does not exist, skipping +DROP TABLE IF EXISTS agg_events CASCADE; +NOTICE: table "agg_events" does not exist, skipping +CREATE TABLE raw_events_first( + user_id int, + value_1 int +); +CREATE TABLE agg_events( + user_id int, + value_1_agg int +); +-- Distribute and colocate (distribution key = user_id) +SELECT 
create_distributed_table('raw_events_first', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('agg_events', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Seed data (duplicates on some user_ids; some NULL value_1’s) +INSERT INTO raw_events_first(user_id, value_1) VALUES + (1, 10), (1, 20), (1, NULL), + (2, NULL), + (3, 30), + (4, NULL), + (5, 50), (5, NULL), + (6, NULL); +--------------------------------------------------------------------- +-- T1) CTE referenced inside a correlated subquery (one level down) +--------------------------------------------------------------------- +TRUNCATE agg_events; +WITH c AS MATERIALIZED ( + SELECT user_id FROM raw_events_first +) +INSERT INTO agg_events (user_id) +SELECT t.user_id +FROM raw_events_first t +WHERE EXISTS (SELECT 1 FROM c WHERE c.user_id = t.user_id); +-- Expect one insert per row in raw_events_first (EXISTS always true per user_id) +SELECT 't1_count_matches' AS test, + (SELECT count(*) FROM agg_events) = + (SELECT count(*) FROM raw_events_first) AS ok; + test | ok +--------------------------------------------------------------------- + t1_count_matches | t +(1 row) + +-- Spot-check: how many rows were inserted +SELECT 't1_rows' AS test, count(*) AS rows FROM agg_events; + test | rows +--------------------------------------------------------------------- + t1_rows | 9 +(1 row) + +--------------------------------------------------------------------- +-- T2) CTE referenced inside a nested subquery (two levels down) +--------------------------------------------------------------------- +TRUNCATE agg_events; +WITH c AS MATERIALIZED ( + SELECT user_id FROM raw_events_first +) +INSERT INTO agg_events (user_id) +SELECT t.user_id +FROM raw_events_first t +WHERE EXISTS ( + SELECT 1 + FROM (SELECT user_id FROM c) c2 + WHERE c2.user_id = 
t.user_id +); +-- Same cardinality expectation as T1 +SELECT 't2_count_matches' AS test, + (SELECT count(*) FROM agg_events) = + (SELECT count(*) FROM raw_events_first) AS ok; + test | ok +--------------------------------------------------------------------- + t2_count_matches | t +(1 row) + +SELECT 't2_rows' AS test, count(*) AS rows FROM agg_events; + test | rows +--------------------------------------------------------------------- + t2_rows | 9 +(1 row) + +--------------------------------------------------------------------- +-- T3) Subquery targetlist uses a scalar sublink into the outer CTE +-- (use MAX() to keep scalar subquery single-row) +--------------------------------------------------------------------- +TRUNCATE agg_events; +WITH c AS (SELECT user_id, value_1 FROM raw_events_first) +INSERT INTO agg_events (user_id, value_1_agg) +SELECT d.user_id, d.value_1_agg +FROM ( + SELECT t.user_id, + (SELECT max(c.value_1) FROM c WHERE c.user_id = t.user_id) AS value_1_agg + FROM raw_events_first t +) AS d +WHERE d.value_1_agg IS NOT NULL; +-- Expect one insert per row in raw_events_first whose user_id has at least one non-NULL value_1 +SELECT 't3_count_matches' AS test, + (SELECT count(*) FROM agg_events) = + ( + SELECT count(*) + FROM raw_events_first t + WHERE EXISTS ( + SELECT 1 FROM raw_events_first c + WHERE c.user_id = t.user_id AND c.value_1 IS NOT NULL + ) + ) AS ok; + test | ok +--------------------------------------------------------------------- + t3_count_matches | t +(1 row) + +-- Also verify no NULLs were inserted into value_1_agg +SELECT 't3_no_null_value_1_agg' AS test, + NOT EXISTS (SELECT 1 FROM agg_events WHERE value_1_agg IS NULL) AS ok; + test | ok +--------------------------------------------------------------------- + t3_no_null_value_1_agg | t +(1 row) + +-- Deterministic sample of results +SELECT 't3_sample' AS test, user_id, value_1_agg +FROM agg_events +ORDER BY user_id +LIMIT 5; + test | user_id | value_1_agg 
+--------------------------------------------------------------------- + t3_sample | 1 | 20 + t3_sample | 1 | 20 + t3_sample | 1 | 20 + t3_sample | 3 | 30 + t3_sample | 5 | 50 +(5 rows) + +-- End of PR #8106 — CTE traversal works when following outer Vars +SET client_min_messages TO ERROR; +DROP SCHEMA pr8106_cte_outervar CASCADE; +DROP SCHEMA multi_insert_select CASCADE; From a827d7e3cdd66e00cd2156d76230a95c896c5987 Mon Sep 17 00:00:00 2001 From: Mehmet Yilmaz Date: Mon, 15 Sep 2025 13:57:58 +0000 Subject: [PATCH 03/11] bump pg versions --- .devcontainer/Dockerfile | 4 ++-- .github/workflows/build_and_test.yml | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 54b2df65e..9afc9f40c 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -109,7 +109,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf FROM base AS pg18 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 18beta3 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 18rc1 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -229,7 +229,7 @@ COPY --chown=citus:citus .psqlrc . 
RUN sudo chown --from=root:root citus:citus -R ~ # sets default pg version -RUN pgenv switch 18beta3 +RUN pgenv switch 18rc1 # make connecting to the coordinator easy ENV PGPORT=9700 diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 764a904d6..986d6b3d7 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -31,13 +31,13 @@ jobs: pgupgrade_image_name: "ghcr.io/citusdata/pgupgradetester" style_checker_image_name: "ghcr.io/citusdata/stylechecker" style_checker_tools_version: "0.8.18" - sql_snapshot_pg_version: "17.5" - image_suffix: "-dev-8e2a1ce" - pg15_version: '{ "major": "15", "full": "15.13" }' - pg16_version: '{ "major": "16", "full": "16.9" }' - pg17_version: '{ "major": "17", "full": "17.5" }' - pg18_version: '{ "major": "18", "full": "18beta3" }' - upgrade_pg_versions: "15.13-16.9-17.5-18beta3" + sql_snapshot_pg_version: "17.6" + image_suffix: "-dev-4df94a0" + pg15_version: '{ "major": "15", "full": "15.14" }' + pg16_version: '{ "major": "16", "full": "16.10" }' + pg17_version: '{ "major": "17", "full": "17.6" }' + pg18_version: '{ "major": "18", "full": "18rc1" }' + upgrade_pg_versions: "15.14-16.10-17.6-18rc1" steps: # Since GHA jobs need at least one step we use a noop step here. - name: Set up parameters From 9c303df0f79755f5c5caf63e662c2c47852f71c8 Mon Sep 17 00:00:00 2001 From: Colm Date: Mon, 22 Sep 2025 09:06:39 +0100 Subject: [PATCH 04/11] Postgres 18: Fix regress tests caused by GROUP RTE. (#8206) The change in `merge_planner.c` fixes _unrecognized range table entry_ diffs in merge regress tests (category 2 diffs in #7992), the change in `multi_router_planner.c` fixes _column reference ... is ambiguous_ diffs in `multi_insert_select` and `multi_insert_select_window` (category 3 diffs in #7992). Edit to `common.py` enables standalone regress tests with pg18 (e.g. `citus_tests/run_test.py merge`). 
--- src/test/regress/citus_tests/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/regress/citus_tests/common.py b/src/test/regress/citus_tests/common.py index c3ba14ac8..a7b978799 100644 --- a/src/test/regress/citus_tests/common.py +++ b/src/test/regress/citus_tests/common.py @@ -94,6 +94,7 @@ OLDEST_SUPPORTED_CITUS_VERSION_MATRIX = { 15: "11.1.5", 16: "12.1.5", 17: "13.0.1", + 18: "13.2.0", } OLDEST_SUPPORTED_CITUS_VERSION = OLDEST_SUPPORTED_CITUS_VERSION_MATRIX[PG_MAJOR_VERSION] From a1be689711dd4b07346df1f9e1604250ef58152a Mon Sep 17 00:00:00 2001 From: Mehmet Yilmaz Date: Thu, 2 Oct 2025 10:46:59 +0000 Subject: [PATCH 05/11] add pg 18.0 --- .devcontainer/Dockerfile | 4 ++-- .github/workflows/build_and_test.yml | 6 +++--- citus-tools | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) create mode 160000 citus-tools diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9afc9f40c..02b06adcb 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -109,7 +109,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf FROM base AS pg18 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 18rc1 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 18.0 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -229,7 +229,7 @@ COPY --chown=citus:citus .psqlrc . 
RUN sudo chown --from=root:root citus:citus -R ~ # sets default pg version -RUN pgenv switch 18rc1 +RUN pgenv switch 18.0 # make connecting to the coordinator easy ENV PGPORT=9700 diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 986d6b3d7..4dde6ae2a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -32,12 +32,12 @@ jobs: style_checker_image_name: "ghcr.io/citusdata/stylechecker" style_checker_tools_version: "0.8.18" sql_snapshot_pg_version: "17.6" - image_suffix: "-dev-4df94a0" + image_suffix: "-dev-97072ce" pg15_version: '{ "major": "15", "full": "15.14" }' pg16_version: '{ "major": "16", "full": "16.10" }' pg17_version: '{ "major": "17", "full": "17.6" }' - pg18_version: '{ "major": "18", "full": "18rc1" }' - upgrade_pg_versions: "15.14-16.10-17.6-18rc1" + pg18_version: '{ "major": "18", "full": "18.0" }' + upgrade_pg_versions: "15.14-16.10-17.6-18.0" steps: # Since GHA jobs need at least one step we use a noop step here. 
- name: Set up parameters diff --git a/citus-tools b/citus-tools new file mode 160000 index 000000000..3376bd684 --- /dev/null +++ b/citus-tools @@ -0,0 +1 @@ +Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf From d1f2ce4d86547ff641f5962ea5a5b0c149a61455 Mon Sep 17 00:00:00 2001 From: naisila Date: Tue, 7 Oct 2025 12:32:33 +0300 Subject: [PATCH 06/11] Removes citus-tools submodule; add pg15-pg18 upgrade test --- .github/workflows/build_and_test.yml | 2 ++ citus-tools | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) delete mode 160000 citus-tools diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4dde6ae2a..f31171263 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -343,6 +343,8 @@ jobs: new_pg_major: 18 - old_pg_major: 16 new_pg_major: 18 + - old_pg_major: 15 + new_pg_major: 18 env: old_pg_major: ${{ matrix.old_pg_major }} new_pg_major: ${{ matrix.new_pg_major }} diff --git a/citus-tools b/citus-tools deleted file mode 160000 index 3376bd684..000000000 --- a/citus-tools +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf From 356f25483755ed1452de078c0b025f6e55ea49fc Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Tue, 7 Oct 2025 14:10:11 +0300 Subject: [PATCH 07/11] Revert multi_insert_select_0.out addition (#8232) Probably Colm's commits on `GROUP BY RTE` fixes have fixed the test diffs we used to have. 
--- .../expected/multi_insert_select_0.out | 3812 ----------------- 1 file changed, 3812 deletions(-) delete mode 100644 src/test/regress/expected/multi_insert_select_0.out diff --git a/src/test/regress/expected/multi_insert_select_0.out b/src/test/regress/expected/multi_insert_select_0.out deleted file mode 100644 index f1ee0b7ba..000000000 --- a/src/test/regress/expected/multi_insert_select_0.out +++ /dev/null @@ -1,3812 +0,0 @@ --- --- MULTI_INSERT_SELECT --- -CREATE SCHEMA multi_insert_select; -SET search_path = multi_insert_select,public; -SET citus.next_shard_id TO 13300000; -SET citus.next_placement_id TO 13300000; --- create co-located tables -SET citus.shard_count = 4; -SET citus.shard_replication_factor = 2; --- order of execution might change in parallel executions --- and the error details might contain the worker node --- so be less verbose with \set VERBOSITY TERSE when necessary -CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); -SELECT create_distributed_table('raw_events_first', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); -SELECT create_distributed_table('raw_events_second', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp, UNIQUE(user_id, value_1_agg)); -SELECT create_distributed_table('agg_events', 'user_id');; - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- create the reference table as well -CREATE TABLE reference_table (user_id int); -SELECT 
create_reference_table('reference_table'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE insert_select_varchar_test (key varchar, value int); -SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- set back to the defaults -SET citus.shard_count = DEFAULT; -SET citus.shard_replication_factor = DEFAULT; -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (1, now(), 10, 100, 1000.1, 10000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (2, now(), 20, 200, 2000.1, 20000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (3, now(), 30, 300, 3000.1, 30000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (4, now(), 40, 400, 4000.1, 40000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (5, now(), 50, 500, 5000.1, 50000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (6, now(), 60, 600, 6000.1, 60000); -SET client_min_messages TO DEBUG2; --- raw table to raw table -INSERT INTO raw_events_second SELECT * FROM raw_events_first; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time", value_1, value_2, 
value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) --- see that our first multi shard INSERT...SELECT works expected -SET client_min_messages TO INFO; -SELECT - raw_events_first.user_id -FROM - raw_events_first, raw_events_second -WHERE - raw_events_first.user_id = raw_events_second.user_id -ORDER BY - user_id DESC; - user_id ---------------------------------------------------------------------- - 6 - 5 - 4 - 3 - 2 - 1 -(6 rows) - --- see that we get unique vialitons -\set VERBOSITY TERSE -INSERT INTO raw_events_second SELECT * FROM raw_events_first; -ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300004" -\set VERBOSITY DEFAULT --- stable functions should be allowed -INSERT INTO raw_events_second (user_id, time) -SELECT - user_id, now() -FROM - raw_events_first -WHERE - user_id < 0; -INSERT INTO raw_events_second 
(user_id) -SELECT - user_id -FROM - raw_events_first -WHERE - time > now() + interval '1 day'; --- hide version-dependent PL/pgSQL context messages -\set VERBOSITY terse --- make sure we evaluate stable functions on the master, once -CREATE OR REPLACE FUNCTION evaluate_on_master() -RETURNS int LANGUAGE plpgsql STABLE -AS $function$ -BEGIN - RAISE NOTICE 'evaluating on master'; - RETURN 0; -END; -$function$; -INSERT INTO raw_events_second (user_id, value_1) -SELECT - user_id, evaluate_on_master() -FROM - raw_events_first -WHERE - user_id < 0; -NOTICE: evaluating on master --- make sure we don't evaluate stable functions with column arguments -SET citus.enable_metadata_sync TO OFF; -CREATE OR REPLACE FUNCTION evaluate_on_master(x int) -RETURNS int LANGUAGE plpgsql STABLE -AS $function$ -BEGIN - RAISE NOTICE 'evaluating on master'; - RETURN x; -END; -$function$; -RESET citus.enable_metadata_sync; -INSERT INTO raw_events_second (user_id, value_1) -SELECT - user_id, evaluate_on_master(value_1) -FROM - raw_events_first -WHERE - user_id = 0; -ERROR: function multi_insert_select.evaluate_on_master(integer) does not exist --- add one more row -INSERT INTO raw_events_first (user_id, time) VALUES - (7, now()); --- try a single shard query -SET client_min_messages TO DEBUG2; -INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time") SELECT raw_events_first.user_id, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 7) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 
since SELECT query for it pruned away -SET client_min_messages TO INFO; --- add one more row -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (8, now(), 80, 800, 8000, 80000); --- reorder columns -SET client_min_messages TO DEBUG2; -INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) -SELECT - value_2, value_1, value_3, value_4, user_id, time -FROM - raw_events_first -WHERE - user_id = 8; -DEBUG: Creating router plan -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 8) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away --- a zero shard select -INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) -SELECT - value_2, value_1, value_3, value_4, user_id, time -FROM - raw_events_first -WHERE - false; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away --- another zero shard select -INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) -SELECT - value_2, value_1, value_3, value_4, user_id, 
time -FROM - raw_events_first -WHERE - 0 != 0; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away --- add one more row -SET client_min_messages TO INFO; -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (9, now(), 90, 900, 9000, 90000); --- show that RETURNING also works -SET client_min_messages TO DEBUG2; -INSERT INTO raw_events_second (user_id, value_1, value_3) -SELECT - user_id, value_1, value_3 -FROM - raw_events_first -WHERE - value_3 = 9000 -RETURNING *; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, 
citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 - user_id | time | value_1 | value_2 | value_3 | value_4 ---------------------------------------------------------------------- - 9 | | 90 | | 9000 | -(1 row) - --- hits two shards -\set VERBOSITY TERSE -INSERT INTO raw_events_second (user_id, value_1, value_3) -SELECT - user_id, value_1, value_3 -FROM - raw_events_first -WHERE - user_id = 9 OR user_id = 16 -RETURNING *; -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (((raw_events_first.user_id 
OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300007" --- now do some aggregations -INSERT INTO agg_events -SELECT - user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4) -FROM - raw_events_first -GROUP BY - user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS 
sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id --- group by column not exists on the SELECT target list -INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id) -SELECT - sum(value_3), count(value_4), sum(value_1), user_id -FROM - raw_events_first -GROUP BY - value_2, user_id -RETURNING *; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, 
citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, 
citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time -ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" --- some subquery tests -INSERT INTO agg_events - (value_1_agg, - user_id) -SELECT SUM(value_1), - id -FROM (SELECT raw_events_second.user_id AS id, - raw_events_second.value_1 - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo -GROUP BY id -ORDER BY id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) 
raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id -ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" --- subquery one more level depth -INSERT INTO agg_events - (value_4_agg, - value_1_agg, - user_id) -SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id) AS foo -ORDER BY id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM 
multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id -ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" -\set VERBOSITY DEFAULT --- join between subqueries -INSERT INTO agg_events - (user_id) -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - 
raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id); -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) 
raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY 
raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) --- add one more level subqueris on top of subquery JOINs -INSERT INTO agg_events - (user_id, value_4_agg) -SELECT - outer_most.id, max(outer_most.value) -FROM -( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id)) as outer_most -GROUP BY - outer_most.id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id 
OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE 
(raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id --- subqueries in WHERE clause -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT user_id - FROM raw_events_second - WHERE user_id = 2); -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT 
raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) 2))) AND (raw_events_first.user_id IS NOT NULL)) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT user_id - FROM raw_events_second - WHERE user_id != 2 AND value_1 = 2000) -ON conflict (user_id, value_1) DO NOTHING; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first 
WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT user_id - FROM raw_events_second WHERE false); -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT user_id - FROM raw_events_second - WHERE value_1 = 1000 OR value_1 = 2000 OR value_1 = 3000); -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT 
raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) 
OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) --- lets mix subqueries in FROM clause and subqueries in WHERE -INSERT INTO agg_events - (user_id) -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 1000) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT user_id - FROM raw_events_second); -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)) 
AND (f2.id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)) AND (f2.id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING 
(sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)) AND (f2.id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)) AND (f2.id IS NOT NULL)) --- some UPSERTS -INSERT INTO agg_events AS ae - ( - user_id, - value_1_agg, - agg_time - ) -SELECT user_id, - value_1, - time -FROM raw_events_first -ON conflict (user_id, value_1_agg) -DO UPDATE - SET agg_time = EXCLUDED.agg_time - WHERE ae.agg_time < EXCLUDED.agg_time; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS 
NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) --- upserts with returning -INSERT INTO agg_events AS ae - ( - user_id, - value_1_agg, - agg_time - ) -SELECT user_id, - value_1, - time -FROM raw_events_first -ON conflict (user_id, value_1_agg) -DO UPDATE - SET agg_time = EXCLUDED.agg_time - WHERE ae.agg_time < EXCLUDED.agg_time -RETURNING user_id, value_1_agg; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, 
raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg - user_id | value_1_agg ---------------------------------------------------------------------- - 7 | -(1 row) - -INSERT INTO agg_events (user_id, value_1_agg) -SELECT - user_id, 
sum(value_1 + value_2) -FROM - raw_events_first GROUP BY user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id --- FILTER CLAUSE -INSERT INTO agg_events (user_id, value_1_agg) -SELECT - user_id, sum(value_1 + value_2) FILTER (where value_3 = 15) -FROM - raw_events_first GROUP BY user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER 
(WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id --- a test with reference table JOINs -INSERT INTO - agg_events (user_id, value_1_agg) -SELECT - raw_events_first.user_id, sum(value_1) -FROM - reference_table, raw_events_first -WHERE - raw_events_first.user_id = reference_table.user_id -GROUP BY - raw_events_first.user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias 
(user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id --- a note on the outer joins is that --- we filter out outer join results --- where partition column returns --- NULL. Thus, we could INSERT less rows --- than we expect from subquery result. 
--- see the following tests -SET client_min_messages TO INFO; --- we don't want to see constraint violations, so truncate first -TRUNCATE agg_events; --- add a row to first table to make table contents different -INSERT INTO raw_events_second (user_id, time, value_1, value_2, value_3, value_4) VALUES - (10, now(), 100, 10000, 10000, 100000); -DELETE FROM raw_events_second WHERE user_id = 2; --- we select 11 rows -SELECT t1.user_id AS col1, - t2.user_id AS col2 - FROM raw_events_first t1 - FULL JOIN raw_events_second t2 - ON t1.user_id = t2.user_id - ORDER BY t1.user_id, - t2.user_id; - col1 | col2 ---------------------------------------------------------------------- - 1 | 1 - 2 | - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 6 - 7 | 7 - 8 | 8 - 9 | 9 - | 10 -(10 rows) - -SET client_min_messages TO DEBUG2; --- we insert 10 rows since we filtered out --- NULL partition column values -INSERT INTO agg_events (user_id, value_1_agg) -SELECT t1.user_id AS col1, - t2.user_id AS col2 -FROM raw_events_first t1 - FULL JOIN raw_events_second t2 - ON t1.user_id = t2.user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300000 t1 FULL JOIN multi_insert_select.raw_events_second_13300004 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300001 t1 FULL JOIN multi_insert_select.raw_events_second_13300005 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM 
(multi_insert_select.raw_events_first_13300002 t1 FULL JOIN multi_insert_select.raw_events_second_13300006 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300003 t1 FULL JOIN multi_insert_select.raw_events_second_13300007 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) -SET client_min_messages TO INFO; --- see that the results are different from the SELECT query -SELECT - user_id, value_1_agg -FROM - agg_events -ORDER BY - user_id, value_1_agg; - user_id | value_1_agg ---------------------------------------------------------------------- - 1 | 1 - 2 | - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 6 - 7 | 7 - 8 | 8 - 9 | 9 -(9 rows) - --- we don't want to see constraint violations, so truncate first -SET client_min_messages TO INFO; -TRUNCATE agg_events; -SET client_min_messages TO DEBUG2; --- DISTINCT clause -INSERT INTO agg_events (value_1_agg, user_id) - SELECT - DISTINCT value_1, user_id - FROM - raw_events_first; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, 
raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) --- we don't want to see constraint violations, so truncate first -SET client_min_messages TO INFO; -truncate agg_events; -SET client_min_messages TO DEBUG2; --- DISTINCT ON clauses are supported --- distinct on(non-partition column) --- values are pulled to master -INSERT INTO agg_events (value_1_agg, user_id) - SELECT - DISTINCT ON (value_1) value_1, user_id - FROM - raw_events_first; -DEBUG: cannot push down this subquery -DETAIL: Distinct on columns without partition column is currently unsupported -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Collecting INSERT ... 
SELECT results on coordinator -SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; -DEBUG: Router planner cannot handle multi-shard select queries - user_id | value_1_agg ---------------------------------------------------------------------- - 1 | 10 - 2 | 20 - 3 | 30 - 4 | 40 - 5 | 50 - 6 | 60 - 7 | - 8 | 80 - 9 | 90 -(9 rows) - --- we don't want to see constraint violations, so truncate first -SET client_min_messages TO INFO; -truncate agg_events; -SET client_min_messages TO DEBUG2; --- distinct on(partition column) --- queries are forwared to workers -INSERT INTO agg_events (value_1_agg, user_id) - SELECT - DISTINCT ON (user_id) value_1, user_id - FROM - raw_events_first; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE 
(raw_events_first.user_id IS NOT NULL) -SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; -DEBUG: Router planner cannot handle multi-shard select queries - user_id | value_1_agg ---------------------------------------------------------------------- - 1 | 10 - 2 | 20 - 3 | 30 - 4 | 40 - 5 | 50 - 6 | 60 - 7 | - 8 | 80 - 9 | 90 -(9 rows) - --- We support CTEs -BEGIN; -WITH fist_table_agg AS MATERIALIZED - (SELECT max(value_1)+1 as v1_agg, user_id FROM raw_events_first GROUP BY user_id) -INSERT INTO agg_events - (value_1_agg, user_id) - SELECT - v1_agg, user_id - FROM - fist_table_agg; -DEBUG: distributed INSERT ... SELECT can only select from distributed tables -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE fist_table_agg: SELECT (max(value_1) OPERATOR(pg_catalog.+) 1) AS v1_agg, user_id FROM multi_insert_select.raw_events_first GROUP BY user_id -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_agg AS value_1_agg FROM (SELECT fist_table_agg.user_id, fist_table_agg.v1_agg FROM (SELECT intermediate_result.v1_agg, intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v1_agg integer, user_id integer)) fist_table_agg) citus_insert_select_subquery -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... SELECT results on coordinator -ROLLBACK; --- We do support CTEs that are referenced in the target list -INSERT INTO agg_events - WITH sub_cte AS (SELECT 1) - SELECT - raw_events_first.user_id, (SELECT * FROM sub_cte) - FROM - raw_events_first; -DEBUG: CTE sub_cte is going to be inlined via distributed planning -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" 
FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" 
FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) --- We support set operations -BEGIN; -INSERT INTO - raw_events_first(user_id) -SELECT - user_id -FROM - ((SELECT user_id FROM raw_events_first) UNION - (SELECT user_id FROM raw_events_second)) as foo; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) -ROLLBACK; --- We do support set operations through recursive planning -BEGIN; -SET 
LOCAL client_min_messages TO DEBUG; -INSERT INTO - raw_events_first(user_id) - (SELECT user_id FROM raw_events_first) INTERSECT - (SELECT user_id FROM raw_events_first); -DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_first -DEBUG: Creating router plan -DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) citus_insert_select_subquery -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... 
SELECT results on coordinator -ROLLBACK; --- If the query is router plannable then it is executed via the coordinator -INSERT INTO - raw_events_first(user_id) -SELECT - user_id -FROM - ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT - (SELECT user_id FROM raw_events_second where user_id = 17)) as foo; -DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -DEBUG: router planner does not support queries that reference non-colocated distributed tables -DEBUG: Distributed planning for a fast-path router query -DEBUG: Creating router plan -DEBUG: query has a single distribution column value: 15 -DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first WHERE (user_id OPERATOR(pg_catalog.=) 15) -DEBUG: Distributed planning for a fast-path router query -DEBUG: Creating router plan -DEBUG: query has a single distribution column value: 17 -DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE (user_id OPERATOR(pg_catalog.=) 17) -DEBUG: Creating router plan -DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) EXCEPT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... 
SELECT results on coordinator --- some supported LEFT joins - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) - SET client_min_messages to debug3; - INSERT INTO agg_events (user_id) - SELECT - 
raw_events_second.user_id - FROM - reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id; -DEBUG: no shard pruning constraints on raw_events_second found -DEBUG: shard count after pruning for raw_events_second: 4 -DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table -DEBUG: no shard pruning constraints on raw_events_second found -DEBUG: shard count after pruning for raw_events_second: 4 -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: a push down safe left join with recurring left side -DEBUG: no shard pruning constraints on raw_events_second found -DEBUG: shard count after pruning for raw_events_second: 4 -DEBUG: assigned task to node localhost:xxxxx -DEBUG: assigned task to node localhost:xxxxx -DEBUG: assigned task to node localhost:xxxxx -DEBUG: assigned task to node localhost:xxxxx -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' - SET client_min_messages to debug2; - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_first.user_id = 10; -DEBUG: Creating router plan -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 10) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away -DEBUG: Skipping target shard 
interval 13300011 since SELECT query for it pruned away - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_second.user_id = 10 OR raw_events_second.user_id = 11; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS 
value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_first.user_id = 10 AND raw_events_first.user_id = 20; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_first.user_id = 10 AND raw_events_second.user_id = 20; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 
13300010 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_first.user_id IN (19, 20, 21); -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT 
raw_events_first.user_id FROM ((SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_first(user_id, "time", value_1, value_2, value_3, value_4) LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_second.user_id IN (19, 20, 21); -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM 
(multi_insert_select.raw_events_first_13300002 raw_events_first JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -SET client_min_messages TO WARNING; - -- following query should use repartitioned joins and results should - -- be routed via coordinator - SET citus.enable_repartition_joins TO true; - INSERT INTO agg_events - (user_id) - SELECT raw_events_first.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_second.user_id = raw_events_first.value_1 - AND raw_events_first.value_1 = 12; - -- some unsupported LEFT/INNER JOINs - -- JOIN on one table with partition column other is not - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns - -- same as the above with INNER JOIN - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN 
raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; - -- a not meaningful query - INSERT INTO agg_events - (user_id) - SELECT raw_events_second.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_first.value_1; -ERROR: cannot perform distributed planning on this query -DETAIL: Cartesian products are currently unsupported - -- both tables joined on non-partition columns - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns - -- same as the above with INNER JOIN - -- we support this with route to coordinator - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - --- EXPLAIN ANALYZE is not supported for INSERT ... SELECT via coordinator -EXPLAIN (costs off, analyze on, BUFFERS OFF) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; -ERROR: EXPLAIN ANALYZE is currently not supported for INSERT ... 
SELECT commands via coordinator --- even if there is a filter on the partition key, since the join is not on the partition key we reject --- this query -INSERT INTO agg_events (user_id) -SELECT - raw_events_first.user_id -FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 -WHERE - raw_events_first.user_id = 10; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns - -- same as the above with INNER JOIN - -- we support this with route to coordinator - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 - WHERE raw_events_first.user_id = 10; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - - -- make things a bit more complicate with IN clauses - -- we support this with route to coordinator - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 - WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - - -- implicit join on non partition column should also not be pushed down, - -- so we fall back to route via coordinator - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events - (user_id) - SELECT raw_events_first.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_second.user_id = raw_events_first.value_1; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - -RESET client_min_messages; - -- The following is again a tricky query for Citus. If the given filter was - -- on value_1 as shown in the above, Citus could push it down and use - -- distributed INSERT/SELECT. But we instead fall back to route via coordinator. - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events - (user_id) - SELECT raw_events_first.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_second.user_id = raw_events_first.value_1 - AND raw_events_first.value_2 = 12; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - - -- foo is not joined on the partition key so the query is not - -- pushed down. So instead we route via coordinator. 
- SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events - (user_id, value_4_agg) - SELECT - outer_most.id, max(outer_most.value) - FROM - ( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first LEFT JOIN - reference_table - ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 - ON (f.id = f2.id)) as outer_most - GROUP BY - outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - - -- if the given filter was on value_1 as shown in the above, Citus could - -- push it down. But here the query falls back to route via coordinator. - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events - (user_id) - SELECT raw_events_first.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_second.user_id = raw_events_first.value_1 - AND raw_events_first.value_2 = 12; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - - -- foo is not joined on the partition key so the query is not - -- pushed down, and it falls back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) - INSERT INTO agg_events - (user_id, value_4_agg) - SELECT - outer_most.id, max(outer_most.value) - FROM - ( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first LEFT JOIN - reference_table - ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 - ON (f.id = f2.id)) as outer_most - GROUP BY - outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - -INSERT INTO agg_events - (value_4_agg, - value_1_agg, - user_id) -SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id != raw_events_second.user_id - GROUP BY raw_events_second.user_id) AS foo; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -SET client_min_messages TO DEBUG2; --- INSERT returns NULL partition key value via coordinator -INSERT INTO agg_events - (value_4_agg, - value_1_agg, - user_id) -SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.value_3 AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.value_3) AS foo; -DEBUG: cannot push down this subquery -DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] -DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] -DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] -DEBUG: join prunable 
for intervals [0,1073741823] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] -DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] -DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_3 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(id) AS user_id, int4(v1) AS value_1_agg, int8(v4) AS value_4_agg FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 bigint, id double precision)) foo -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... SELECT results on coordinator -ERROR: the partition column of table multi_insert_select.agg_events cannot be NULL --- error cases --- no part column at all -INSERT INTO raw_events_second - (value_1) -SELECT value_1 -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: the query doesn't include the target table's partition column -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... 
SELECT -ERROR: the partition column of table multi_insert_select.raw_events_second should have a value -INSERT INTO raw_events_second - (value_1) -SELECT user_id -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: the query doesn't include the target table's partition column -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -ERROR: the partition column of table multi_insert_select.raw_events_second should have a value -INSERT INTO raw_events_second - (user_id) -SELECT value_1 -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The target table's partition column should correspond to a partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -ERROR: the partition column value cannot be NULL -CONTEXT: while executing command on localhost:xxxxx -INSERT INTO raw_events_second - (user_id) -SELECT user_id * 2 -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an operator in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... 
SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0,repartitioned_results_xxxxx_from_13300001_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_1,repartitioned_results_xxxxx_from_13300001_to_1,repartitioned_results_xxxxx_from_13300003_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_3,repartitioned_results_xxxxx_from_13300002_to_3,repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -INSERT INTO raw_events_second - (user_id) -SELECT user_id :: bigint -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an explicit cast in the same position as the target table's partition column. 
-HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -INSERT INTO agg_events - (value_3_agg, - value_4_agg, - value_1_agg, - value_2_agg, - user_id) -SELECT SUM(value_3), - Count(value_4), - user_id, - SUM(value_1), - Avg(value_2) -FROM raw_events_first -GROUP BY user_id; -DEBUG: cannot perform distributed INSERT INTO ... 
SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an aggregation in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -ERROR: the partition column value cannot be NULL -CONTEXT: while executing command on localhost:xxxxx -INSERT INTO agg_events - (value_3_agg, - value_4_agg, - value_1_agg, - value_2_agg, - user_id) -SELECT SUM(value_3), - Count(value_4), - user_id, - SUM(value_1), - value_2 -FROM raw_events_first -GROUP BY user_id, - value_2; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The target table's partition column should correspond to a partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -ERROR: the partition column value cannot be NULL -CONTEXT: while executing command on localhost:xxxxx --- tables should be co-located -INSERT INTO agg_events (user_id) -SELECT - user_id -FROM - reference_table; -DEBUG: Creating router plan -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The target table's partition column should correspond to a partition column in the subquery. -DEBUG: Distributed planning for a fast-path router query -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... 
SELECT results on coordinator --- foo2 is recursively planned and INSERT...SELECT is done via coordinator -INSERT INTO agg_events - (user_id) -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - raw_events_second.value_1 AS v1, - SUM(raw_events_second.user_id) AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.value_1 - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id); -DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] -DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] -DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] -DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] -DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] -DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan XXX_1 for subquery 
SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(f2.id) AS user_id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... 
SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' --- the second part of the query is not routable since --- GROUP BY not on the partition column (i.e., value_1) and thus join --- on f.id = f2.id is not on the partition key (instead on the sum of partition key) --- but we still recursively plan foo2 and run the query -INSERT INTO agg_events - (user_id) -SELECT f.id FROM -(SELECT - id -FROM (SELECT raw_events_first.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - raw_events_second.value_1 AS v1, - SUM(raw_events_second.user_id) AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.value_1 - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id); -DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] -DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] -DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] -DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] -DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] -DEBUG: join prunable for intervals [1073741824,2147483647] 
and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] -DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT f.id AS user_id FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... 
SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -SET client_min_messages TO WARNING; --- cannot pushdown the query since the JOIN is not equi JOIN --- falls back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events - (user_id, value_4_agg) -SELECT -outer_most.id, max(outer_most.value) - FROM -( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id != f2.id)) as outer_most -GROUP BY outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - --- cannot pushdown since foo2 is not join on partition key --- falls back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events - (user_id, value_4_agg) -SELECT - outer_most.id, max(outer_most.value) -FROM -( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.value_1 - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id)) as outer_most -GROUP BY - outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> HashAggregate - Group Key: remote_scan.id - Filter: (pg_catalog.sum(remote_scan.worker_column_4) > '10'::numeric) - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(11 rows) - --- cannot push down since foo doesn't have en equi join --- falls back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events - (user_id, value_4_agg) -SELECT - outer_most.id, max(outer_most.value) -FROM -( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id != reference_table.user_id ) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id)) as outer_most -GROUP BY - outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - --- some unsupported LATERAL JOINs --- join on averages is not on the partition key --- should fall back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events (user_id, value_4_agg) -SELECT - averages.user_id, avg(averages.value_4) -FROM - (SELECT - raw_events_second.user_id - FROM - reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) - ) reference_ids - JOIN LATERAL - (SELECT - user_id, value_4 - FROM - raw_events_first WHERE - value_4 = reference_ids.user_id) as averages ON true - GROUP BY averages.user_id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(6 rows) - --- join among reference_ids and averages is not on the partition key --- should fall back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events (user_id, value_4_agg) -SELECT - averages.user_id, avg(averages.value_4) -FROM - (SELECT - raw_events_second.user_id - FROM - reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) - ) reference_ids - JOIN LATERAL - (SELECT - user_id, value_4 - FROM - raw_events_first) as averages ON averages.value_4 = reference_ids.user_id - GROUP BY averages.user_id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - --- join among the agg_ids and averages is not on the partition key --- should fall back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events (user_id, value_4_agg) -SELECT - averages.user_id, avg(averages.value_4) -FROM - (SELECT - raw_events_second.user_id - FROM - reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) - ) reference_ids - JOIN LATERAL - (SELECT - user_id, value_4 - FROM - raw_events_first) as averages ON averages.user_id = reference_ids.user_id -JOIN LATERAL - (SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id) - GROUP BY averages.user_id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - --- Selected value in the WHERE is not partition key, so we cannot use distributed --- INSERT/SELECT and falls back route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT value_1 - FROM raw_events_second); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - --- same as above but slightly more complex --- since it also includes subquery in FROM as well -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events - (user_id) -SELECT f2.id FROM - -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT value_1 - FROM raw_events_second); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - --- some more semi-anti join tests -SET client_min_messages TO DEBUG2; --- join in where -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT raw_events_second.user_id - FROM raw_events_second, raw_events_first - WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200); -DEBUG: Creating router plan -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second, multi_insert_select.raw_events_first_13300000 raw_events_first_1 WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first_1.user_id) AND (raw_events_first_1.user_id OPERATOR(pg_catalog.=) 200)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away -RESET client_min_messages; --- we cannot push this down since it is NOT IN --- we use repartition insert/select instead -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id NOT IN (SELECT raw_events_second.user_id - FROM raw_events_second, raw_events_first - WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200); -$Q$); - coordinator_plan 
---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 1 -(6 rows) - -SET client_min_messages TO DEBUG2; --- safe to push down -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE EXISTS (SELECT 1 - FROM raw_events_second - WHERE raw_events_second.user_id =raw_events_first.user_id); -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT 
raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) --- we cannot push down -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE NOT EXISTS (SELECT 1 - FROM raw_events_second - WHERE raw_events_second.user_id =raw_events_first.user_id); -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias 
(user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) --- more complex LEFT JOINs - INSERT INTO agg_events - (user_id, value_4_agg) - SELECT - outer_most.id, max(outer_most.value) - FROM - ( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT raw_events_first.user_id AS id - FROM raw_events_first LEFT JOIN - reference_table - ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f - LEFT JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 - ON (f.id = f2.id)) as outer_most - GROUP BY - outer_most.id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) 
raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, 
raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -RESET client_min_messages; --- cannot push down since the f.id IN is matched with value_1 --- we use repartition insert/select instead -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN ( -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE 
raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT value_1 - FROM raw_events_second)); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - -SET client_min_messages TO DEBUG2; --- same as above, but this time is it safe to push down since --- f.id IN is matched with user_id -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN ( -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT user_id - FROM raw_events_second)); -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT 
foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY 
raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT 
reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) -RESET client_min_messages; --- cannot push down since top level user_id is matched with NOT IN -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id NOT IN ( -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT user_id - FROM raw_events_second)); --- cannot push down since join is not equi join (f.id > f2.id) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN ( 
-SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id > f2.id) -WHERE f.id IN (SELECT user_id - FROM raw_events_second)); --- we currently not support grouping sets -INSERT INTO agg_events - (user_id, - value_1_agg, - value_2_agg) -SELECT user_id, - Sum(value_1) AS sum_val1, - Sum(value_2) AS sum_val2 -FROM raw_events_second -GROUP BY grouping sets ( ( user_id ), ( value_1 ), ( user_id, value_1 ), ( ) ); -ERROR: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP -HINT: Consider using an equality filter on the distributed table's partition column. --- set back to INFO -SET client_min_messages TO INFO; --- avoid constraint violations -TRUNCATE raw_events_first; --- we don't support LIMIT for subquery pushdown, but --- we recursively plan the query and run it via coordinator -INSERT INTO agg_events(user_id) -SELECT user_id -FROM users_table -WHERE user_id - IN (SELECT - user_id - FROM ( - ( - SELECT - user_id - FROM - ( - SELECT - e1.user_id - FROM - users_table u1, events_table e1 - WHERE - e1.user_id = u1.user_id LIMIT 3 - ) as f_inner - ) - ) AS f2); --- Altering a table and selecting from it using a multi-shard statement --- in the same transaction is allowed because we will use the same --- connections for all co-located placements. 
-BEGIN; -ALTER TABLE raw_events_second DROP COLUMN value_4; -INSERT INTO raw_events_first SELECT * FROM raw_events_second; -ROLLBACK; --- Alterating a table and selecting from it using a single-shard statement --- in the same transaction is disallowed because we will use a different --- connection. -BEGIN; -ALTER TABLE raw_events_second DROP COLUMN value_4; -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; -ROLLBACK; --- Altering a reference table and then performing an INSERT ... SELECT which --- joins with the reference table is allowed, since the INSERT ... SELECT --- would read from the reference table over the same connections with the ones --- that performed the parallel DDL. -BEGIN; -ALTER TABLE reference_table ADD COLUMN z int; -INSERT INTO raw_events_first (user_id) -SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id); -ROLLBACK; --- the same test with sequential DDL should work fine -BEGIN; -SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; -ALTER TABLE reference_table ADD COLUMN z int; -INSERT INTO raw_events_first (user_id) -SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id); -ROLLBACK; --- Insert after copy is allowed -BEGIN; -COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ','; -INSERT INTO raw_events_first SELECT * FROM raw_events_second; -ROLLBACK; --- Insert after copy is currently allowed for single-shard operation. --- Both insert and copy are rolled back successfully. 
-BEGIN; -COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ','; -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 101; -SELECT user_id FROM raw_events_first WHERE user_id = 101; - user_id ---------------------------------------------------------------------- - 101 -(1 row) - -ROLLBACK; -BEGIN; -INSERT INTO raw_events_first SELECT * FROM raw_events_second; -COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; -ROLLBACK; -BEGIN; -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; -COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; -ROLLBACK; --- Similarly, multi-row INSERTs will take part in transactions and reuse connections... -BEGIN; -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; -COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; -INSERT INTO raw_events_first (user_id, value_1) VALUES (105, 105), (106, 106); -ROLLBACK; --- selecting from views works -CREATE VIEW test_view AS SELECT * FROM raw_events_first; -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (16, now(), 60, 600, 6000.1, 60000); -SELECT count(*) FROM raw_events_second; - count ---------------------------------------------------------------------- - 45 -(1 row) - -INSERT INTO raw_events_second SELECT * FROM test_view; -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (17, now(), 60, 600, 6000.1, 60000); -INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP BY 1,2,3,4,5,6; -SELECT count(*) FROM raw_events_second; - count ---------------------------------------------------------------------- - 47 -(1 row) - --- intermediate results (CTEs) should be allowed when doing INSERT...SELECT within a CTE -WITH series AS ( - SELECT s AS val FROM generate_series(60,70) s -), -inserts AS ( - INSERT INTO raw_events_second (user_id) - SELECT - user_id - FROM - 
raw_events_first JOIN series ON (value_1 = val) - RETURNING - NULL -) -SELECT count(*) FROM inserts; - count ---------------------------------------------------------------------- - 2 -(1 row) - --- we need this in our next test -truncate raw_events_first; -SET client_min_messages TO DEBUG2; --- first show that the query works now -INSERT INTO raw_events_first SELECT * FROM raw_events_second; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, 
raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) -SET client_min_messages TO INFO; -truncate raw_events_first; -SET client_min_messages TO DEBUG2; --- now show that it works for a single shard query as well -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5; -DEBUG: Creating router plan -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) 5) AND (raw_events_second.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300001 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300002 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away -SET client_min_messages TO INFO; --- now do some tests with varchars -INSERT INTO insert_select_varchar_test VALUES ('test_1', 10); -INSERT INTO insert_select_varchar_test VALUES ('test_2', 30); -INSERT INTO insert_select_varchar_test (key, value) -SELECT *, 100 -FROM (SELECT f1.key - FROM (SELECT key - FROM insert_select_varchar_test - GROUP BY 1 - HAVING Count(key) < 3) AS f1, - (SELECT key - FROM insert_select_varchar_test - GROUP BY 1 - HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) > - 20.0) - AS f2 - WHERE f1.key = f2.key - GROUP BY 1) AS foo; -SELECT * FROM insert_select_varchar_test ORDER BY 1 DESC, 2 DESC; - key | value ---------------------------------------------------------------------- - test_2 | 100 - test_2 | 30 - test_1 | 10 -(3 
rows) - --- some tests with DEFAULT columns and constant values --- this test is mostly importantly intended for deparsing the query correctly --- but still it is preferable to have this test here instead of multi_deparse_shard_query -CREATE TABLE table_with_defaults -( - store_id int, - first_name text, - default_1 int DEFAULT 1, - last_name text, - default_2 text DEFAULT '2' -); --- we don't need many shards -SET citus.shard_count = 2; -SELECT create_distributed_table('table_with_defaults', 'store_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- let's see the queries -SET client_min_messages TO DEBUG2; --- a very simple query -INSERT INTO table_with_defaults SELECT * FROM table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- see that defaults are filled -INSERT INTO table_with_defaults (store_id, first_name) -SELECT - store_id, first_name -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT 
table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- shuffle one of the defaults and skip the other -INSERT INTO table_with_defaults (default_2, store_id, first_name) -SELECT - default_2, store_id, first_name -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- shuffle both defaults -INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name) -SELECT - default_2, store_id, default_1, first_name -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 
table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- use constants instead of non-default column -INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name) -SELECT - default_2, 'Freund', store_id, 'Andres' -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- use constants instead of non-default column and skip both defauls -INSERT INTO table_with_defaults (last_name, store_id, first_name) -SELECT - 'Freund', store_id, 'Andres' -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS 
citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- use constants instead of default columns -INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1) -SELECT - 20, last_name, store_id, first_name, 10 -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- use constants instead of both default columns and non-default columns -INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1) -SELECT - 20, 'Freund', store_id, 'Andres', 10 -FROM - 
table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- some of the ultimate queries where we have constants, --- defaults and group by entry is not on the target entry -INSERT INTO table_with_defaults (default_2, store_id, first_name) -SELECT - '2000', store_id, 'Andres' -FROM - table_with_defaults -GROUP BY - last_name, store_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id -INSERT INTO 
table_with_defaults (default_1, store_id, first_name, default_2) -SELECT - 1000, store_id, 'Andres', '2000' -FROM - table_with_defaults -GROUP BY - last_name, store_id, first_name; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name -INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2) -SELECT - 1000, store_id, 'Andres', '2000' -FROM - table_with_defaults -GROUP BY - last_name, store_id, first_name, default_2; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 
'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 -INSERT INTO table_with_defaults (default_1, store_id, first_name) -SELECT - 1000, store_id, 'Andres' -FROM - table_with_defaults -GROUP BY - last_name, store_id, first_name, default_2; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 -RESET client_min_messages; --- Stable function in default should be allowed -ALTER TABLE table_with_defaults ADD COLUMN t timestamptz DEFAULT now(); -INSERT INTO table_with_defaults (store_id, first_name, last_name) -SELECT - store_id, 'first '||store_id, 'last '||store_id -FROM - table_with_defaults -GROUP BY - store_id, first_name, last_name; --- Volatile function in default should be disallowed - SERIAL pseudo-types -CREATE TABLE table_with_serial ( - store_id int, - s bigserial -); -SELECT 
create_distributed_table('table_with_serial', 'store_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO table_with_serial (store_id) -SELECT - store_id -FROM - table_with_defaults -GROUP BY - store_id; --- Volatile function in default should be disallowed - user-defined sequence -CREATE SEQUENCE user_defined_sequence; -CREATE TABLE table_with_user_sequence ( - store_id int, - s bigint default nextval('user_defined_sequence') -); -SELECT create_distributed_table('table_with_user_sequence', 'store_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO table_with_user_sequence (store_id) -SELECT - store_id -FROM - table_with_defaults -GROUP BY - store_id; --- do some more error/error message checks -SET citus.shard_count TO 4; -SET citus.shard_replication_factor TO 1; -CREATE TABLE text_table (part_col text, val int); -CREATE TABLE char_table (part_col char[], val int); -create table table_with_starts_with_defaults (a int DEFAULT 5, b int, c int); -SELECT create_distributed_table('text_table', 'part_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('char_table','part_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('table_with_starts_with_defaults', 'c'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SET client_min_messages TO DEBUG; -INSERT INTO text_table (part_col) - SELECT - CASE WHEN part_col = 'onder' THEN 'marco' - END -FROM text_table ; -DEBUG: cannot perform distributed INSERT INTO ... 
SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains a case expression in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT COALESCE(part_col, 'onder') FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains a coalesce expression in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT GREATEST(part_col, 'jason') FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... 
SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT LEAST(part_col, 'andres') FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT NULLIF(part_col, 'metin') FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT part_col isnull FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. 
-DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT part_col::text from char_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT (part_col = 'burak') is true FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT val FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery. 
-DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT val::text FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -RESET client_min_messages; -insert into table_with_starts_with_defaults (b,c) select b,c FROM table_with_starts_with_defaults; --- Test on partition column without native hash function -CREATE TABLE raw_table -( - id BIGINT, - time DATE -); -CREATE TABLE summary_table -( - time DATE, - count BIGINT -); -SELECT create_distributed_table('raw_table', 'time'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('summary_table', 'time'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO raw_table VALUES(1, '11-11-1980'); -INSERT INTO summary_table SELECT time, COUNT(*) FROM raw_table GROUP BY time; -SELECT * FROM summary_table; - time | count ---------------------------------------------------------------------- - 11-11-1980 | 1 -(1 row) - --- Test INSERT ... 
SELECT via coordinator --- Select from constants -TRUNCATE raw_events_first; -INSERT INTO raw_events_first (user_id, value_1) -SELECT * FROM (VALUES (1,2), (3,4), (5,6)) AS v(int,int); -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 2 - 3 | 4 - 5 | 6 -(3 rows) - --- Select from local functions -TRUNCATE raw_events_first; -CREATE SEQUENCE insert_select_test_seq; -SET client_min_messages TO DEBUG; -INSERT INTO raw_events_first (user_id, value_1, value_2) -SELECT - s, nextval('insert_select_test_seq'), (random()*10)::int -FROM - generate_series(1, 5) s; -DEBUG: Creating router plan -DEBUG: distributed INSERT ... SELECT can only select from distributed tables -DEBUG: Collecting INSERT ... SELECT results on coordinator -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; -DEBUG: Router planner cannot handle multi-shard select queries - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- ON CONFLICT is supported -INSERT INTO raw_events_first (user_id, value_1) -SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s -ON CONFLICT DO NOTHING; -DEBUG: Creating router plan -DEBUG: distributed INSERT ... SELECT can only select from distributed tables -DEBUG: Collecting INSERT ... 
SELECT results on coordinator -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING --- RETURNING is supported -INSERT INTO raw_events_first (user_id, value_1) -SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s -RETURNING *; -DEBUG: Creating router plan -DEBUG: distributed INSERT ... SELECT can only select from distributed tables -DEBUG: Collecting INSERT ... 
SELECT results on coordinator -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, 
citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 - user_id | time | value_1 | value_2 | value_3 | value_4 ---------------------------------------------------------------------- - 1 | | 11 | | | - 2 | | 12 | | | - 3 | | 13 | | | - 4 | | 14 | | | - 5 | | 15 | | | -(5 rows) - -RESET client_min_messages; --- INSERT ... SELECT and multi-shard SELECT in the same transaction is supported -TRUNCATE raw_events_first; -BEGIN; -INSERT INTO raw_events_first (user_id, value_1) -SELECT s, s FROM generate_series(1, 5) s; -SELECT user_id, value_1 FROM raw_events_first ORDER BY 1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - -ROLLBACK; --- INSERT ... SELECT and single-shard SELECT in the same transaction is supported -TRUNCATE raw_events_first; -BEGIN; -INSERT INTO raw_events_first (user_id, value_1) -SELECT s, s FROM generate_series(1, 5) s; -SELECT user_id, value_1 FROM raw_events_first WHERE user_id = 1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 -(1 row) - -COMMIT; --- Select from local table -TRUNCATE raw_events_first; -CREATE TEMPORARY TABLE raw_events_first_local AS -SELECT s AS u, 2*s AS v FROM generate_series(1, 5) s; -INSERT INTO raw_events_first (user_id, value_1) -SELECT u, v FROM raw_events_first_local; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 2 - 2 | 4 - 3 | 6 - 4 | 8 - 5 | 10 -(5 rows) - --- Use columns in opposite order -TRUNCATE raw_events_first; -INSERT INTO raw_events_first (value_1, user_id) -SELECT u, v FROM raw_events_first_local; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 2 | 1 - 4 | 2 - 6 | 3 - 8 | 4 - 10 | 5 -(5 rows) - --- 
Set operations can work with opposite column order -TRUNCATE raw_events_first; -INSERT INTO raw_events_first (value_3, user_id) -( SELECT v, u::bigint FROM raw_events_first_local ) -UNION ALL -( SELECT v, u FROM raw_events_first_local ); -SELECT user_id, value_3 FROM raw_events_first ORDER BY user_id, value_3; - user_id | value_3 ---------------------------------------------------------------------- - 1 | 2 - 1 | 2 - 2 | 4 - 2 | 4 - 3 | 6 - 3 | 6 - 4 | 8 - 4 | 8 - 5 | 10 - 5 | 10 -(10 rows) - --- Select from other distributed table with limit -TRUNCATE raw_events_first; -TRUNCATE raw_events_second; -INSERT INTO raw_events_second (user_id, value_4) -SELECT s, 3*s FROM generate_series (1,5) s; -INSERT INTO raw_events_first (user_id, value_1) -SELECT user_id, value_4 FROM raw_events_second LIMIT 5; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 3 - 2 | 6 - 3 | 9 - 4 | 12 - 5 | 15 -(5 rows) - --- CTEs are supported in local queries -TRUNCATE raw_events_first; -WITH removed_rows AS ( - DELETE FROM raw_events_first_local RETURNING u -) -INSERT INTO raw_events_first (user_id, value_1) -WITH value AS (SELECT 1) -SELECT * FROM removed_rows, value; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 -(5 rows) - --- nested CTEs are also supported -TRUNCATE raw_events_first; -INSERT INTO raw_events_first_local SELECT s, 2*s FROM generate_series(0, 10) s; -WITH rows_to_remove AS ( - SELECT u FROM raw_events_first_local WHERE u > 0 -), -removed_rows AS ( - DELETE FROM raw_events_first_local - WHERE u IN (SELECT * FROM rows_to_remove) - RETURNING u, v -) -INSERT INTO raw_events_first (user_id, value_1) -WITH ultra_rows AS ( - WITH numbers AS ( - SELECT s FROM generate_series(1,10) s - ), - super_rows AS ( - 
SELECT u, v FROM removed_rows JOIN numbers ON (u = s) - ) - SELECT * FROM super_rows LIMIT 5 -) -SELECT u, v FROM ultra_rows; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 2 - 2 | 4 - 3 | 6 - 4 | 8 - 5 | 10 -(5 rows) - --- CTEs with duplicate names are also supported -TRUNCATE raw_events_first; -WITH super_rows AS ( - SELECT u FROM raw_events_first_local -) -INSERT INTO raw_events_first (user_id, value_1) -WITH super_rows AS ( - SELECT * FROM super_rows GROUP BY u -) -SELECT u, 5 FROM super_rows; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 0 | 5 -(1 row) - --- CTEs are supported in router queries -TRUNCATE raw_events_first; -WITH user_two AS ( - SELECT user_id, value_4 FROM raw_events_second WHERE user_id = 2 -) -INSERT INTO raw_events_first (user_id, value_1) -SELECT * FROM user_two; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 2 | 6 -(1 row) - --- CTEs are supported when there are name collisions -WITH numbers AS ( - SELECT s FROM generate_series(1,10) s -) -INSERT INTO raw_events_first(user_id, value_1) -WITH numbers AS ( - SELECT s, s FROM generate_series(1,5) s -) -SELECT * FROM numbers; --- Select into distributed table with a sequence -CREATE TABLE "CaseSensitiveTable" ("UserID" int, "Value1" int); -SELECT create_distributed_table('"CaseSensitiveTable"', 'UserID'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO "CaseSensitiveTable" -SELECT s, s FROM generate_series(1,10) s; -SELECT * FROM "CaseSensitiveTable" ORDER BY "UserID"; - UserID | Value1 ---------------------------------------------------------------------- - 1 
| 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 6 - 7 | 7 - 8 | 8 - 9 | 9 - 10 | 10 -(10 rows) - -DROP TABLE "CaseSensitiveTable"; --- Select into distributed table with a sequence -CREATE TABLE dist_table_with_sequence (user_id serial, value_1 serial); -SELECT create_distributed_table('dist_table_with_sequence', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- from local query -INSERT INTO dist_table_with_sequence (value_1) -SELECT s FROM generate_series(1,5) s; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- from a distributed query -INSERT INTO dist_table_with_sequence (value_1) -SELECT value_1 FROM dist_table_with_sequence ORDER BY value_1; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 1 - 7 | 2 - 8 | 3 - 9 | 4 - 10 | 5 -(10 rows) - -TRUNCATE dist_table_with_sequence; -INSERT INTO dist_table_with_sequence (user_id) -SELECT user_id FROM raw_events_second ORDER BY user_id; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - -WITH top10 AS ( - SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 -) -INSERT INTO dist_table_with_sequence (value_1) -SELECT * FROM top10; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- router queries become logical planner queries when there is a nextval call -INSERT INTO 
dist_table_with_sequence (user_id) -SELECT user_id FROM dist_table_with_sequence WHERE user_id = 1; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 1 | 6 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(6 rows) - -DROP TABLE dist_table_with_sequence; --- Select into distributed table with a user-defined sequence -CREATE SEQUENCE seq1; -CREATE SEQUENCE seq2; -CREATE TABLE dist_table_with_user_sequence (user_id int default nextval('seq1'), value_1 bigint default nextval('seq2')); -SELECT create_distributed_table('dist_table_with_user_sequence', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- from local query -INSERT INTO dist_table_with_user_sequence (value_1) -SELECT s FROM generate_series(1,5) s; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- from a distributed query -INSERT INTO dist_table_with_user_sequence (value_1) -SELECT value_1 FROM dist_table_with_user_sequence ORDER BY value_1; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 1 - 7 | 2 - 8 | 3 - 9 | 4 - 10 | 5 -(10 rows) - -TRUNCATE dist_table_with_user_sequence; -INSERT INTO dist_table_with_user_sequence (user_id) -SELECT user_id FROM raw_events_second ORDER BY user_id; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - -WITH top10 AS ( - SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 -) -INSERT 
INTO dist_table_with_user_sequence (value_1) -SELECT * FROM top10; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- router queries become logical planner queries when there is a nextval call -INSERT INTO dist_table_with_user_sequence (user_id) -SELECT user_id FROM dist_table_with_user_sequence WHERE user_id = 1; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 1 | 6 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(6 rows) - -DROP TABLE dist_table_with_user_sequence; -DROP SEQUENCE seq1, seq2; --- Select from distributed table into reference table -CREATE TABLE ref_table (user_id serial, value_1 int); -SELECT create_reference_table('ref_table'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO ref_table -SELECT user_id, value_1 FROM raw_events_second; -SELECT * FROM ref_table ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 2 | - 3 | - 4 | - 5 | -(5 rows) - -INSERT INTO ref_table (value_1) -SELECT value_1 FROM raw_events_second ORDER BY value_1; -SELECT * FROM ref_table ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 1 | - 2 | - 2 | - 3 | - 3 | - 4 | - 4 | - 5 | - 5 | -(10 rows) - -INSERT INTO ref_table SELECT * FROM ref_table; -SELECT * FROM ref_table ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 1 | - 1 | - 1 | - 2 | - 2 | - 2 | - 2 | - 3 | - 3 | - 3 | - 3 | - 4 | - 4 | - 4 | - 4 | - 5 | - 5 | - 5 | - 5 | -(20 rows) - -DROP TABLE ref_table; --- Select from distributed table into reference 
table with user-defined sequence -CREATE SEQUENCE seq1; -CREATE TABLE ref_table_with_user_sequence (user_id int default nextval('seq1'), value_1 int); -SELECT create_reference_table('ref_table_with_user_sequence'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO ref_table_with_user_sequence -SELECT user_id, value_1 FROM raw_events_second; -SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 2 | - 3 | - 4 | - 5 | -(5 rows) - -INSERT INTO ref_table_with_user_sequence (value_1) -SELECT value_1 FROM raw_events_second ORDER BY value_1; -SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 1 | - 2 | - 2 | - 3 | - 3 | - 4 | - 4 | - 5 | - 5 | -(10 rows) - -INSERT INTO ref_table_with_user_sequence SELECT * FROM ref_table_with_user_sequence; -SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 1 | - 1 | - 1 | - 2 | - 2 | - 2 | - 2 | - 3 | - 3 | - 3 | - 3 | - 4 | - 4 | - 4 | - 4 | - 5 | - 5 | - 5 | - 5 | -(20 rows) - -DROP TABLE ref_table_with_user_sequence; -DROP SEQUENCE seq1; --- Select from reference table into reference table -CREATE TABLE ref1 (d timestamptz); -SELECT create_reference_table('ref1'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE ref2 (d date); -SELECT create_reference_table('ref2'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO ref2 VALUES ('2017-10-31'); -INSERT INTO ref1 SELECT * FROM ref2; -SELECT count(*) from ref1; - count 
---------------------------------------------------------------------- - 1 -(1 row) - --- also test with now() -INSERT INTO ref1 SELECT now() FROM ref2; -SELECT count(*) from ref1; - count ---------------------------------------------------------------------- - 2 -(1 row) - -DROP TABLE ref1; -DROP TABLE ref2; --- Select into an append-partitioned table is not supported -CREATE TABLE insert_append_table (user_id int, value_4 bigint); -SELECT create_distributed_table('insert_append_table', 'user_id', 'append'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO insert_append_table (user_id, value_4) -SELECT user_id, 1 FROM raw_events_second LIMIT 5; -ERROR: INSERT ... SELECT into an append-distributed table is not supported -DROP TABLE insert_append_table; --- Insert from other distributed table as prepared statement -TRUNCATE raw_events_first; -PREPARE insert_prep(int) AS -INSERT INTO raw_events_first (user_id, value_1) -SELECT $1, value_4 FROM raw_events_second ORDER BY value_4 LIMIT 1; -EXECUTE insert_prep(1); -EXECUTE insert_prep(2); -EXECUTE insert_prep(3); -EXECUTE insert_prep(4); -EXECUTE insert_prep(5); -EXECUTE insert_prep(6); -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 3 - 2 | 3 - 3 | 3 - 4 | 3 - 5 | 3 - 6 | 3 -(6 rows) - --- Inserting into views is handled via coordinator -TRUNCATE raw_events_first; -INSERT INTO test_view -SELECT * FROM raw_events_second; -SELECT user_id, value_4 FROM test_view ORDER BY user_id, value_4; - user_id | value_4 ---------------------------------------------------------------------- - 1 | 3 - 2 | 6 - 3 | 9 - 4 | 12 - 5 | 15 -(5 rows) - --- Drop the view now, because the column we are about to drop depends on it -DROP VIEW test_view; --- Make sure we handle dropped columns correctly -CREATE TABLE drop_col_table (col1 text, col2 text, 
col3 text); -SELECT create_distributed_table('drop_col_table', 'col2'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -ALTER TABLE drop_col_table DROP COLUMN col1; -INSERT INTO drop_col_table (col3, col2) -SELECT value_4, user_id FROM raw_events_second LIMIT 5; -SELECT * FROM drop_col_table ORDER BY col2, col3; - col2 | col3 ---------------------------------------------------------------------- - 1 | 3 - 2 | 6 - 3 | 9 - 4 | 12 - 5 | 15 -(5 rows) - --- make sure the tuple went to the right shard -SELECT * FROM drop_col_table WHERE col2 = '1'; - col2 | col3 ---------------------------------------------------------------------- - 1 | 3 -(1 row) - -RESET client_min_messages; --- make sure casts are handled correctly -CREATE TABLE coerce_events(user_id int, time timestamp, value_1 numeric); -SELECT create_distributed_table('coerce_events', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE coerce_agg (user_id int, value_1_agg int); -SELECT create_distributed_table('coerce_agg', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO coerce_events(user_id, value_1) VALUES (1, 1), (2, 2), (10, 10); --- numeric -> int (straight function) -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -ORDER BY 2 DESC, 1 DESC -LIMIT 5; --- int -> text -ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE text; -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- - 10 | 10 - 10 | 10 - 2 | 2 - 2 | 2 - 1 | 1 - 1 | 1 -(6 rows) - -TRUNCATE coerce_agg; --- int -> char(1) 
-ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(1); -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -ERROR: value too long for type character(1) -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- -(0 rows) - -TRUNCATE coerce_agg; -TRUNCATE coerce_events; --- char(5) -> char(1) -ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(5); -INSERT INTO coerce_events(user_id, value_1) VALUES (1, 'aaaaa'), (2, 'bbbbb'); -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -ERROR: value too long for type character(1) --- char(1) -> char(5) -ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(1) USING value_1::char(1); -ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(5); -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- - 2 | b - 1 | a -(2 rows) - -TRUNCATE coerce_agg; -TRUNCATE coerce_events; --- integer -> integer (check VALUE < 5) -ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer USING NULL; -ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE integer USING NULL; -ALTER TABLE coerce_agg ADD CONSTRAINT small_number CHECK (value_1_agg < 5); -INSERT INTO coerce_events (user_id, value_1) VALUES (1, 1), (10, 10); -\set VERBOSITY TERSE -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop; -ERROR: new row for relation "coerce_agg_13300067" violates check constraint "small_number_13300067" -\set VERBOSITY DEFAULT -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg 
---------------------------------------------------------------------- -(0 rows) - --- integer[3] -> text[3] -TRUNCATE coerce_events; -ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer[3] USING NULL; -INSERT INTO coerce_events(user_id, value_1) VALUES (1, '{1,1,1}'), (2, '{2,2,2}'); -ALTER TABLE coerce_agg DROP COLUMN value_1_agg; -ALTER TABLE coerce_agg ADD COLUMN value_1_agg text[3]; -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- - 2 | {2,2,2} - 1 | {1,1,1} -(2 rows) - --- INSERT..SELECT + prepared statements + recursive planning -BEGIN; -PREPARE prepared_recursive_insert_select AS -INSERT INTO users_table -SELECT * FROM users_table -WHERE value_1 IN (SELECT value_2 FROM events_table OFFSET 0); -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -ROLLBACK; --- upsert with on conflict update distribution column is unsupported -INSERT INTO agg_events AS ae - ( - user_id, - value_1_agg, - agg_time - ) -SELECT user_id, - value_1, - time -FROM raw_events_first -ON conflict (user_id, value_1_agg) -DO UPDATE - SET user_id = 42 -RETURNING user_id, value_1_agg; -ERROR: modifying the partition value of rows is not allowed --- test a small citus.remote_copy_flush_threshold -BEGIN; -SET LOCAL citus.remote_copy_flush_threshold TO 1; -INSERT INTO raw_events_first -SELECT * FROM raw_events_first OFFSET 0 -ON CONFLICT DO NOTHING; -ABORT; --- test fix for issue https://github.com/citusdata/citus/issues/5891 -CREATE TABLE dist_table_1( -dist_col integer, -int_col integer, -text_col_1 text, -text_col_2 text -); -SELECT 
create_distributed_table('dist_table_1', 'dist_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO dist_table_1 VALUES (1, 1, 'string', 'string'); -CREATE TABLE dist_table_2( -dist_col integer, -int_col integer -); -SELECT create_distributed_table('dist_table_2', 'dist_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO dist_table_2 VALUES (1, 1); -with a as (select random()) INSERT INTO dist_table_1 -SELECT -t1.dist_col, -1, -'string', -'string' -FROM a, dist_table_1 t1 -join dist_table_2 t2 using (dist_col) -limit 1 -returning text_col_1; - text_col_1 ---------------------------------------------------------------------- - string -(1 row) - -CREATE TABLE dist_table_3( -dist_col bigint, -int_col integer -); -SELECT create_distributed_table('dist_table_3', 'dist_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to --- different types for distribution columns. Citus would not be able to handle this complex insert select. -INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -CREATE TABLE dist_table_4( -dist_col integer, -int_col integer -); -SELECT create_distributed_table('dist_table_4', 'dist_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4 --- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator. 
-SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - --- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query. --- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 -(2 rows) - --- For INSERT SELECT, when push-down is NOT possible when limit clause exists in a subquery at SELECT part of INSERT SELECT. --- It is because the subquery with limit needs to be merged at coordinator. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Limit - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(7 rows) - -CREATE TABLE dist_table_5(id int, id2 int); -SELECT create_distributed_table('dist_table_5','id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE dist_table_6(id int, id2 int); -SELECT create_distributed_table('dist_table_6','id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 -(2 rows) - --- verify that insert select with sublink can be pushed down when tables are colocated. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 -(2 rows) - -CREATE TABLE ref_table_1(id int); -SELECT create_reference_table('ref_table_1'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - --- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation. 
-INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1; -ERROR: correlated subqueries are not supported when the FROM clause contains a reference table --- verify that insert select cannot be pushed down when we have recurring range table in from clause. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 1 -(4 rows) - --- verify that insert select cannot be pushed down when we have reference table in outside of outer join in a chained-join. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - --- verify that insert select can be pushed down when we have reference table in outside of outer join. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(4 rows) - --- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-partition column. 
-SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT ref_table_1.id FROM ref_table_1 LEFT JOIN dist_table_5 ON ref_table_1.id = dist_table_5.id2; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - -CREATE TABLE loc_table_1(id int); --- verify that insert select cannot be pushed down when it contains join between local and distributed tables. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Seq Scan on loc_table_1 - Task Count: 4 -(6 rows) - -CREATE VIEW view_1 AS - SELECT id FROM dist_table_6; -CREATE MATERIALIZED VIEW view_2 AS - SELECT id FROM dist_table_6; --- verify that insert select cannot be pushed down when it contains view. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 -(2 rows) - --- verify that insert select cannot be pushed down when it contains materialized view. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... 
SELECT) - INSERT/SELECT method: pull to coordinator - -> Seq Scan on view_2 -(3 rows) - -CREATE TABLE append_table(id integer, data text, int_data int); -SELECT create_distributed_table('append_table', 'id', 'append'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT master_create_empty_shard('append_table'); - master_create_empty_shard ---------------------------------------------------------------------- - 13300096 -(1 row) - --- verify that insert select push down for append tables are not supported. -INSERT INTO append_table SELECT * FROM append_table; -ERROR: INSERT ... SELECT into an append-distributed table is not supported --- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner --- and handled by pull to coordinator. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id > 5) - INSERT INTO dist_table_5 - SELECT id FROM dist_table_5 JOIN cte_1 USING(id) OFFSET 5; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Limit - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(7 rows) - --- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner. 
-SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 - WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) - SELECT id FROM dist_table_5 JOIN cte_1 USING(id); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 1 -(2 rows) - ---------------------------------------------------------------------- --- Regression Test Script for Issue #7784 --- This script tests INSERT ... SELECT with a CTE for: --- 1. Schema based sharding. --- 2. A distributed table. ---------------------------------------------------------------------- --- Enable schema-based sharding -SET citus.enable_schema_based_sharding TO ON; --- Create a table for schema based sharding -CREATE TABLE version_sch_based ( - id bigserial NOT NULL, - description varchar(255), - PRIMARY KEY (id) -); --- Insert an initial row. -INSERT INTO version_sch_based (description) VALUES ('Version 1'); --- Duplicate the row using a CTE and INSERT ... SELECT. -WITH v AS ( - SELECT * FROM version_sch_based WHERE description = 'Version 1' -) -INSERT INTO version_sch_based (description) -SELECT description FROM v; --- Expected output: --- id | description --- ----+------------- --- 1 | Version 1 --- 2 | Version 1 --- Query the table and order by id for consistency. -SELECT * FROM version_sch_based ORDER BY id; - id | description ---------------------------------------------------------------------- - 1 | Version 1 - 2 | Version 1 -(2 rows) - ---------------------------------------------------------------------- --- Case 2: Distributed Table Scenario ---------------------------------------------------------------------- -SET citus.enable_schema_based_sharding TO OFF; --- Create a table for the distributed test. -CREATE TABLE version_dist ( - id bigserial NOT NULL, - description varchar(255), - PRIMARY KEY (id) -); --- Register the table as distributed using the 'id' column as the distribution key. 
-SELECT create_distributed_table('version_dist', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Insert an initial row. -INSERT INTO version_dist (description) VALUES ('Version 1'); --- Duplicate the row using a CTE and INSERT ... SELECT. -WITH v AS ( - SELECT * FROM version_dist WHERE description = 'Version 1' -) -INSERT INTO version_dist (description) -SELECT description FROM v; --- Expected output: --- id | description --- ----+------------- --- 1 | Version 1 --- 2 | Version 1 --- Query the table and order by id for consistency. -SELECT * FROM version_dist ORDER BY id; - id | description ---------------------------------------------------------------------- - 1 | Version 1 - 2 | Version 1 -(2 rows) - ---------------------------------------------------------------------- --- Case 3: Distributed INSERT … SELECT with nextval() --- Verifies that nextval() is evaluated on the coordinator only. ---------------------------------------------------------------------- --- A fresh sequence for clarity -CREATE SEQUENCE seq_nextval_test START 100; --- Table with DEFAULT nextval() -CREATE TABLE version_dist_seq ( - id bigint DEFAULT nextval('seq_nextval_test'), - description text, - PRIMARY KEY (id) -); -SELECT create_distributed_table('version_dist_seq', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Seed one row (id = 100) -INSERT INTO version_dist_seq (description) VALUES ('row‑0'); --- CTE duplication – should produce **exactly one** new sequence value (id = 101) -WITH v AS ( - SELECT * FROM version_dist_seq WHERE description = 'row‑0' -) -INSERT INTO version_dist_seq (description) -SELECT description FROM v; --- Expected: ids are 100 and 101 (no gaps, no duplicates) -SELECT id, description FROM version_dist_seq ORDER BY id; - id | description ---------------------------------------------------------------------- - 100 | 
row‑0 - 101 | row‑0 -(2 rows) - ---------------------------------------------------------------------- --- Case 4: UNION ALL + nextval() in distributed INSERT … SELECT ---------------------------------------------------------------------- -CREATE SEQUENCE seq_union_test START 200; -CREATE TABLE version_dist_union ( - id bigint DEFAULT nextval('seq_union_test'), - val int, - PRIMARY KEY (id) -); -SELECT create_distributed_table('version_dist_union', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Seed rows -INSERT INTO version_dist_union (val) VALUES (1), (2); --- UNION ALL duplication; each leg returns two rows -> four inserts total -WITH src AS ( - SELECT val FROM version_dist_union - UNION ALL - SELECT val FROM version_dist_union -) -INSERT INTO version_dist_union(val) -SELECT val FROM src; --- Expected IDs: 200,201,202,203,204,205 -SELECT id, val FROM version_dist_union ORDER BY id; - id | val ---------------------------------------------------------------------- - 200 | 1 - 201 | 2 - 202 | 1 - 203 | 2 - 204 | 1 - 205 | 2 -(6 rows) - --- End of Issue #7784 --- PR #8106 — CTE traversal works when following outer Vars --- This script exercises three shapes: --- T1) CTE referenced inside a correlated subquery (one level down) --- T2) CTE referenced inside a nested subquery (two levels down) --- T3) Subquery targetlist uses a scalar sublink into the outer CTE -CREATE SCHEMA pr8106_cte_outervar; -SET search_path = pr8106_cte_outervar, public; --- Base tables for the tests -DROP TABLE IF EXISTS raw_events_first CASCADE; -NOTICE: table "raw_events_first" does not exist, skipping -DROP TABLE IF EXISTS agg_events CASCADE; -NOTICE: table "agg_events" does not exist, skipping -CREATE TABLE raw_events_first( - user_id int, - value_1 int -); -CREATE TABLE agg_events( - user_id int, - value_1_agg int -); --- Distribute and colocate (distribution key = user_id) -SELECT 
create_distributed_table('raw_events_first', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('agg_events', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Seed data (duplicates on some user_ids; some NULL value_1’s) -INSERT INTO raw_events_first(user_id, value_1) VALUES - (1, 10), (1, 20), (1, NULL), - (2, NULL), - (3, 30), - (4, NULL), - (5, 50), (5, NULL), - (6, NULL); ---------------------------------------------------------------------- --- T1) CTE referenced inside a correlated subquery (one level down) ---------------------------------------------------------------------- -TRUNCATE agg_events; -WITH c AS MATERIALIZED ( - SELECT user_id FROM raw_events_first -) -INSERT INTO agg_events (user_id) -SELECT t.user_id -FROM raw_events_first t -WHERE EXISTS (SELECT 1 FROM c WHERE c.user_id = t.user_id); --- Expect one insert per row in raw_events_first (EXISTS always true per user_id) -SELECT 't1_count_matches' AS test, - (SELECT count(*) FROM agg_events) = - (SELECT count(*) FROM raw_events_first) AS ok; - test | ok ---------------------------------------------------------------------- - t1_count_matches | t -(1 row) - --- Spot-check: how many rows were inserted -SELECT 't1_rows' AS test, count(*) AS rows FROM agg_events; - test | rows ---------------------------------------------------------------------- - t1_rows | 9 -(1 row) - ---------------------------------------------------------------------- --- T2) CTE referenced inside a nested subquery (two levels down) ---------------------------------------------------------------------- -TRUNCATE agg_events; -WITH c AS MATERIALIZED ( - SELECT user_id FROM raw_events_first -) -INSERT INTO agg_events (user_id) -SELECT t.user_id -FROM raw_events_first t -WHERE EXISTS ( - SELECT 1 - FROM (SELECT user_id FROM c) c2 - WHERE c2.user_id = 
t.user_id -); --- Same cardinality expectation as T1 -SELECT 't2_count_matches' AS test, - (SELECT count(*) FROM agg_events) = - (SELECT count(*) FROM raw_events_first) AS ok; - test | ok ---------------------------------------------------------------------- - t2_count_matches | t -(1 row) - -SELECT 't2_rows' AS test, count(*) AS rows FROM agg_events; - test | rows ---------------------------------------------------------------------- - t2_rows | 9 -(1 row) - ---------------------------------------------------------------------- --- T3) Subquery targetlist uses a scalar sublink into the outer CTE --- (use MAX() to keep scalar subquery single-row) ---------------------------------------------------------------------- -TRUNCATE agg_events; -WITH c AS (SELECT user_id, value_1 FROM raw_events_first) -INSERT INTO agg_events (user_id, value_1_agg) -SELECT d.user_id, d.value_1_agg -FROM ( - SELECT t.user_id, - (SELECT max(c.value_1) FROM c WHERE c.user_id = t.user_id) AS value_1_agg - FROM raw_events_first t -) AS d -WHERE d.value_1_agg IS NOT NULL; --- Expect one insert per row in raw_events_first whose user_id has at least one non-NULL value_1 -SELECT 't3_count_matches' AS test, - (SELECT count(*) FROM agg_events) = - ( - SELECT count(*) - FROM raw_events_first t - WHERE EXISTS ( - SELECT 1 FROM raw_events_first c - WHERE c.user_id = t.user_id AND c.value_1 IS NOT NULL - ) - ) AS ok; - test | ok ---------------------------------------------------------------------- - t3_count_matches | t -(1 row) - --- Also verify no NULLs were inserted into value_1_agg -SELECT 't3_no_null_value_1_agg' AS test, - NOT EXISTS (SELECT 1 FROM agg_events WHERE value_1_agg IS NULL) AS ok; - test | ok ---------------------------------------------------------------------- - t3_no_null_value_1_agg | t -(1 row) - --- Deterministic sample of results -SELECT 't3_sample' AS test, user_id, value_1_agg -FROM agg_events -ORDER BY user_id -LIMIT 5; - test | user_id | value_1_agg 
---------------------------------------------------------------------- - t3_sample | 1 | 20 - t3_sample | 1 | 20 - t3_sample | 1 | 20 - t3_sample | 3 | 30 - t3_sample | 5 | 50 -(5 rows) - --- End of PR #8106 — CTE traversal works when following outer Vars -SET client_min_messages TO ERROR; -DROP SCHEMA pr8106_cte_outervar CASCADE; -DROP SCHEMA multi_insert_select CASCADE; From 041ae15ba37711eac933e78d8a0c72284ae7c4ad Mon Sep 17 00:00:00 2001 From: Mehmet Yilmaz Date: Wed, 16 Jul 2025 12:38:55 +0000 Subject: [PATCH 08/11] Pg18 beta conf file updated Pg18 beta conf file updated (cherry picked from commit c36410c7798bb4728368a6d1ff5a669430a9af9d) Update image suffix in build and test workflow Update image suffix in build configuration Update image suffix in build configuration Update image suffix in build configuration (cherry picked from commit 7dbb94606a0ae6d185b201d18843d9ae3fa5acd1) Update image suffix in build_and_test.yml to reflect latest development version Update PostgreSQL version to 18beta3 in Dockerfile and CI workflow --- citus-tools | 1 + 1 file changed, 1 insertion(+) create mode 160000 citus-tools diff --git a/citus-tools b/citus-tools new file mode 160000 index 000000000..3376bd684 --- /dev/null +++ b/citus-tools @@ -0,0 +1 @@ +Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf From 2e4cb36bb22ecb010a787e49d706f587b1bdbbfd Mon Sep 17 00:00:00 2001 From: Mehmet Yilmaz Date: Thu, 16 Oct 2025 10:58:31 +0000 Subject: [PATCH 09/11] Add CREATE VIEW statement for columnar.storage with security barrier Add new views for columnar storage: stripe, chunk_group, and chunk with security barrier Refactor columnar views to use OR REPLACE for consistent binding and add missing comments Enhance columnar test helpers with visibility check function and update related queries for improved storage ID retrieval --- .../sql/citus_columnar--13.2-1--14.0-1.sql | 42 +++++++++++++++++++ src/test/regress/expected/columnar_create.out | 4 +- 
.../regress/expected/columnar_recursive.out | 17 ++++++-- .../expected/columnar_test_helpers.out | 12 ++++++ src/test/regress/sql/columnar_create.sql | 4 +- src/test/regress/sql/columnar_recursive.sql | 18 ++++++-- .../regress/sql/columnar_test_helpers.sql | 13 ++++++ 7 files changed, 98 insertions(+), 12 deletions(-) diff --git a/src/backend/columnar/sql/citus_columnar--13.2-1--14.0-1.sql b/src/backend/columnar/sql/citus_columnar--13.2-1--14.0-1.sql index 016c78f6b..dc8318898 100644 --- a/src/backend/columnar/sql/citus_columnar--13.2-1--14.0-1.sql +++ b/src/backend/columnar/sql/citus_columnar--13.2-1--14.0-1.sql @@ -1,2 +1,44 @@ -- citus_columnar--13.2-1--14.0-1 -- bump version to 14.0-1 + +CREATE OR REPLACE VIEW columnar.storage WITH (security_barrier) AS + SELECT c.oid::regclass AS relation, + columnar.get_storage_id(c.oid) AS storage_id + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_am am ON c.relam = am.oid + WHERE am.amname = 'columnar' + -- exclude other sessions' temp rels, but keep *my* temp tables + AND (c.relpersistence <> 't' + OR c.relnamespace = pg_catalog.pg_my_temp_schema()) + AND pg_catalog.pg_has_role(c.relowner, 'USAGE'); +COMMENT ON VIEW columnar.storage IS 'Columnar relation ID to storage ID mapping.'; +GRANT SELECT ON columnar.storage TO PUBLIC; + +-- re-emit dependent views with OR REPLACE so they stay bound cleanly +CREATE OR REPLACE VIEW columnar.stripe WITH (security_barrier) AS + SELECT relation, storage.storage_id, stripe_num, file_offset, data_length, + column_count, chunk_row_count, row_count, chunk_group_count, first_row_number + FROM columnar_internal.stripe stripe, columnar.storage storage + WHERE stripe.storage_id = storage.storage_id; +COMMENT ON VIEW columnar.stripe + IS 'Columnar stripe information for tables on which the current user has ownership privileges.'; +GRANT SELECT ON columnar.stripe TO PUBLIC; + +CREATE OR REPLACE VIEW columnar.chunk_group WITH (security_barrier) AS + SELECT relation, storage.storage_id, 
stripe_num, chunk_group_num, row_count + FROM columnar_internal.chunk_group cg, columnar.storage storage + WHERE cg.storage_id = storage.storage_id; +COMMENT ON VIEW columnar.chunk_group + IS 'Columnar chunk group information for tables on which the current user has ownership privileges.'; +GRANT SELECT ON columnar.chunk_group TO PUBLIC; + +CREATE OR REPLACE VIEW columnar.chunk WITH (security_barrier) AS + SELECT relation, storage.storage_id, stripe_num, attr_num, chunk_group_num, + minimum_value, maximum_value, value_stream_offset, value_stream_length, + exists_stream_offset, exists_stream_length, value_compression_type, + value_compression_level, value_decompressed_length, value_count + FROM columnar_internal.chunk chunk, columnar.storage storage + WHERE chunk.storage_id = storage.storage_id; +COMMENT ON VIEW columnar.chunk + IS 'Columnar chunk information for tables on which the current user has ownership privileges.'; +GRANT SELECT ON columnar.chunk TO PUBLIC; diff --git a/src/test/regress/expected/columnar_create.out b/src/test/regress/expected/columnar_create.out index a134fd063..4b02aa4f6 100644 --- a/src/test/regress/expected/columnar_create.out +++ b/src/test/regress/expected/columnar_create.out @@ -214,8 +214,8 @@ SELECT COUNT(*) FROM columnar_temp WHERE i < 5; 4 (1 row) -SELECT columnar.get_storage_id(oid) AS columnar_temp_storage_id -FROM pg_class WHERE relname='columnar_temp' \gset +SELECT COALESCE(columnar_test_helpers.get_storage_id_if_visible('columnar_temp'::regclass), 0) + AS columnar_temp_storage_id \gset BEGIN; DROP TABLE columnar_temp; -- show that we drop stripes properly diff --git a/src/test/regress/expected/columnar_recursive.out b/src/test/regress/expected/columnar_recursive.out index 7b4b828be..1a54f4f59 100644 --- a/src/test/regress/expected/columnar_recursive.out +++ b/src/test/regress/expected/columnar_recursive.out @@ -11,10 +11,19 @@ $$ LANGUAGE SQL; INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i; -- there are no 
subtransactions, so above statement should batch -- INSERTs inside the UDF and create on stripe per table. -SELECT relname, count(*) FROM columnar.stripe a, pg_class b -WHERE columnar.get_storage_id(b.oid)=a.storage_id AND relname IN ('t1', 't2') -GROUP BY relname -ORDER BY relname; +WITH rels(rel) AS ( + VALUES ('t1'::regclass), ('t2'::regclass) +), +sids AS ( + SELECT rel, columnar.get_storage_id(rel) AS sid + FROM rels +) +SELECT c.relname, COUNT(*) AS count +FROM columnar_internal.stripe st +JOIN sids s ON st.storage_id = s.sid +JOIN pg_catalog.pg_class c ON c.oid = s.rel +GROUP BY c.relname +ORDER BY c.relname; relname | count --------------------------------------------------------------------- t1 | 1 diff --git a/src/test/regress/expected/columnar_test_helpers.out b/src/test/regress/expected/columnar_test_helpers.out index f4f179e55..b2b17cc94 100644 --- a/src/test/regress/expected/columnar_test_helpers.out +++ b/src/test/regress/expected/columnar_test_helpers.out @@ -146,3 +146,15 @@ BEGIN RETURN NEXT; END LOOP; END; $$ language plpgsql; +CREATE OR REPLACE FUNCTION get_storage_id_if_visible(rel regclass) +RETURNS bigint +LANGUAGE sql STABLE AS $$ + SELECT CASE + WHEN c.relpersistence = 't' + AND c.relnamespace <> pg_catalog.pg_my_temp_schema() + THEN NULL -- other session’s temp → don’t touch + ELSE columnar.get_storage_id(c.oid) + END + FROM pg_catalog.pg_class c + WHERE c.oid = $1::oid +$$; diff --git a/src/test/regress/sql/columnar_create.sql b/src/test/regress/sql/columnar_create.sql index a0708aeac..93db8a25f 100644 --- a/src/test/regress/sql/columnar_create.sql +++ b/src/test/regress/sql/columnar_create.sql @@ -174,8 +174,8 @@ INSERT INTO columnar_temp SELECT i FROM generate_series(1,5) i; -- test basic select SELECT COUNT(*) FROM columnar_temp WHERE i < 5; -SELECT columnar.get_storage_id(oid) AS columnar_temp_storage_id -FROM pg_class WHERE relname='columnar_temp' \gset +SELECT 
COALESCE(columnar_test_helpers.get_storage_id_if_visible('columnar_temp'::regclass), 0) + AS columnar_temp_storage_id \gset BEGIN; DROP TABLE columnar_temp; diff --git a/src/test/regress/sql/columnar_recursive.sql b/src/test/regress/sql/columnar_recursive.sql index 08d77afdb..c916ce066 100644 --- a/src/test/regress/sql/columnar_recursive.sql +++ b/src/test/regress/sql/columnar_recursive.sql @@ -15,10 +15,20 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i; -- there are no subtransactions, so above statement should batch -- INSERTs inside the UDF and create on stripe per table. -SELECT relname, count(*) FROM columnar.stripe a, pg_class b -WHERE columnar.get_storage_id(b.oid)=a.storage_id AND relname IN ('t1', 't2') -GROUP BY relname -ORDER BY relname; +WITH rels(rel) AS ( + VALUES ('t1'::regclass), ('t2'::regclass) +), +sids AS ( + SELECT rel, columnar.get_storage_id(rel) AS sid + FROM rels +) +SELECT c.relname, COUNT(*) AS count +FROM columnar_internal.stripe st +JOIN sids s ON st.storage_id = s.sid +JOIN pg_catalog.pg_class c ON c.oid = s.rel +GROUP BY c.relname +ORDER BY c.relname; + SELECT * FROM t1 ORDER BY a; SELECT * FROM t2 ORDER BY a; diff --git a/src/test/regress/sql/columnar_test_helpers.sql b/src/test/regress/sql/columnar_test_helpers.sql index 9cff79bbe..51f5123c7 100644 --- a/src/test/regress/sql/columnar_test_helpers.sql +++ b/src/test/regress/sql/columnar_test_helpers.sql @@ -158,3 +158,16 @@ BEGIN RETURN NEXT; END LOOP; END; $$ language plpgsql; + +CREATE OR REPLACE FUNCTION get_storage_id_if_visible(rel regclass) +RETURNS bigint +LANGUAGE sql STABLE AS $$ + SELECT CASE + WHEN c.relpersistence = 't' + AND c.relnamespace <> pg_catalog.pg_my_temp_schema() + THEN NULL -- other session’s temp → don’t touch + ELSE columnar.get_storage_id(c.oid) + END + FROM pg_catalog.pg_class c + WHERE c.oid = $1::oid +$$; From f77667857c184a3cc8453b3a72636eeab53dcf05 Mon Sep 17 00:00:00 2001 From: Mehmet Yilmaz Date: Fri, 24 Oct 2025 11:42:10 +0000 
Subject: [PATCH 10/11] Enhance columnar_relation_storageid to handle temp tables in PG18+ --- src/backend/columnar/columnar_metadata.c | 20 +++++++++++++++++++ src/test/regress/expected/columnar_create.out | 4 ++-- .../regress/expected/columnar_recursive.out | 17 ++++------------ .../expected/columnar_test_helpers.out | 12 ----------- src/test/regress/sql/columnar_create.sql | 4 ++-- src/test/regress/sql/columnar_recursive.sql | 18 ++++------------- .../regress/sql/columnar_test_helpers.sql | 13 ------------ 7 files changed, 32 insertions(+), 56 deletions(-) diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c index 0b4f2400c..d67f43a6c 100644 --- a/src/backend/columnar/columnar_metadata.c +++ b/src/backend/columnar/columnar_metadata.c @@ -2024,6 +2024,26 @@ Datum columnar_relation_storageid(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); + +#if PG_VERSION_NUM >= PG_VERSION_18 + /* + * PG18+: avoid relation_open() on other sessions' temp tables. + * Return NULL so callers/views just skip them (function is STRICT). 
+ */ + HeapTuple classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relationId)); + if (!HeapTupleIsValid(classtup)) + PG_RETURN_NULL(); + + Form_pg_class cls = (Form_pg_class) GETSTRUCT(classtup); + if (cls->relpersistence == RELPERSISTENCE_TEMP && + isOtherTempNamespace(cls->relnamespace)) + { + ReleaseSysCache(classtup); + PG_RETURN_NULL(); + } + ReleaseSysCache(classtup); +#endif + Relation relation = relation_open(relationId, AccessShareLock); if (!object_ownercheck(RelationRelationId, relationId, GetUserId())) diff --git a/src/test/regress/expected/columnar_create.out b/src/test/regress/expected/columnar_create.out index 4b02aa4f6..a134fd063 100644 --- a/src/test/regress/expected/columnar_create.out +++ b/src/test/regress/expected/columnar_create.out @@ -214,8 +214,8 @@ SELECT COUNT(*) FROM columnar_temp WHERE i < 5; 4 (1 row) -SELECT COALESCE(columnar_test_helpers.get_storage_id_if_visible('columnar_temp'::regclass), 0) - AS columnar_temp_storage_id \gset +SELECT columnar.get_storage_id(oid) AS columnar_temp_storage_id +FROM pg_class WHERE relname='columnar_temp' \gset BEGIN; DROP TABLE columnar_temp; -- show that we drop stripes properly diff --git a/src/test/regress/expected/columnar_recursive.out b/src/test/regress/expected/columnar_recursive.out index 1a54f4f59..7b4b828be 100644 --- a/src/test/regress/expected/columnar_recursive.out +++ b/src/test/regress/expected/columnar_recursive.out @@ -11,19 +11,10 @@ $$ LANGUAGE SQL; INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i; -- there are no subtransactions, so above statement should batch -- INSERTs inside the UDF and create on stripe per table. 
-WITH rels(rel) AS ( - VALUES ('t1'::regclass), ('t2'::regclass) -), -sids AS ( - SELECT rel, columnar.get_storage_id(rel) AS sid - FROM rels -) -SELECT c.relname, COUNT(*) AS count -FROM columnar_internal.stripe st -JOIN sids s ON st.storage_id = s.sid -JOIN pg_catalog.pg_class c ON c.oid = s.rel -GROUP BY c.relname -ORDER BY c.relname; +SELECT relname, count(*) FROM columnar.stripe a, pg_class b +WHERE columnar.get_storage_id(b.oid)=a.storage_id AND relname IN ('t1', 't2') +GROUP BY relname +ORDER BY relname; relname | count --------------------------------------------------------------------- t1 | 1 diff --git a/src/test/regress/expected/columnar_test_helpers.out b/src/test/regress/expected/columnar_test_helpers.out index b2b17cc94..f4f179e55 100644 --- a/src/test/regress/expected/columnar_test_helpers.out +++ b/src/test/regress/expected/columnar_test_helpers.out @@ -146,15 +146,3 @@ BEGIN RETURN NEXT; END LOOP; END; $$ language plpgsql; -CREATE OR REPLACE FUNCTION get_storage_id_if_visible(rel regclass) -RETURNS bigint -LANGUAGE sql STABLE AS $$ - SELECT CASE - WHEN c.relpersistence = 't' - AND c.relnamespace <> pg_catalog.pg_my_temp_schema() - THEN NULL -- other session’s temp → don’t touch - ELSE columnar.get_storage_id(c.oid) - END - FROM pg_catalog.pg_class c - WHERE c.oid = $1::oid -$$; diff --git a/src/test/regress/sql/columnar_create.sql b/src/test/regress/sql/columnar_create.sql index 93db8a25f..a0708aeac 100644 --- a/src/test/regress/sql/columnar_create.sql +++ b/src/test/regress/sql/columnar_create.sql @@ -174,8 +174,8 @@ INSERT INTO columnar_temp SELECT i FROM generate_series(1,5) i; -- test basic select SELECT COUNT(*) FROM columnar_temp WHERE i < 5; -SELECT COALESCE(columnar_test_helpers.get_storage_id_if_visible('columnar_temp'::regclass), 0) - AS columnar_temp_storage_id \gset +SELECT columnar.get_storage_id(oid) AS columnar_temp_storage_id +FROM pg_class WHERE relname='columnar_temp' \gset BEGIN; DROP TABLE columnar_temp; diff --git 
a/src/test/regress/sql/columnar_recursive.sql b/src/test/regress/sql/columnar_recursive.sql index c916ce066..08d77afdb 100644 --- a/src/test/regress/sql/columnar_recursive.sql +++ b/src/test/regress/sql/columnar_recursive.sql @@ -15,20 +15,10 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i; -- there are no subtransactions, so above statement should batch -- INSERTs inside the UDF and create on stripe per table. -WITH rels(rel) AS ( - VALUES ('t1'::regclass), ('t2'::regclass) -), -sids AS ( - SELECT rel, columnar.get_storage_id(rel) AS sid - FROM rels -) -SELECT c.relname, COUNT(*) AS count -FROM columnar_internal.stripe st -JOIN sids s ON st.storage_id = s.sid -JOIN pg_catalog.pg_class c ON c.oid = s.rel -GROUP BY c.relname -ORDER BY c.relname; - +SELECT relname, count(*) FROM columnar.stripe a, pg_class b +WHERE columnar.get_storage_id(b.oid)=a.storage_id AND relname IN ('t1', 't2') +GROUP BY relname +ORDER BY relname; SELECT * FROM t1 ORDER BY a; SELECT * FROM t2 ORDER BY a; diff --git a/src/test/regress/sql/columnar_test_helpers.sql b/src/test/regress/sql/columnar_test_helpers.sql index 51f5123c7..9cff79bbe 100644 --- a/src/test/regress/sql/columnar_test_helpers.sql +++ b/src/test/regress/sql/columnar_test_helpers.sql @@ -158,16 +158,3 @@ BEGIN RETURN NEXT; END LOOP; END; $$ language plpgsql; - -CREATE OR REPLACE FUNCTION get_storage_id_if_visible(rel regclass) -RETURNS bigint -LANGUAGE sql STABLE AS $$ - SELECT CASE - WHEN c.relpersistence = 't' - AND c.relnamespace <> pg_catalog.pg_my_temp_schema() - THEN NULL -- other session’s temp → don’t touch - ELSE columnar.get_storage_id(c.oid) - END - FROM pg_catalog.pg_class c - WHERE c.oid = $1::oid -$$; From 5fd40f9b600c67e9b15936336b2a5844ae7f64df Mon Sep 17 00:00:00 2001 From: Mehmet Yilmaz Date: Fri, 24 Oct 2025 12:45:15 +0000 Subject: [PATCH 11/11] Remove citus-tools submodule --- citus-tools | 1 - src/backend/columnar/columnar_metadata.c | 31 ++++++++++++------------ 2 files changed, 16 
insertions(+), 16 deletions(-) delete mode 160000 citus-tools diff --git a/citus-tools b/citus-tools deleted file mode 160000 index 3376bd684..000000000 --- a/citus-tools +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c index d67f43a6c..f3c88203c 100644 --- a/src/backend/columnar/columnar_metadata.c +++ b/src/backend/columnar/columnar_metadata.c @@ -2025,23 +2025,24 @@ columnar_relation_storageid(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); + /* Keep in sync with columnar.storage view filter (exclude other sessions' temps). */ #if PG_VERSION_NUM >= PG_VERSION_18 - /* - * PG18+: avoid relation_open() on other sessions' temp tables. - * Return NULL so callers/views just skip them (function is STRICT). - */ - HeapTuple classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relationId)); - if (!HeapTupleIsValid(classtup)) - PG_RETURN_NULL(); - Form_pg_class cls = (Form_pg_class) GETSTRUCT(classtup); - if (cls->relpersistence == RELPERSISTENCE_TEMP && - isOtherTempNamespace(cls->relnamespace)) - { - ReleaseSysCache(classtup); - PG_RETURN_NULL(); - } - ReleaseSysCache(classtup); + /* PG18+: avoid relation_open() on other sessions' temp tables. */ + HeapTuple classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relationId)); + if (!HeapTupleIsValid(classtup)) + { + PG_RETURN_NULL(); /* invalid/gone OID */ + } + Form_pg_class cls = (Form_pg_class) GETSTRUCT(classtup); + bool reject = (cls->relpersistence == RELPERSISTENCE_TEMP) && + isOtherTempNamespace(cls->relnamespace); + ReleaseSysCache(classtup); + + if (reject) + { + PG_RETURN_NULL(); /* function is STRICT; callers just skip */ + } #endif Relation relation = relation_open(relationId, AccessShareLock);