From cc945fa331464d9f388da39dfd30c566243c94fa Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Tue, 14 Mar 2023 10:22:34 +0300
Subject: [PATCH 01/18] Add multi_create_fdw into minimal_schedule (#6759)

So that we can run the tests that require fake_fdw by using minimal
schedule too.

Also move multi_create_fdw.sql up in multi_1_schedule to make it
available to more tests.
---
 src/test/regress/minimal_schedule | 2 +-
 src/test/regress/multi_1_schedule | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/test/regress/minimal_schedule b/src/test/regress/minimal_schedule
index ef2d3dc65..8b0cfff70 100644
--- a/src/test/regress/minimal_schedule
+++ b/src/test/regress/minimal_schedule
@@ -1,2 +1,2 @@
 test: minimal_cluster_management
-test: multi_test_helpers multi_test_helpers_superuser columnar_test_helpers multi_test_catalog_views tablespace
+test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw columnar_test_helpers multi_test_catalog_views tablespace
diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule
index 5e2cd17c1..ee81bde38 100644
--- a/src/test/regress/multi_1_schedule
+++ b/src/test/regress/multi_1_schedule
@@ -19,7 +19,7 @@ test: multi_extension
 test: single_node
 test: relation_access_tracking_single_node
 test: single_node_truncate
-test: multi_test_helpers multi_test_helpers_superuser
+test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw
 test: multi_cluster_management

 # below tests are placed right after multi_cluster_management as we do
@@ -91,13 +91,6 @@ test: drop_partitioned_table
 test: multi_fix_partition_shard_index_names
 test: partition_wise_join

-# ----------
-# Tests for foreign data wrapper support
-# ----------
-test: multi_create_fdw
-
-
-
 # ----------
 # Tests for statistics propagation
 # ----------

From f68fc9e69ce51833bb94520f63b48a63f2f76e08 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Tue, 14 Mar 2023 14:24:52 +0300
Subject: [PATCH 02/18] Decide core distribution params in CreateCitusTable (#6760)

Decide core distribution params in CreateCitusTable to reduce the chances
of creating Citus tables based on incorrect combinations of distribution
method and replication model params.

Also introduce DistributedTableParams struct to encapsulate the parameters
that are specific to distributed tables.
---
 .../commands/create_distributed_table.c | 283 +++++++++++++-----
 1 file changed, 207 insertions(+), 76 deletions(-)

diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c
index 101d866f0..e38395296 100644
--- a/src/backend/distributed/commands/create_distributed_table.c
+++ b/src/backend/distributed/commands/create_distributed_table.c
@@ -94,6 +94,28 @@
 #include "utils/syscache.h"
 #include "utils/inval.h"

+
+/* common params that apply to all Citus table types */
+typedef struct
+{
+	char distributionMethod;
+	char replicationModel;
+} CitusTableParams;
+
+
+/*
+ * Params that only apply to distributed tables, i.e., the ones that are
+ * known as DISTRIBUTED_TABLE by Citus metadata.
+ */
+typedef struct
+{
+	int shardCount;
+	bool shardCountIsStrict;
+	char *colocateWithTableName;
+	char *distributionColumnName;
+} DistributedTableParams;
+
+
 /*
  * once every LOG_PER_TUPLE_AMOUNT, the copy will be logged.
*/ @@ -112,17 +134,16 @@ static List * HashSplitPointsForShardList(List *shardList); static List * HashSplitPointsForShardCount(int shardCount); static List * WorkerNodesForShardList(List *shardList); static List * RoundRobinWorkerNodeList(List *workerNodeList, int listLength); -static void CreateCitusTable(Oid relationId, char *distributionColumnName, - char distributionMethod, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName, - char replicationModel); +static CitusTableParams DecideCitusTableParams(CitusTableType tableType, + DistributedTableParams * + distributedTableParams); +static void CreateCitusTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams); static void CreateHashDistributedTableShards(Oid relationId, int shardCount, Oid colocatedTableId, bool localTableEmpty); -static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn, - char distributionMethod, char replicationModel, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName); +static uint32 ColocationIdForNewTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams, + Var *distributionColumn); static void EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn, char distributionMethod, uint32 colocationId, char replicationModel); @@ -962,14 +983,42 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, int shardCount, bool shardCountIsStrict, char *colocateWithTableName) { - Assert(distributionMethod != DISTRIBUTE_BY_NONE); + CitusTableType tableType; + switch (distributionMethod) + { + case DISTRIBUTE_BY_HASH: + { + tableType = HASH_DISTRIBUTED; + break; + } - char replicationModel = DecideDistTableReplicationModel(distributionMethod, - colocateWithTableName); - CreateCitusTable(relationId, distributionColumnName, - distributionMethod, shardCount, - shardCountIsStrict, colocateWithTableName, - replicationModel); + case DISTRIBUTE_BY_APPEND: + { + tableType = APPEND_DISTRIBUTED; + break; + } + + case DISTRIBUTE_BY_RANGE: + { + tableType = RANGE_DISTRIBUTED; + break; + } + + default: + { + ereport(ERROR, (errmsg("unexpected distribution method when " + "deciding Citus table type"))); + break; + } + } + + DistributedTableParams distributedTableParams = { + .colocateWithTableName = colocateWithTableName, + .shardCount = shardCount, + .shardCountIsStrict = shardCountIsStrict, + .distributionColumnName = distributionColumnName + }; + CreateCitusTable(relationId, tableType, &distributedTableParams); } @@ -980,16 +1029,7 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, void CreateReferenceTable(Oid relationId) { - char *distributionColumnName = NULL; - char distributionMethod = DISTRIBUTE_BY_NONE; - int shardCount = 1; - bool shardCountIsStrict = true; - char *colocateWithTableName = NULL; - char replicationModel = REPLICATION_MODEL_2PC; - CreateCitusTable(relationId, distributionColumnName, - distributionMethod, shardCount, - shardCountIsStrict, colocateWithTableName, - replicationModel); + CreateCitusTable(relationId, REFERENCE_TABLE, NULL); } @@ -997,6 +1037,9 @@ CreateReferenceTable(Oid relationId) * CreateCitusTable is the internal method that creates a Citus table in * given configuration. * + * DistributedTableParams should be non-null only if we're creating a distributed + * table. + * * This functions contains all necessary logic to create distributed tables. 
It * performs necessary checks to ensure distributing the table is safe. If it is * safe to distribute the table, this function creates distributed table metadata, @@ -1004,11 +1047,17 @@ CreateReferenceTable(Oid relationId) * partitioned tables by distributing its partitions as well. */ static void -CreateCitusTable(Oid relationId, char *distributionColumnName, - char distributionMethod, int shardCount, - bool shardCountIsStrict, char *colocateWithTableName, - char replicationModel) +CreateCitusTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams) { + if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED || + tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL)) + { + ereport(ERROR, (errmsg("distributed table params must be provided " + "when creating a distributed table and must " + "not be otherwise"))); + } + /* * EnsureTableNotDistributed errors out when relation is a citus table but * we don't want to ask user to first undistribute their citus local tables @@ -1034,11 +1083,8 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, * that ALTER TABLE hook does the necessary job, which means converting * local tables to citus local tables to properly support such foreign * keys. - * - * This function does not expect to create Citus local table, so we blindly - * create reference table when the method is DISTRIBUTE_BY_NONE. */ - else if (distributionMethod == DISTRIBUTE_BY_NONE && + else if (tableType == REFERENCE_TABLE && ShouldEnableLocalReferenceForeignKeys() && HasForeignKeyWithLocalTable(relationId)) { @@ -1068,21 +1114,29 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, PropagatePrerequisiteObjectsForDistributedTable(relationId); - Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId, - distributionColumnName, - NoLock); + Var *distributionColumn = NULL; + if (distributedTableParams) + { + distributionColumn = BuildDistributionKeyFromColumnName(relationId, + distributedTableParams-> + distributionColumnName, + NoLock); + } + + CitusTableParams citusTableParams = DecideCitusTableParams(tableType, + distributedTableParams); /* * ColocationIdForNewTable assumes caller acquires lock on relationId. In our case, * our caller already acquired lock on relationId. 
*/ - uint32 colocationId = ColocationIdForNewTable(relationId, distributionColumn, - distributionMethod, replicationModel, - shardCount, shardCountIsStrict, - colocateWithTableName); + uint32 colocationId = ColocationIdForNewTable(relationId, tableType, + distributedTableParams, + distributionColumn); - EnsureRelationCanBeDistributed(relationId, distributionColumn, distributionMethod, - colocationId, replicationModel); + EnsureRelationCanBeDistributed(relationId, distributionColumn, + citusTableParams.distributionMethod, + colocationId, citusTableParams.replicationModel); /* * Make sure that existing reference tables have been replicated to all the nodes @@ -1111,8 +1165,10 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, bool autoConverted = false; /* create an entry for distributed table in pg_dist_partition */ - InsertIntoPgDistPartition(relationId, distributionMethod, distributionColumn, - colocationId, replicationModel, autoConverted); + InsertIntoPgDistPartition(relationId, citusTableParams.distributionMethod, + distributionColumn, + colocationId, citusTableParams.replicationModel, + autoConverted); /* foreign tables do not support TRUNCATE trigger */ if (RegularTable(relationId)) @@ -1121,17 +1177,14 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, } /* create shards for hash distributed and reference tables */ - if (distributionMethod == DISTRIBUTE_BY_HASH) + if (tableType == HASH_DISTRIBUTED) { - CreateHashDistributedTableShards(relationId, shardCount, colocatedTableId, + CreateHashDistributedTableShards(relationId, distributedTableParams->shardCount, + colocatedTableId, localTableEmpty); } - else if (distributionMethod == DISTRIBUTE_BY_NONE) + else if (tableType == REFERENCE_TABLE) { - /* - * This function does not expect to create Citus local table, so we blindly - * create reference table when the method is DISTRIBUTE_BY_NONE. - */ CreateReferenceTableShard(relationId); } @@ -1173,9 +1226,14 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, { MemoryContextReset(citusPartitionContext); - CreateDistributedTable(partitionRelationId, distributionColumnName, - distributionMethod, shardCount, false, - parentRelationName); + DistributedTableParams childDistributedTableParams = { + .colocateWithTableName = parentRelationName, + .shardCount = distributedTableParams->shardCount, + .shardCountIsStrict = false, + .distributionColumnName = distributedTableParams->distributionColumnName, + }; + CreateCitusTable(partitionRelationId, tableType, + &childDistributedTableParams); } MemoryContextSwitchTo(oldContext); @@ -1183,8 +1241,7 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, } /* copy over data for hash distributed and reference tables */ - if (distributionMethod == DISTRIBUTE_BY_HASH || - distributionMethod == DISTRIBUTE_BY_NONE) + if (tableType == HASH_DISTRIBUTED || tableType == REFERENCE_TABLE) { if (RegularTable(relationId)) { @@ -1203,6 +1260,70 @@ CreateCitusTable(Oid relationId, char *distributionColumnName, } +/* + * DecideCitusTableParams decides CitusTableParams based on given CitusTableType + * and DistributedTableParams if it's a distributed table. + * + * DistributedTableParams should be non-null only if CitusTableType corresponds + * to a distributed table. 
+ */ +static +CitusTableParams +DecideCitusTableParams(CitusTableType tableType, + DistributedTableParams *distributedTableParams) +{ + CitusTableParams citusTableParams = { 0 }; + switch (tableType) + { + case HASH_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_HASH; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(DISTRIBUTE_BY_HASH, + distributedTableParams-> + colocateWithTableName); + break; + } + + case APPEND_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_APPEND; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(APPEND_DISTRIBUTED, + distributedTableParams-> + colocateWithTableName); + break; + } + + case RANGE_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_RANGE; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(RANGE_DISTRIBUTED, + distributedTableParams-> + colocateWithTableName); + break; + } + + case REFERENCE_TABLE: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE; + citusTableParams.replicationModel = REPLICATION_MODEL_2PC; + break; + } + + default: + { + ereport(ERROR, (errmsg("unexpected table type when deciding Citus " + "table params"))); + break; + } + } + + return citusTableParams; +} + + /* * PropagatePrerequisiteObjectsForDistributedTable ensures we can create shards * on all nodes by ensuring all dependent objects exist on all node. @@ -1547,28 +1668,34 @@ CreateHashDistributedTableShards(Oid relationId, int shardCount, /* - * ColocationIdForNewTable returns a colocation id for hash-distributed table + * ColocationIdForNewTable returns a colocation id for given table * according to given configuration. If there is no such configuration, it * creates one and returns colocation id of newly the created colocation group. + * Note that DistributedTableParams and the distribution column Var should be + * non-null only if CitusTableType corresponds to a distributed table. + * * For append and range distributed tables, this function errors out if * colocateWithTableName parameter is not NULL, otherwise directly returns * INVALID_COLOCATION_ID. * + * For reference tables, returns the common reference table colocation id. + * * This function assumes its caller take necessary lock on relationId to * prevent possible changes on it. 
*/ static uint32 -ColocationIdForNewTable(Oid relationId, Var *distributionColumn, - char distributionMethod, char replicationModel, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName) +ColocationIdForNewTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams, + Var *distributionColumn) { + CitusTableParams citusTableParams = DecideCitusTableParams(tableType, + distributedTableParams); + uint32 colocationId = INVALID_COLOCATION_ID; - if (distributionMethod == DISTRIBUTE_BY_APPEND || - distributionMethod == DISTRIBUTE_BY_RANGE) + if (tableType == APPEND_DISTRIBUTED || tableType == RANGE_DISTRIBUTED) { - if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0) + if (!IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot distribute relation"), @@ -1578,7 +1705,7 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, return colocationId; } - else if (distributionMethod == DISTRIBUTE_BY_NONE) + else if (tableType == REFERENCE_TABLE) { return CreateReferenceTableColocationId(); } @@ -1589,27 +1716,29 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, * can be sure that there will no modifications on the colocation table * until this transaction is committed. */ - Assert(distributionMethod == DISTRIBUTE_BY_HASH); + Assert(citusTableParams.distributionMethod == DISTRIBUTE_BY_HASH); Oid distributionColumnType = distributionColumn->vartype; Oid distributionColumnCollation = get_typcollation(distributionColumnType); /* get an advisory lock to serialize concurrent default group creations */ - if (IsColocateWithDefault(colocateWithTableName)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { AcquireColocationDefaultLock(); } colocationId = FindColocateWithColocationId(relationId, - replicationModel, + citusTableParams.replicationModel, distributionColumnType, distributionColumnCollation, - shardCount, + distributedTableParams->shardCount, + distributedTableParams-> shardCountIsStrict, + distributedTableParams-> colocateWithTableName); - if (IsColocateWithDefault(colocateWithTableName) && (colocationId != - INVALID_COLOCATION_ID)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName) && + (colocationId != INVALID_COLOCATION_ID)) { /* * we can release advisory lock if there is already a default entry for given params; @@ -1621,23 +1750,25 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, if (colocationId == INVALID_COLOCATION_ID) { - if (IsColocateWithDefault(colocateWithTableName)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { /* * Generate a new colocation ID and insert a pg_dist_colocation * record. */ - colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor, + colocationId = CreateColocationGroup(distributedTableParams->shardCount, + ShardReplicationFactor, distributionColumnType, distributionColumnCollation); } - else if (IsColocateWithNone(colocateWithTableName)) + else if (IsColocateWithNone(distributedTableParams->colocateWithTableName)) { /* * Generate a new colocation ID and insert a pg_dist_colocation * record. 
*/ - colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor, + colocationId = CreateColocationGroup(distributedTableParams->shardCount, + ShardReplicationFactor, distributionColumnType, distributionColumnCollation); } From 821f26cc743b04a7926384a3d294a69bda002936 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 16 Jan 2023 13:08:51 +0300 Subject: [PATCH 03/18] Fix flaky test detection for upgrade tests When run_test.py is run for an upgrade_.*_after.sql then, then automatically run the corresponding uprade_.*_before.sql file first. This is because all those upgrade_.*_after.sql files depend on the objects created in upgrade_.*_before.sql files by definition. --- src/test/regress/citus_tests/run_test.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index a4b303e90..3daac8b6a 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -15,6 +15,16 @@ import common import config + +# Returns true if given test_schedule_line is of the form: +# "test: upgrade_ ... _after .." +def schedule_line_is_upgrade_after(test_schedule_line: str) -> bool: + return ( + test_schedule_line.startswith("test: upgrade_") + and "_after" in test_schedule_line + ) + + if __name__ == "__main__": args = argparse.ArgumentParser() args.add_argument( @@ -172,6 +182,11 @@ if __name__ == "__main__": if test_file_name in deps: dependencies = deps[test_file_name] + elif schedule_line_is_upgrade_after(test_schedule_line): + dependencies = TestDeps( + default_base_schedule(test_schedule), + [test_file_name.replace("_after", "_before")], + ) else: dependencies = TestDeps(default_base_schedule(test_schedule)) From 994f67185f1eba1236ec6c78448328f2b50f7bff Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 16 Jan 2023 13:38:20 +0300 Subject: [PATCH 04/18] Make upgrade_columnar_after runnable multiple times This commit hides port numbers in upgrade_columnar_after because the port numbers assigned to nodes in upgrade schedule differ from the ones that flaky test detector assigns. --- .../expected/upgrade_columnar_after.out | 42 ++++++++++--------- .../regress/sql/upgrade_columnar_after.sql | 18 ++++---- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/test/regress/expected/upgrade_columnar_after.out b/src/test/regress/expected/upgrade_columnar_after.out index 0da9bb17f..8bb09d861 100644 --- a/src/test/regress/expected/upgrade_columnar_after.out +++ b/src/test/regress/expected/upgrade_columnar_after.out @@ -228,10 +228,12 @@ BEGIN; 22 (1 row) - -- make sure that serial is preserved - -- since we run "after schedule" twice and "rollback" wouldn't undo - -- sequence changes, it can be 22 or 33, not a different value - SELECT max(id) in (22, 33) FROM text_data; + -- Make sure that serial is preserved. + -- + -- Since we might run "after schedule" several times for flaky test + -- detection and "rollback" wouldn't undo sequence changes, "id" should + -- look like below: + SELECT max(id) >= 11 AND max(id) % 11 = 0 FROM text_data; ?column? --------------------------------------------------------------------- t @@ -292,7 +294,7 @@ SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- Check the same for workers too. 
-SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace @@ -308,44 +310,44 @@ WHERE classid = 'pg_am'::regclass::oid AND deptype = 'n'; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"SELECT 10") - (localhost,10202,t,"SELECT 10") + t | SELECT 10 + t | SELECT 10 (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ (TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) UNION (TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"") - (localhost,10202,t,"") + t | + t | (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,t) - (localhost,10202,t,t) + t | t + t | t (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"DROP TABLE") - (localhost,10202,t,"DROP TABLE") + t | DROP TABLE + t | DROP TABLE (2 rows) diff --git a/src/test/regress/sql/upgrade_columnar_after.sql b/src/test/regress/sql/upgrade_columnar_after.sql index f2839645c..cf54ec80a 100644 --- a/src/test/regress/sql/upgrade_columnar_after.sql +++ b/src/test/regress/sql/upgrade_columnar_after.sql @@ -101,10 +101,12 @@ BEGIN; INSERT INTO text_data (value) SELECT generate_random_string(1024 * 10) FROM generate_series(0,10); SELECT count(DISTINCT value) FROM text_data; - -- make sure that serial is preserved - -- since we run "after schedule" twice and "rollback" wouldn't undo - -- sequence changes, it can be 22 or 33, not a different value - SELECT max(id) in (22, 33) FROM text_data; + -- Make sure that serial is preserved. + -- + -- Since we might run "after schedule" several times for flaky test + -- detection and "rollback" wouldn't undo sequence changes, "id" should + -- look like below: + SELECT max(id) >= 11 AND max(id) % 11 = 0 FROM text_data; -- since we run "after schedule" twice, rollback the transaction -- to avoid getting "table already exists" errors @@ -160,7 +162,7 @@ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- Check the same for workers too. 
-SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace @@ -177,7 +179,7 @@ WHERE classid = 'pg_am'::regclass::oid AND $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ (TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) UNION @@ -185,13 +187,13 @@ UNION $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; $$ From 2b4be535de51749878d046d8a1db9659865a0dfa Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 16 Jan 2023 18:11:00 +0300 Subject: [PATCH 05/18] Do clean-up before upgrade_columnar_before to make it runnable multiple times So that flaky test detector can run upgrade_columnar_before.sql multiple times. --- .../expected/upgrade_columnar_before.out | 22 +++++++++++++++++ .../regress/sql/upgrade_columnar_before.sql | 24 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/test/regress/expected/upgrade_columnar_before.out b/src/test/regress/expected/upgrade_columnar_before.out index 28c252e30..a4895c770 100644 --- a/src/test/regress/expected/upgrade_columnar_before.out +++ b/src/test/regress/expected/upgrade_columnar_before.out @@ -1,5 +1,27 @@ -- Test if relying on topological sort of the objects, not their names, works -- fine when re-creating objects during pg_upgrade. +DO +$$ +BEGIN +IF EXISTS (SELECT * FROM pg_namespace WHERE nspname = 'upgrade_columnar') +THEN + -- Drop the the table leftover from the earlier run of + -- upgrade_columnar_before.sql. Similarly, drop the fake public schema + -- created before and rename the original one (renamed to citus_schema) + -- back to public. + -- + -- This can only happen if upgrade_columnar_before.sql is run multiple + -- times for flaky test detection. + DROP TABLE citus_schema.new_columnar_table; + DROP SCHEMA public CASCADE; + ALTER SCHEMA citus_schema RENAME TO public; + + SET LOCAL client_min_messages TO WARNING; + DROP SCHEMA upgrade_columnar CASCADE; +END IF; +END +$$ +LANGUAGE plpgsql; ALTER SCHEMA public RENAME TO citus_schema; SET search_path TO citus_schema; -- As mentioned in https://github.com/citusdata/citus/issues/5447, it diff --git a/src/test/regress/sql/upgrade_columnar_before.sql b/src/test/regress/sql/upgrade_columnar_before.sql index ea71dba02..6f39f4234 100644 --- a/src/test/regress/sql/upgrade_columnar_before.sql +++ b/src/test/regress/sql/upgrade_columnar_before.sql @@ -1,5 +1,29 @@ -- Test if relying on topological sort of the objects, not their names, works -- fine when re-creating objects during pg_upgrade. + +DO +$$ +BEGIN +IF EXISTS (SELECT * FROM pg_namespace WHERE nspname = 'upgrade_columnar') +THEN + -- Drop the the table leftover from the earlier run of + -- upgrade_columnar_before.sql. Similarly, drop the fake public schema + -- created before and rename the original one (renamed to citus_schema) + -- back to public. + -- + -- This can only happen if upgrade_columnar_before.sql is run multiple + -- times for flaky test detection. 
+ DROP TABLE citus_schema.new_columnar_table; + DROP SCHEMA public CASCADE; + ALTER SCHEMA citus_schema RENAME TO public; + + SET LOCAL client_min_messages TO WARNING; + DROP SCHEMA upgrade_columnar CASCADE; +END IF; +END +$$ +LANGUAGE plpgsql; + ALTER SCHEMA public RENAME TO citus_schema; SET search_path TO citus_schema; From be0735a329d599e50e60893e92f3aa4d494eb39b Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 19 Jan 2023 18:13:15 +0300 Subject: [PATCH 06/18] Use "cpp" to expand "#include" directives in columnar sql files --- src/backend/columnar/.gitignore | 3 +++ src/backend/columnar/Makefile | 47 +++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 src/backend/columnar/.gitignore diff --git a/src/backend/columnar/.gitignore b/src/backend/columnar/.gitignore new file mode 100644 index 000000000..b70410d1d --- /dev/null +++ b/src/backend/columnar/.gitignore @@ -0,0 +1,3 @@ +# The directory used to store columnar sql files after pre-processing them +# with 'cpp' in build-time, see src/backend/columnar/Makefile. +/build/ diff --git a/src/backend/columnar/Makefile b/src/backend/columnar/Makefile index f9fa09b7c..ded52a98d 100644 --- a/src/backend/columnar/Makefile +++ b/src/backend/columnar/Makefile @@ -10,14 +10,51 @@ OBJS += \ MODULE_big = citus_columnar EXTENSION = citus_columnar -columnar_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql)) -columnar_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql)) -DATA = $(columnar_sql_files) \ - $(columnar_downgrade_sql_files) +template_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql)) +template_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/sql/downgrades/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql)) +generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_files)) +generated_downgrade_sql_files += $(patsubst %,$(citus_abs_srcdir)/build/sql/%,$(template_downgrade_sql_files)) + +DATA_built = $(generated_sql_files) PG_CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include include $(citus_top_builddir)/Makefile.global -.PHONY: install-all +SQL_DEPDIR=.deps/sql +SQL_BUILDDIR=build/sql + +$(generated_sql_files): $(citus_abs_srcdir)/build/%: % + @mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR) + @# -MF is used to store dependency files(.Po) in another directory for separation + @# -MT is used to change the target of the rule emitted by dependency generation. + @# -P is used to inhibit generation of linemarkers in the output from the preprocessor. + @# -undef is used to not predefine any system-specific or GCC-specific macros. + @# `man cpp` for further information + cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@ + +$(generated_downgrade_sql_files): $(citus_abs_srcdir)/build/sql/%: sql/downgrades/% + @mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR) + @# -MF is used to store dependency files(.Po) in another directory for separation + @# -MT is used to change the target of the rule emitted by dependency generation. + @# -P is used to inhibit generation of linemarkers in the output from the preprocessor. + @# -undef is used to not predefine any system-specific or GCC-specific macros. 
+	@# `man cpp` for further information
+	cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
+
+.PHONY: install install-downgrades install-all
+
+cleanup-before-install:
+	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar.control
+	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/columnar--*
+	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar--*
+
+install: cleanup-before-install
+
+# install and install-downgrades should be run sequentially
 install-all: install
+	$(MAKE) install-downgrades
+
+install-downgrades: $(generated_downgrade_sql_files)
+	$(INSTALL_DATA) $(generated_downgrade_sql_files) '$(DESTDIR)$(datadir)/$(datamoduledir)/'
+

From 9550ebd118bf961bcd504cc8ff40a820d280f11f Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Wed, 18 Jan 2023 15:32:15 +0300
Subject: [PATCH 07/18] Remove pg_depend entries from columnar metadata indexes to columnar-am

In the past, having columnar tables in the cluster was causing pg upgrades
to fail when attempting to access columnar metadata. This is because pg_dump
doesn't see the objects that we use for columnar-am related bookkeeping as
dependencies of the tables using columnar-am.

To fix that, in #5456 we inserted some "normal dependency" edges (from those
objects to columnar-am) into pg_depend.

This helped us ensure the existence of a class of metadata objects --such as
columnar.storageid_seq-- and helped fix #5437.

However, the normal-dependency edges that we added for indexes on columnar
metadata tables --such as columnar.stripe_pkey-- didn't help at all because
they were indeed causing dependency loops (#5510) and pg_dump was not able
to take those dependency edges into account.

For this reason, this commit deletes those dependency edges so that pg_dump
stops complaining about them. Note that it's not critical to delete those
edges from pg_depend since they're not breaking pg upgrades, but they were
triggering some warning messages. And given that backporting a SQL change to
older versions is quite hard, we skip backporting this.
--- .../sql/citus_columnar--11.1-1--11.2-1.sql | 18 ++++++++ .../citus_columnar--11.2-1--11.1-1.sql | 3 ++ .../11.2-1.sql | 43 +++++++++++++++++++ .../latest.sql | 17 +++----- src/test/regress/expected/multi_extension.out | 37 ++++++++++++++++ .../expected/upgrade_columnar_after.out | 24 ++++++++--- src/test/regress/sql/multi_extension.sql | 33 ++++++++++++++ .../regress/sql/upgrade_columnar_after.sql | 20 ++++++--- 8 files changed, 172 insertions(+), 23 deletions(-) create mode 100644 src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql diff --git a/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql b/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql index 60a0401d5..89ccd9e74 100644 --- a/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql +++ b/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql @@ -1 +1,19 @@ -- citus_columnar--11.1-1--11.2-1 + +#include "udfs/columnar_ensure_am_depends_catalog/11.2-1.sql" + +DELETE FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid + AND objid IN (select oid from pg_am where amname = 'columnar') + AND objsubid = 0 + AND refclassid = 'pg_class'::regclass::oid + AND refobjid IN ( + 'columnar_internal.stripe_first_row_number_idx'::regclass::oid, + 'columnar_internal.chunk_group_pkey'::regclass::oid, + 'columnar_internal.chunk_pkey'::regclass::oid, + 'columnar_internal.options_pkey'::regclass::oid, + 'columnar_internal.stripe_first_row_number_idx'::regclass::oid, + 'columnar_internal.stripe_pkey'::regclass::oid + ) + AND refobjsubid = 0 + AND deptype = 'n'; diff --git a/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql b/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql index 9acf68da3..c987bfa67 100644 --- a/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql +++ b/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql @@ -1 +1,4 @@ -- citus_columnar--11.2-1--11.1-1 + +-- Note that we intentionally do not re-insert the pg_depend records that we +-- deleted via citus_columnar--11.1-1--11.2-1.sql. diff --git a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql new file mode 100644 index 000000000..101db17fb --- /dev/null +++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql @@ -0,0 +1,43 @@ +CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog +AS $func$ +BEGIN + INSERT INTO pg_depend + WITH columnar_schema_members(relid) AS ( + SELECT pg_class.oid AS relid FROM pg_class + WHERE relnamespace = + COALESCE( + (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar_internal'), + (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar') + ) + AND relname IN ('chunk', + 'chunk_group', + 'options', + 'storageid_seq', + 'stripe') + ) + SELECT -- Define a dependency edge from "columnar table access method" .. + 'pg_am'::regclass::oid as classid, + (select oid from pg_am where amname = 'columnar') as objid, + 0 as objsubid, + -- ... to some objects registered as regclass and that lives in + -- "columnar" schema. That contains catalog tables and the sequences + -- created in "columnar" schema. + -- + -- Given the possibility of user might have created their own objects + -- in columnar schema, we explicitly specify list of objects that we + -- are interested in. 
+ 'pg_class'::regclass::oid as refclassid, + columnar_schema_members.relid as refobjid, + 0 as refobjsubid, + 'n' as deptype + FROM columnar_schema_members + -- Avoid inserting duplicate entries into pg_depend. + EXCEPT TABLE pg_depend; +END; +$func$; +COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() + IS 'internal function responsible for creating dependencies from columnar ' + 'table access method to the rel objects in columnar schema'; diff --git a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql index ade15390a..101db17fb 100644 --- a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql +++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog() +CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() RETURNS void LANGUAGE plpgsql SET search_path = pg_catalog @@ -14,22 +14,17 @@ BEGIN ) AND relname IN ('chunk', 'chunk_group', - 'chunk_group_pkey', - 'chunk_pkey', 'options', - 'options_pkey', 'storageid_seq', - 'stripe', - 'stripe_first_row_number_idx', - 'stripe_pkey') + 'stripe') ) SELECT -- Define a dependency edge from "columnar table access method" .. 'pg_am'::regclass::oid as classid, (select oid from pg_am where amname = 'columnar') as objid, 0 as objsubid, - -- ... to each object that is registered to pg_class and that lives - -- in "columnar" schema. That contains catalog tables, indexes - -- created on them and the sequences created in "columnar" schema. + -- ... to some objects registered as regclass and that lives in + -- "columnar" schema. That contains catalog tables and the sequences + -- created in "columnar" schema. -- -- Given the possibility of user might have created their own objects -- in columnar schema, we explicitly specify list of objects that we @@ -43,6 +38,6 @@ BEGIN EXCEPT TABLE pg_depend; END; $func$; -COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog() +COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() IS 'internal function responsible for creating dependencies from columnar ' 'table access method to the rel objects in columnar schema'; diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 092ec9e5c..ead2a5b85 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1258,6 +1258,43 @@ SELECT * FROM pg_dist_cleanup; 2 | 0 | 1 | table_with_orphaned_shards_102011 | 0 | 0 (2 rows) +ALTER EXTENSION citus_columnar UPDATE TO '11.2-1'; +-- Make sure that we defined dependencies from all rel objects (tables, +-- indexes, sequences ..) to columnar table access method ... +SELECT pg_class.oid INTO columnar_schema_members +FROM pg_class, pg_namespace +WHERE pg_namespace.oid=pg_class.relnamespace AND + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); +SELECT refobjid INTO columnar_schema_members_pg_depend +FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid AND + objid = (select oid from pg_am where amname = 'columnar') AND + objsubid = 0 AND + refclassid = 'pg_class'::regclass::oid AND + refobjsubid = 0 AND + deptype = 'n'; +-- ... 
, so this should be empty, +(TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) +UNION +(TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); + oid +--------------------------------------------------------------------- +(0 rows) + +-- ... , and both columnar_schema_members_pg_depend & columnar_schema_members +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- error out as cleanup records remain ALTER EXTENSION citus UPDATE TO '11.0-4'; ERROR: pg_dist_cleanup is introduced in Citus 11.1 diff --git a/src/test/regress/expected/upgrade_columnar_after.out b/src/test/regress/expected/upgrade_columnar_after.out index 8bb09d861..768a057f9 100644 --- a/src/test/regress/expected/upgrade_columnar_after.out +++ b/src/test/regress/expected/upgrade_columnar_after.out @@ -267,7 +267,12 @@ ROLLBACK; SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -285,8 +290,8 @@ UNION (0 rows) -- ... , and both columnar_schema_members_pg_depend & columnar_schema_members --- should have 10 entries. -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; ?column? --------------------------------------------------------------------- t @@ -299,7 +304,12 @@ $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -312,8 +322,8 @@ $$ ); success | result --------------------------------------------------------------------- - t | SELECT 10 - t | SELECT 10 + t | SELECT 5 + t | SELECT 5 (2 rows) SELECT success, result FROM run_command_on_workers( @@ -331,7 +341,7 @@ $$ SELECT success, result FROM run_command_on_workers( $$ -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; $$ ); success | result diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 8c8ade9d8..d202227ae 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -556,6 +556,39 @@ ALTER EXTENSION citus UPDATE TO '11.2-1'; SELECT * FROM pg_dist_placement ORDER BY shardid; SELECT * FROM pg_dist_cleanup; +ALTER EXTENSION citus_columnar UPDATE TO '11.2-1'; + +-- Make sure that we defined dependencies from all rel objects (tables, +-- indexes, sequences ..) to columnar table access method ... 
+SELECT pg_class.oid INTO columnar_schema_members +FROM pg_class, pg_namespace +WHERE pg_namespace.oid=pg_class.relnamespace AND + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); +SELECT refobjid INTO columnar_schema_members_pg_depend +FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid AND + objid = (select oid from pg_am where amname = 'columnar') AND + objsubid = 0 AND + refclassid = 'pg_class'::regclass::oid AND + refobjsubid = 0 AND + deptype = 'n'; + +-- ... , so this should be empty, +(TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) +UNION +(TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); + +-- ... , and both columnar_schema_members_pg_depend & columnar_schema_members +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; + +DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; + -- error out as cleanup records remain ALTER EXTENSION citus UPDATE TO '11.0-4'; diff --git a/src/test/regress/sql/upgrade_columnar_after.sql b/src/test/regress/sql/upgrade_columnar_after.sql index cf54ec80a..133fcfde0 100644 --- a/src/test/regress/sql/upgrade_columnar_after.sql +++ b/src/test/regress/sql/upgrade_columnar_after.sql @@ -139,7 +139,12 @@ ROLLBACK; SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -155,8 +160,8 @@ UNION (TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); -- ... , and both columnar_schema_members_pg_depend & columnar_schema_members --- should have 10 entries. -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; @@ -167,7 +172,12 @@ $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -189,7 +199,7 @@ $$ SELECT success, result FROM run_command_on_workers( $$ -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; $$ ); From b8b85072d6281f228425e40ddb1d2b4fe2f6c6ff Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Wed, 15 Mar 2023 14:53:14 +0100 Subject: [PATCH 08/18] Add pytest depedencies to Pipfile (#6767) In #6720 I'm adding a `pytest` based testing framework. This adds the dependencies for those. They have already been [merged into our docker files][the-process-merge] in the the-process repo preparation for #6720. But by not having them on our citus main branch it is impossible to make changes to the Pipfile, because our CI Dockerfiles and master are out of date. 
Since #6720 will need some more discussion and might take a few more weeks to be merged, this takes out the Pipfile changes. By merging this PR we can unblock new Pipfile changes. Unblocks and partially addresses #6766 [the-process-merge]: https://github.com/citusdata/the-process/pull/117 --- .circleci/config.yml | 2 +- src/test/regress/Pipfile | 6 ++ src/test/regress/Pipfile.lock | 176 ++++++++++++++++++++++++++++++++-- 3 files changed, 175 insertions(+), 9 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1d65ae59c..8f2d86f15 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-vb84a6c5' + default: '-v89059f9' pg13_version: type: string default: '13.10' diff --git a/src/test/regress/Pipfile b/src/test/regress/Pipfile index bb848c792..16da96f21 100644 --- a/src/test/regress/Pipfile +++ b/src/test/regress/Pipfile @@ -8,6 +8,12 @@ mitmproxy = {editable = true, ref = "main", git = "https://github.com/citusdata/ construct = "==2.9.45" docopt = "==0.6.2" cryptography = ">=39.0.1" +pytest = "*" +psycopg = "*" +filelock = "*" +pytest-asyncio = "*" +pytest-timeout = "*" +pytest-xdist = "*" [dev-packages] black = "*" diff --git a/src/test/regress/Pipfile.lock b/src/test/regress/Pipfile.lock index 0349032b2..8bf8715ea 100644 --- a/src/test/regress/Pipfile.lock +++ b/src/test/regress/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "dfc5545eeb592c0dd5ed002b7665d940288c5ead77d2f31a0aa08391569577fc" + "sha256": "456a43ce06df947ccbf02db7fcbfd654999acaae25911990d4d74fc04b10c77e" }, "pipfile-spec": 6, "requires": { @@ -24,6 +24,14 @@ "markers": "python_version >= '3.6'", "version": "==3.4.1" }, + "attrs": { + "hashes": [ + "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", + "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" + ], + "markers": "python_version >= '3.6'", + "version": "==22.2.0" + }, "blinker": { "hashes": [ "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6" @@ -245,6 +253,30 @@ "index": "pypi", "version": "==0.6.2" }, + "exceptiongroup": { + "hashes": [ + "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e", + "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23" + ], + "markers": "python_version < '3.11'", + "version": "==1.1.0" + }, + "execnet": { + "hashes": [ + "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5", + "sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.9.0" + }, + "filelock": { + "hashes": [ + "sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de", + "sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d" + ], + "index": "pypi", + "version": "==3.9.0" + }, "flask": { "hashes": [ "sha256:59da8a3170004800a2837844bfa84d49b022550616070f7cb1a659682b2e7c9f", @@ -285,6 +317,14 @@ "markers": "python_full_version >= '3.6.1'", "version": "==6.0.1" }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, "itsdangerous": { "hashes": [ "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44", @@ -435,6 +475,14 @@ ], "version": "==1.0.4" }, + 
"packaging": { + "hashes": [ + "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", + "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" + ], + "markers": "python_version >= '3.7'", + "version": "==23.0" + }, "passlib": { "hashes": [ "sha256:aa6bca462b8d8bda89c70b382f0c298a20b5560af6cbfa2dce410c0a2fb669f1", @@ -442,6 +490,14 @@ ], "version": "==1.7.4" }, + "pluggy": { + "hashes": [ + "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", + "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.0" + }, "protobuf": { "hashes": [ "sha256:0c44e01f74109decea196b5b313b08edb5316df77313995594a6981e95674259", @@ -469,6 +525,14 @@ "markers": "python_version >= '3.5'", "version": "==3.18.3" }, + "psycopg": { + "hashes": [ + "sha256:59b4a71536b146925513c0234dfd1dc42b81e65d56ce5335dff4813434dbc113", + "sha256:b1500c42063abaa01d30b056f0b300826b8dd8d586900586029a294ce74af327" + ], + "index": "pypi", + "version": "==3.1.8" + }, "publicsuffix2": { "hashes": [ "sha256:00f8cc31aa8d0d5592a5ced19cccba7de428ebca985db26ac852d920ddd6fe7b", @@ -514,7 +578,7 @@ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.4.7" }, "pyperclip": { @@ -523,6 +587,38 @@ ], "version": "==1.8.2" }, + "pytest": { + "hashes": [ + "sha256:c7c6ca206e93355074ae32f7403e8ea12163b1163c976fee7d4d84027c162be5", + "sha256:d45e0952f3727241918b8fd0f376f5ff6b301cc0777c6f9a556935c92d8a7d42" + ], + "index": "pypi", + "version": "==7.2.1" + }, + "pytest-asyncio": { + "hashes": [ + "sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36", + "sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442" + ], + "index": "pypi", + "version": "==0.20.3" + }, + "pytest-timeout": { + "hashes": [ + "sha256:c07ca07404c612f8abbe22294b23c368e2e5104b521c1790195561f37e1ac3d9", + "sha256:f6f50101443ce70ad325ceb4473c4255e9d74e3c7cd0ef827309dfa4c0d975c6" + ], + "index": "pypi", + "version": "==2.1.0" + }, + "pytest-xdist": { + "hashes": [ + "sha256:336098e3bbd8193276867cc87db8b22903c3927665dff9d1ac8684c02f597b68", + "sha256:fa10f95a2564cd91652f2d132725183c3b590d9fdcdec09d3677386ecf4c1ce9" + ], + "index": "pypi", + "version": "==3.2.0" + }, "ruamel.yaml": { "hashes": [ "sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33", @@ -531,6 +627,46 @@ "markers": "python_version >= '3'", "version": "==0.17.16" }, + "ruamel.yaml.clib": { + "hashes": [ + "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e", + "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3", + "sha256:184faeaec61dbaa3cace407cffc5819f7b977e75360e8d5ca19461cd851a5fc5", + "sha256:1f08fd5a2bea9c4180db71678e850b995d2a5f4537be0e94557668cf0f5f9497", + "sha256:2aa261c29a5545adfef9296b7e33941f46aa5bbd21164228e833412af4c9c75f", + "sha256:3110a99e0f94a4a3470ff67fc20d3f96c25b13d24c6980ff841e82bafe827cac", + "sha256:3243f48ecd450eddadc2d11b5feb08aca941b5cd98c9b1db14b2fd128be8c697", + "sha256:370445fd795706fd291ab00c9df38a0caed0f17a6fb46b0f607668ecb16ce763", + "sha256:40d030e2329ce5286d6b231b8726959ebbe0404c92f0a578c0e2482182e38282", + 
"sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94", + "sha256:4a4d8d417868d68b979076a9be6a38c676eca060785abaa6709c7b31593c35d1", + "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072", + "sha256:5bc0667c1eb8f83a3752b71b9c4ba55ef7c7058ae57022dd9b29065186a113d9", + "sha256:721bc4ba4525f53f6a611ec0967bdcee61b31df5a56801281027a3a6d1c2daf5", + "sha256:763d65baa3b952479c4e972669f679fe490eee058d5aa85da483ebae2009d231", + "sha256:7bdb4c06b063f6fd55e472e201317a3bb6cdeeee5d5a38512ea5c01e1acbdd93", + "sha256:8831a2cedcd0f0927f788c5bdf6567d9dc9cc235646a434986a852af1cb54b4b", + "sha256:91a789b4aa0097b78c93e3dc4b40040ba55bef518f84a40d4442f713b4094acb", + "sha256:92460ce908546ab69770b2e576e4f99fbb4ce6ab4b245345a3869a0a0410488f", + "sha256:99e77daab5d13a48a4054803d052ff40780278240a902b880dd37a51ba01a307", + "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8", + "sha256:a7b301ff08055d73223058b5c46c55638917f04d21577c95e00e0c4d79201a6b", + "sha256:be2a7ad8fd8f7442b24323d24ba0b56c51219513cfa45b9ada3b87b76c374d4b", + "sha256:bf9a6bc4a0221538b1a7de3ed7bca4c93c02346853f44e1cd764be0023cd3640", + "sha256:c3ca1fbba4ae962521e5eb66d72998b51f0f4d0f608d3c0347a48e1af262efa7", + "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a", + "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71", + "sha256:d5e51e2901ec2366b79f16c2299a03e74ba4531ddcfacc1416639c557aef0ad8", + "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7", + "sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80", + "sha256:ecdf1a604009bd35c674b9225a8fa609e0282d9b896c03dd441a91e5f53b534e", + "sha256:efa08d63ef03d079dcae1dfe334f6c8847ba8b645d08df286358b1f5293d24ab", + "sha256:f01da5790e95815eb5a8a138508c01c758e5f5bc0ce4286c4f7028b8dd7ac3d0", + "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646" + ], + "markers": "python_version < '3.10' and platform_python_implementation == 'CPython'", + "version": "==0.2.7" + }, "sortedcontainers": { "hashes": [ "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", @@ -538,6 +674,14 @@ ], "version": "==2.4.0" }, + "tomli": { + "hashes": [ + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version < '3.11'", + "version": "==2.0.1" + }, "tornado": { "hashes": [ "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", @@ -555,6 +699,14 @@ "markers": "python_version >= '3.7'", "version": "==6.2" }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" + }, "urwid": { "hashes": [ "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae" @@ -563,11 +715,11 @@ }, "werkzeug": { "hashes": [ - "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f", - "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5" + "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe", + "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612" ], "markers": "python_version >= '3.7'", - "version": "==2.2.2" + "version": "==2.2.3" }, "wsproto": { "hashes": [ @@ -690,11 +842,11 @@ }, "flake8-bugbear": { "hashes": [ - 
"sha256:04a115e5f9c8e87c38bdbbcdf9f58223ffe05469c07c9a7bd8633330bc4d078b", - "sha256:55902ab5a48c5ea53d8689ecd146eda548e72f2724192b9c1d68f6d975d13c06" + "sha256:39259814a83f33c8409417ee12dd4050c9c0bb4c8707c12fc18ae62b2f3ddee1", + "sha256:f136bd0ca2684f101168bba2310dec541e11aa6b252260c17dcf58d18069a740" ], "index": "pypi", - "version": "==23.1.20" + "version": "==23.2.13" }, "isort": { "hashes": [ @@ -767,6 +919,14 @@ ], "markers": "python_version < '3.11'", "version": "==2.0.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" } } } From 1e42cd3da03584d25aa0ace99574cbca05293189 Mon Sep 17 00:00:00 2001 From: Teja Mupparti Date: Wed, 21 Dec 2022 19:33:13 -0800 Subject: [PATCH 09/18] Support MERGE on distributed tables with restrictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements the phase - II of MERGE sql support Support routable query where all the tables in the merge-sql are distributed, co-located, and both the source and target relations are joined on the distribution column with a constant qual. This should be a Citus single-task query. Below is an example. SELECT create_distributed_table('t1', 'id'); SELECT create_distributed_table('s1', 'id', colocate_with => ‘t1’); MERGE INTO t1 USING s1 ON t1.id = s1.id AND t1.id = 100 WHEN MATCHED THEN UPDATE SET val = s1.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val, src) VALUES (s1.id, s1.val, s1.src) Basically, MERGE checks to see if There are a minimum of two distributed tables (source and a target). All the distributed tables are indeed colocated. MERGE relations are joined on the distribution column MERGE .. USING .. ON target.dist_key = source.dist_key The query should touch only a single shard i.e. JOIN AND with a constant qual MERGE .. USING .. ON target.dist_key = source.dist_key AND target.dist_key = <> If any of the conditions are not met, it raises an exception. (cherry picked from commit 44c387b978a51b0c0e87c7f9aec154cfc3041da1) This implements MERGE phase3 Support pushdown query where all the tables in the merge-sql are Citus-distributed, co-located, and both the source and target relations are joined on the distribution column. This will generate multiple tasks which execute independently after pushdown. SELECT create_distributed_table('t1', 'id'); SELECT create_distributed_table('s1', 'id', colocate_with => ‘t1’); MERGE INTO t1 USING s1 ON t1.id = s1.id WHEN MATCHED THEN UPDATE SET val = s1.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val, src) VALUES (s1.id, s1.val, s1.src) *The only exception for both the phases II and III is, UPDATEs and INSERTs must be done on the same shard-group as the joined key; for example, below scenarios are NOT supported as the key-value to be inserted/updated is not guaranteed to be on the same node as the id distribution-column. 
MERGE INTO target t USING source s ON (t.customer_id = s.customer_id) WHEN NOT MATCHED THEN - - INSERT(customer_id, …) VALUES (, ……); OR this scenario where we update the distribution column itself MERGE INTO target t USING source s On (t.customer_id = s.customer_id) WHEN MATCHED THEN UPDATE SET customer_id = 100; (cherry picked from commit fa7b8949a88bf614d5a07fc33f6159d9efa5d087) --- .../distributed/planner/distributed_planner.c | 194 +-- .../planner/fast_path_router_planner.c | 4 +- .../planner/multi_physical_planner.c | 17 +- .../planner/multi_router_planner.c | 353 ++++- .../relation_restriction_equivalence.c | 31 +- src/include/distributed/distributed_planner.h | 4 + .../relation_restriction_equivalence.h | 11 + src/test/regress/bin/normalize.sed | 4 + src/test/regress/expected/merge.out | 1306 ++++++++++++++++- src/test/regress/expected/pg15.out | 10 +- src/test/regress/expected/pgmerge.out | 14 +- src/test/regress/sql/merge.sql | 742 +++++++++- src/test/regress/sql/pgmerge.sql | 8 + 13 files changed, 2450 insertions(+), 248 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 701ae4ff5..262258d7f 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -75,9 +75,6 @@ static uint64 NextPlanId = 1; /* keep track of planner call stack levels */ int PlannerLevel = 0; -static void ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, - List *rangeTableList); -static bool ContainsMergeCommandWalker(Node *node); static bool ListContainsDistributedTableRTE(List *rangeTableList, bool *maybeHasForeignDistributedTable); static bool IsUpdateOrDelete(Query *query); @@ -132,7 +129,7 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext static RTEListProperties * GetRTEListProperties(List *rangeTableList); static List * TranslatedVars(PlannerInfo *root, int relationIndex); static void WarnIfListHasForeignDistributedTable(List *rangeTableList); -static void ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList); + /* Distributed planner hook */ PlannedStmt * @@ -200,12 +197,6 @@ distributed_planner(Query *parse, if (!fastPathRouterQuery) { - /* - * Fast path queries cannot have merge command, and we - * prevent the remaining here. - */ - ErrorIfQueryHasUnsupportedMergeCommand(parse, rangeTableList); - /* * When there are partitioned tables (not applicable to fast path), * pretend that they are regular tables to avoid unnecessary work @@ -304,44 +295,11 @@ distributed_planner(Query *parse, } -/* - * ErrorIfQueryHasUnsupportedMergeCommand walks over the query tree and bails out - * if there is no Merge command (e.g., CMD_MERGE) in the query tree. For merge, - * looks for all supported combinations, throws an exception if any violations - * are seen. - */ -static void -ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, List *rangeTableList) -{ - /* - * Postgres currently doesn't support Merge queries inside subqueries and - * ctes, but lets be defensive and do query tree walk anyway. - * - * We do not call this path for fast-path queries to avoid this additional - * overhead. - */ - if (!ContainsMergeCommandWalker((Node *) queryTree)) - { - /* No MERGE found */ - return; - } - - - /* - * In Citus we have limited support for MERGE, it's allowed - * only if all the tables(target, source or any CTE) tables - * are are local i.e. 
a combination of Citus local and Non-Citus - * tables (regular Postgres tables). - */ - ErrorIfMergeHasUnsupportedTables(queryTree, rangeTableList); -} - - /* * ContainsMergeCommandWalker walks over the node and finds if there are any * Merge command (e.g., CMD_MERGE) in the node. */ -static bool +bool ContainsMergeCommandWalker(Node *node) { #if PG_VERSION_NUM < PG_VERSION_15 @@ -676,7 +634,8 @@ bool IsUpdateOrDelete(Query *query) { return query->commandType == CMD_UPDATE || - query->commandType == CMD_DELETE; + query->commandType == CMD_DELETE || + query->commandType == CMD_MERGE; } @@ -2611,148 +2570,3 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList) } } } - - -/* - * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is - * permitted on special relations, such as materialized view, returns true only if - * it's a "source" relation. - */ -bool -IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) -{ - if (!IsMergeQuery(parse)) - { - return false; - } - - RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); - - /* Is it a target relation? */ - if (targetRte->relid == rte->relid) - { - return false; - } - - return true; -} - - -/* - * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE - * present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus - * tables (regular Postgres tables), raises an exception for all other combinations. - */ -static void -ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList) -{ - ListCell *tableCell = NULL; - - foreach(tableCell, rangeTableList) - { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(tableCell); - Oid relationId = rangeTableEntry->relid; - - switch (rangeTableEntry->rtekind) - { - case RTE_RELATION: - { - /* Check the relation type */ - break; - } - - case RTE_SUBQUERY: - case RTE_FUNCTION: - case RTE_TABLEFUNC: - case RTE_VALUES: - case RTE_JOIN: - case RTE_CTE: - { - /* Skip them as base table(s) will be checked */ - continue; - } - - /* - * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, - * such as, trigger data; until we find a genuine use case, raise an - * exception. - * RTE_RESULT is a node added by the planner and we shouldn't - * encounter it in the parse tree. 
- */ - case RTE_NAMEDTUPLESTORE: - case RTE_RESULT: - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not supported with " - "Tuplestores and results"))); - break; - } - - default: - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command: Unrecognized range table entry."))); - } - } - - /* RTE Relation can be of various types, check them now */ - - /* skip the regular views as they are replaced with subqueries */ - if (rangeTableEntry->relkind == RELKIND_VIEW) - { - continue; - } - - if (rangeTableEntry->relkind == RELKIND_MATVIEW || - rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) - { - /* Materialized view or Foreign table as target is not allowed */ - if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) - { - /* Non target relation is ok */ - continue; - } - else - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not allowed " - "on materialized view"))); - } - } - - if (rangeTableEntry->relkind != RELKIND_RELATION && - rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Unexpected relation type(relkind:%c) in MERGE command", - rangeTableEntry->relkind))); - } - - Assert(rangeTableEntry->relid != 0); - - /* Distributed tables and Reference tables are not supported yet */ - if (IsCitusTableType(relationId, REFERENCE_TABLE) || - IsCitusTableType(relationId, DISTRIBUTED_TABLE)) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not supported on " - "distributed/reference tables yet"))); - } - - /* Regular Postgres tables and Citus local tables are allowed */ - if (!IsCitusTable(relationId) || - IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) - { - continue; - } - - - /* Any other Citus table type missing ? */ - } - - /* All the tables are local, supported */ -} diff --git a/src/backend/distributed/planner/fast_path_router_planner.c b/src/backend/distributed/planner/fast_path_router_planner.c index aa029f3c0..e7d91a101 100644 --- a/src/backend/distributed/planner/fast_path_router_planner.c +++ b/src/backend/distributed/planner/fast_path_router_planner.c @@ -54,8 +54,6 @@ bool EnableFastPathRouterPlanner = true; static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey); -static bool ConjunctionContainsColumnFilter(Node *node, Var *column, - Node **distributionKeyValue); static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn, Node **distributionKeyValue); @@ -294,7 +292,7 @@ ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey) * * If the conjuction contains column filter which is const, distributionKeyValue is set. */ -static bool +bool ConjunctionContainsColumnFilter(Node *node, Var *column, Node **distributionKeyValue) { if (node == NULL) diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index be6caf0e2..b30dddeb7 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -2225,14 +2225,17 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId, } /* - * For left joins we don't care about the shards pruned for the right hand side. - * If the right hand side would prune to a smaller set we should still send it to - * all tables of the left hand side. 
However if the right hand side is bigger than - * the left hand side we don't have to send the query to any shard that is not - * matching anything on the left hand side. + * For left joins we don't care about the shards pruned for + * the right hand side. If the right hand side would prune + * to a smaller set we should still send it to all tables + * of the left hand side. However if the right hand side is + * bigger than the left hand side we don't have to send the + * query to any shard that is not matching anything on the + * left hand side. * - * Instead we will simply skip any RelationRestriction if it is an OUTER join and - * the table is part of the non-outer side of the join. + * Instead we will simply skip any RelationRestriction if it + * is an OUTER join and the table is part of the non-outer + * side of the join. */ if (IsInnerTableOfOuterJoin(relationRestriction)) { diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index f4591a770..99beff2c8 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -185,7 +185,6 @@ static DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelation List *targetList, CmdType commandType, List *returningList); - /* * CreateRouterPlan attempts to create a router executor plan for the given * SELECT statement. ->planningError is set if planning fails. @@ -905,6 +904,85 @@ NodeIsFieldStore(Node *node) } +/* + * MergeQuerySupported does check for a MERGE command in the query, if it finds + * one, it will verify the below criteria + * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables + * - Distributed tables requirements in ErrorIfDistTablesNotColocated + * - Checks target-lists and functions-in-quals in TargetlistAndFunctionsSupported + */ +static DeferredErrorMessage * +MergeQuerySupported(Query *originalQuery, + PlannerRestrictionContext *plannerRestrictionContext) +{ + /* For non-MERGE commands it's a no-op */ + if (!QueryHasMergeCommand(originalQuery)) + { + return NULL; + } + + List *rangeTableList = ExtractRangeTableEntryList(originalQuery); + RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery); + + /* + * Fast path queries cannot have merge command, and we prevent the remaining here. + * In Citus we have limited support for MERGE, it's allowed only if all + * the tables(target, source or any CTE) tables are are local i.e. a + * combination of Citus local and Non-Citus tables (regular Postgres tables) + * or distributed tables with some restrictions, please see header of routine + * ErrorIfDistTablesNotColocated for details. + */ + DeferredErrorMessage *deferredError = + ErrorIfMergeHasUnsupportedTables(originalQuery, + rangeTableList, + plannerRestrictionContext); + if (deferredError) + { + return deferredError; + } + + Oid resultRelationId = resultRte->relid; + deferredError = + TargetlistAndFunctionsSupported(resultRelationId, + originalQuery->jointree, + originalQuery->jointree->quals, + originalQuery->targetList, + originalQuery->commandType, + originalQuery->returningList); + if (deferredError) + { + return deferredError; + } + + #if PG_VERSION_NUM >= PG_VERSION_15 + + /* + * MERGE is a special case where we have multiple modify statements + * within itself. Check each INSERT/UPDATE/DELETE individually. 
+ */ + MergeAction *action = NULL; + foreach_ptr(action, originalQuery->mergeActionList) + { + Assert(originalQuery->returningList == NULL); + deferredError = + TargetlistAndFunctionsSupported(resultRelationId, + originalQuery->jointree, + action->qual, + action->targetList, + action->commandType, + originalQuery->returningList); + if (deferredError) + { + return deferredError; + } + } + + #endif + + return NULL; +} + + /* * ModifyQuerySupported returns NULL if the query only contains supported * features, otherwise it returns an error description. @@ -920,8 +998,17 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer PlannerRestrictionContext *plannerRestrictionContext) { Oid distributedTableId = InvalidOid; - DeferredErrorMessage *error = ModifyPartialQuerySupported(queryTree, multiShardQuery, - &distributedTableId); + DeferredErrorMessage *error = MergeQuerySupported(originalQuery, + plannerRestrictionContext); + if (error) + { + /* + * For MERGE, we do not do recursive plannning, simply bail out. + */ + RaiseDeferredError(error, ERROR); + } + + error = ModifyPartialQuerySupported(queryTree, multiShardQuery, &distributedTableId); if (error) { return error; @@ -3969,3 +4056,263 @@ CompareInsertValuesByShardId(const void *leftElement, const void *rightElement) } } } + + +/* + * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is + * permitted on special relations, such as materialized view, returns true only if + * it's a "source" relation. + */ +bool +IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) +{ + if (!IsMergeQuery(parse)) + { + return false; + } + + RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); + + /* Is it a target relation? */ + if (targetRte->relid == rte->relid) + { + return false; + } + + return true; +} + + +/* + * ErrorIfDistTablesNotColocated Checks to see if + * + * - There are a minimum of two distributed tables (source and a target). + * - All the distributed tables are indeed colocated. + * - MERGE relations are joined on the distribution column + * MERGE .. USING .. ON target.dist_key = source.dist_key + * + * If any of the conditions are not met, it raises an exception. + */ +static DeferredErrorMessage * +ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList, + PlannerRestrictionContext *plannerRestrictionContext) +{ + /* All MERGE tables must be distributed */ + if (list_length(distTablesList) < 2) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, both the source and target " + "must be distributed", NULL, NULL); + } + + /* All distributed tables must be colocated */ + if (!AllRelationsInListColocated(distTablesList, RANGETABLE_ENTRY)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, all the distributed tables " + "must be colocated", NULL, NULL); + } + + /* Are source and target tables joined on distribution column? */ + if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is only supported when distributed " + "tables are joined on their distribution column", + NULL, NULL); + } + + return NULL; +} + + +/* + * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE + * present) in the MERGE command are local i.e. 
a combination of Citus local and Non-Citus + * tables (regular Postgres tables), or distributed tables with some restrictions, please + * see header of routine ErrorIfDistTablesNotColocated for details, raises an exception + * for all other combinations. + */ +static DeferredErrorMessage * +ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList, + PlannerRestrictionContext *restrictionContext) +{ + List *distTablesList = NIL; + bool foundLocalTables = false; + + RangeTblEntry *rangeTableEntry = NULL; + foreach_ptr(rangeTableEntry, rangeTableList) + { + Oid relationId = rangeTableEntry->relid; + + switch (rangeTableEntry->rtekind) + { + case RTE_RELATION: + { + /* Check the relation type */ + break; + } + + case RTE_SUBQUERY: + case RTE_FUNCTION: + case RTE_TABLEFUNC: + case RTE_VALUES: + case RTE_JOIN: + case RTE_CTE: + { + /* Skip them as base table(s) will be checked */ + continue; + } + + /* + * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, + * such as, trigger data; until we find a genuine use case, raise an + * exception. + * RTE_RESULT is a node added by the planner and we shouldn't + * encounter it in the parse tree. + */ + case RTE_NAMEDTUPLESTORE: + case RTE_RESULT: + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported with " + "Tuplestores and results", + NULL, NULL); + } + + default: + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command: Unrecognized range table entry.", + NULL, NULL); + } + } + + /* RTE Relation can be of various types, check them now */ + + /* skip the regular views as they are replaced with subqueries */ + if (rangeTableEntry->relkind == RELKIND_VIEW) + { + continue; + } + + if (rangeTableEntry->relkind == RELKIND_MATVIEW || + rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) + { + /* Materialized view or Foreign table as target is not allowed */ + if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) + { + /* Non target relation is ok */ + continue; + } + else + { + /* Usually we don't reach this exception as the Postgres parser catches it */ + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, + "MERGE command is not allowed on " + "relation type(relkind:%c)", rangeTableEntry->relkind); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorMessage->data, + NULL, NULL); + } + } + + if (rangeTableEntry->relkind != RELKIND_RELATION && + rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) + { + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) " + "in MERGE command", rangeTableEntry->relkind); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorMessage->data, + NULL, NULL); + } + + Assert(rangeTableEntry->relid != 0); + + /* Reference tables are not supported yet */ + if (IsCitusTableType(relationId, REFERENCE_TABLE)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported on reference " + "tables yet", NULL, NULL); + } + + /* Append/Range tables are not supported */ + if (IsCitusTableType(relationId, APPEND_DISTRIBUTED) || + IsCitusTableType(relationId, RANGE_DISTRIBUTED)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, all the distributed tables " + "must be colocated, for append/range distribution, " + "colocation is not supported", NULL, + "Consider using hash distribution instead"); + } + + /* + * For now, save all distributed tables, later (below) we will + * check for supported combination(s). 
+ */ + if (IsCitusTableType(relationId, DISTRIBUTED_TABLE)) + { + distTablesList = lappend(distTablesList, rangeTableEntry); + continue; + } + + /* Regular Postgres tables and Citus local tables are allowed */ + if (!IsCitusTable(relationId) || + IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) + { + foundLocalTables = true; + continue; + } + + /* Any other Citus table type missing ? */ + } + + /* Ensure all tables are indeed local */ + if (foundLocalTables && list_length(distTablesList) == 0) + { + /* All the tables are local, supported */ + return NULL; + } + else if (foundLocalTables && list_length(distTablesList) > 0) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported with " + "combination of distributed/local tables yet", + NULL, NULL); + } + + /* Ensure all distributed tables are indeed co-located */ + return ErrorIfDistTablesNotColocated(parse, distTablesList, restrictionContext); +} + + +/* + * QueryHasMergeCommand walks over the query tree and returns false if there + * is no Merge command (e.g., CMD_MERGE), true otherwise. + */ +static bool +QueryHasMergeCommand(Query *queryTree) +{ + /* function is void for pre-15 versions of Postgres */ + #if PG_VERSION_NUM < PG_VERSION_15 + return false; + #else + + /* + * Postgres currently doesn't support Merge queries inside subqueries and + * ctes, but lets be defensive and do query tree walk anyway. + * + * We do not call this path for fast-path queries to avoid this additional + * overhead. + */ + if (!ContainsMergeCommandWalker((Node *) queryTree)) + { + /* No MERGE found */ + return false; + } + + return true; + #endif +} diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 4d131899a..f76a95d26 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -151,8 +151,6 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass secondClass); static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex, Index *partitionKeyIndex); -static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext * - restrictionContext); static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node); static JoinRestrictionContext * FilterJoinRestrictionContext( JoinRestrictionContext *joinRestrictionContext, Relids @@ -383,7 +381,8 @@ SafeToPushdownUnionSubquery(Query *originalQuery, return false; } - if (!AllRelationsInRestrictionContextColocated(restrictionContext)) + if (!AllRelationsInListColocated(restrictionContext->relationRestrictionList, + RESTRICTION_CONTEXT)) { /* distribution columns are equal, but tables are not co-located */ return false; @@ -1919,19 +1918,33 @@ FindQueryContainingRTEIdentityInternal(Node *node, /* - * AllRelationsInRestrictionContextColocated determines whether all of the relations in the - * given relation restrictions list are co-located. + * AllRelationsInListColocated determines whether all of the relations in the + * given list are co-located. 
+ * Note: The list can be of dofferent types, which is specified by ListEntryType */ -static bool -AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext) +bool +AllRelationsInListColocated(List *relationList, ListEntryType entryType) { + void *varPtr = NULL; + RangeTblEntry *rangeTableEntry = NULL; RelationRestriction *relationRestriction = NULL; int initialColocationId = INVALID_COLOCATION_ID; /* check whether all relations exists in the main restriction list */ - foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList) + foreach_ptr(varPtr, relationList) { - Oid relationId = relationRestriction->relationId; + Oid relationId = InvalidOid; + + if (entryType == RANGETABLE_ENTRY) + { + rangeTableEntry = (RangeTblEntry *) varPtr; + relationId = rangeTableEntry->relid; + } + else if (entryType == RESTRICTION_CONTEXT) + { + relationRestriction = (RelationRestriction *) varPtr; + relationId = relationRestriction->relationId; + } if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index 29c3c7154..19bd9f0c2 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -256,5 +256,9 @@ extern struct DistributedPlan * CreateDistributedPlan(uint64 planId, plannerRestrictionContext); extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte); +extern bool ConjunctionContainsColumnFilter(Node *node, + Var *column, + Node **distributionKeyValue); +extern bool ContainsMergeCommandWalker(Node *node); #endif /* DISTRIBUTED_PLANNER_H */ diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index ccd50a6db..4fd9c7015 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -17,6 +17,15 @@ #define SINGLE_RTE_INDEX 1 +/* + * Represents the pointer type that's being passed in the list. 
+ */ +typedef enum ListEntryType +{ + RANGETABLE_ENTRY, /* RangeTblEntry */ + RESTRICTION_CONTEXT /* RelationRestriction */ +} ListEntryType; + extern bool AllDistributionKeysInQueryAreEqual(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext); @@ -54,4 +63,6 @@ extern RelationRestrictionContext * FilterRelationRestrictionContext( RelationRestrictionContext *relationRestrictionContext, Relids queryRteIdentities); +extern bool AllRelationsInListColocated(List *relationList, ListEntryType entryType); + #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index df343a077..2ebb31f47 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -28,6 +28,10 @@ s/\(ref_id\)=\([0-9]+\)/(ref_id)=(X)/g # shard table names for multi_subtransactions s/"t2_[0-9]+"/"t2_xxxxxxx"/g +# shard table names for MERGE tests +s/merge_schema\.([_a-z0-9]+)_40[0-9]+ /merge_schema.\1_xxxxxxx /g +s/pgmerge_schema\.([_a-z0-9]+)_40[0-9]+ /pgmerge_schema.\1_xxxxxxx /g + # shard table names for multi_subquery s/ keyval(1|2|ref)_[0-9]+ / keyval\1_xxxxxxx /g diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 6fc472b70..02671acd0 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -18,6 +18,7 @@ SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; SET citus.explain_all_tasks to true; +SET citus.shard_replication_factor TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ?column? @@ -214,9 +215,45 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +-- Updates one of the row with customer_id = 30002 +SELECT * from target t WHERE t.customer_id = 30002; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30002 | 103 | AX | -1 | Sun Jan 17 19:53:00 2021 +(1 row) + +-- Turn on notice to print tasks sent to nodes +SET citus.log_remote_commands to true; MERGE INTO target t USING source s - ON (t.customer_id = s.customer_id) + ON (t.customer_id = s.customer_id) AND t.customer_id = 30002 + WHEN MATCHED AND t.order_center = 'XX' THEN + DELETE + WHEN MATCHED THEN + UPDATE SET -- Existing customer, update the order count and last_order_id + order_count = t.order_count + 1, + last_order_id = s.order_id + WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.target_xxxxxxx t USING merge_schema.source_xxxxxxx s ON ((t.customer_id OPERATOR(pg_catalog.=) s.customer_id) AND (t.customer_id OPERATOR(pg_catalog.=) 30002)) WHEN MATCHED AND ((t.order_center COLLATE "default") OPERATOR(pg_catalog.=) 'XX'::text) THEN DELETE WHEN MATCHED THEN UPDATE SET last_order_id = s.order_id, order_count = (t.order_count OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * from target t WHERE t.customer_id = 30002; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30002 | 103 | AX | 0 | Sun Jan 17 19:53:00 2021 +(1 row) + +-- Deletes one of the row with customer_id = 30004 +SELECT * from target t WHERE 
t.customer_id = 30004; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30004 | 99 | XX | -1 | Fri Sep 11 03:23:00 2020 +(1 row) + +MERGE INTO target t + USING source s + ON (t.customer_id = s.customer_id) AND t.customer_id = 30004 WHEN MATCHED AND t.order_center = 'XX' THEN DELETE WHEN MATCHED THEN @@ -226,7 +263,11 @@ MERGE INTO target t WHEN NOT MATCHED THEN -- New entry, record it. INSERT (customer_id, last_order_id, order_center, order_count, last_order) VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time); -ERROR: MERGE command is not supported on distributed/reference tables yet +SELECT * from target t WHERE t.customer_id = 30004; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- +(0 rows) + -- -- Test MERGE with CTE as source -- @@ -386,18 +427,61 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 1 | 0 + 2 | 0 + 5 | 0 +(3 rows) + +SET citus.log_remote_commands to true; WITH s1_res AS ( SELECT * FROM s1 ) MERGE INTO t1 - USING s1_res ON (s1_res.id = t1.id) + USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6 WHEN MATCHED AND s1_res.val = 0 THEN DELETE WHEN MATCHED THEN UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM 
merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +-- Other than id 6 everything else is a NO match, and should appear in target +SELECT * FROM t1 order by 1, 2; + id | val +--------------------------------------------------------------------- + 1 | 0 + 1 | 0 + 2 | 0 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(8 rows) + -- -- Test with multiple join conditions -- @@ -553,16 +637,38 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +SELECT * FROM t2 ORDER BY 1; + id | val | src +--------------------------------------------------------------------- + 1 | 0 | target + 2 | 0 | target + 3 | 1 | match + 4 | 0 | match +(4 rows) + +SET citus.log_remote_commands to true; MERGE INTO t2 USING s2 -ON t2.id = s2.id AND t2.src = s2.src +ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4 WHEN MATCHED AND t2.val = 1 THEN UPDATE SET val = s2.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN - INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src); -ERROR: MERGE command is not supported on distributed/reference tables yet + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.t2_xxxxxxx t2 USING merge_schema.s2_xxxxxxx s2 ON ((t2.id OPERATOR(pg_catalog.=) s2.id) AND (t2.src OPERATOR(pg_catalog.=) s2.src) AND (t2.id OPERATOR(pg_catalog.=) 4)) WHEN MATCHED AND (t2.val OPERATOR(pg_catalog.=) 1) THEN UPDATE SET val = (s2.val OPERATOR(pg_catalog.+) 10) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +-- Row with id = 4 is a match for delete clause, row should be deleted +-- Row with id = 3 is a NO match, row from source will be inserted +SELECT * FROM t2 ORDER BY 1; + id | val | src +--------------------------------------------------------------------- + 1 | 0 | target + 2 | 0 | target + 3 | 1 | match +(3 rows) + -- -- With sub-query as the MERGE source -- @@ -943,7 +1049,7 @@ WHEN MATCHED THEN UPDATE SET value = vl_source.value, id = vl_target.id + 1 WHEN NOT MATCHED THEN INSERT VALUES(vl_source.ID, vl_source.value); -DEBUG: +DEBUG: RESET client_min_messages; SELECT * INTO vl_local FROM vl_target ORDER BY 1 ; -- Should be equal @@ -996,7 +1102,7 @@ WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT VALUES(rs_source.id); -DEBUG: +DEBUG: RESET client_min_messages; SELECT * INTO rs_local FROM rs_target ORDER BY 1 ; -- Should be equal @@ -1132,7 +1238,7 @@ DEBUG: function does not have co-located tables DEBUG: generating subplan XXX_1 for subquery SELECT id, source FROM merge_schema.f_dist() f(id integer, source character varying) DEBUG: DEBUG: Plan XXX query 
after replacing subqueries and CTEs: MERGE INTO merge_schema.fn_target USING (SELECT intermediate_result.id, intermediate_result.source FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, source character varying)) fn_source ON (fn_source.id OPERATOR(pg_catalog.=) fn_target.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT (id, data) VALUES (fn_source.id, fn_source.source) -DEBUG: +DEBUG: RESET client_min_messages; SELECT * INTO fn_local FROM fn_target ORDER BY 1 ; -- Should be equal @@ -1204,7 +1310,7 @@ MERGE INTO ft_target DELETE WHEN NOT MATCHED THEN INSERT (id, user_val) VALUES (foreign_table.id, foreign_table.user_val); -DEBUG: +DEBUG: RESET client_min_messages; SELECT * FROM ft_target; id | user_val @@ -1213,9 +1319,866 @@ SELECT * FROM ft_target; 3 | source (2 rows) +-- +-- complex joins on the source side +-- +-- source(join of two relations) relation is an unaliased join +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET tid = sid2, src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test the same scenarios with distributed tables +SELECT create_distributed_table('target_cj', 'tid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.target_cj$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj1', 'sid1'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj2', 'sid2'); +NOTICE: Copying data from local table... 
+NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj2$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t USING (merge_schema.source_cj1_xxxxxxx s1 JOIN merge_schema.source_cj2_xxxxxxx s2 ON ((s1.sid1 OPERATOR(pg_catalog.=) s2.sid2))) ON ((t.tid OPERATOR(pg_catalog.=) s1.sid1) AND (t.tid OPERATOR(pg_catalog.=) 2)) WHEN MATCHED THEN UPDATE SET src = s2.src2 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- sub-query as a source +BEGIN; +MERGE INTO target_cj t +USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub +ON t.tid = sub.sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = sub.src1, val = val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 10 + 2 | source-1 | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test self-join +BEGIN; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 0 + 2 | target | 0 + 3 | target | 0 +(4 rows) + +set citus.log_remote_commands to true; +MERGE INTO target_cj t1 +USING (SELECT * FROM target_cj) sub +ON t1.tid = sub.tid AND t1.tid = 3 +WHEN MATCHED THEN + UPDATE SET src = sub.src, val = sub.val + 100 +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t1 USING (SELECT target_cj.tid, target_cj.src, target_cj.val FROM merge_schema.target_cj_xxxxxxx target_cj) sub ON ((t1.tid OPERATOR(pg_catalog.=) sub.tid) AND (t1.tid OPERATOR(pg_catalog.=) 3)) WHEN MATCHED THEN UPDATE SET src = sub.src, val = (sub.val OPERATOR(pg_catalog.+) 100) WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +set citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | 
target | 0 + 2 | target | 0 + 3 | target | 100 +(4 rows) + +ROLLBACK; +-- Test PREPARE +PREPARE foo(int) AS +MERGE INTO target_cj target +USING (SELECT * FROM source_cj1) sub +ON target.tid = sub.sid1 AND target.tid = $1 +WHEN MATCHED THEN + UPDATE SET val = sub.val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 0 + 2 | target | 0 + 3 | target | 0 +(4 rows) + +BEGIN; +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 10 + 2 | target | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +SET citus.log_remote_commands to true; +SET client_min_messages TO DEBUG1; +EXECUTE foo(2); +DEBUG: +DEBUG: +DEBUG: +DEBUG: +DEBUG: +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +RESET client_min_messages; +EXECUTE foo(2); +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 10 + 2 | target | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test distributed tables, must be co-located and joined on distribution column. +-- +-- We create two sets of source and target tables, one set is Postgres and the other +-- is Citus distributed. Run the _exact_ MERGE SQL on both the sets and compare the +-- final results of target tables of Postgres and Citus, the result should match. +-- This is repeated for various MERGE SQL combinations +-- +CREATE TABLE pg_target(id int, val varchar); +CREATE TABLE pg_source(id int, val varchar); +CREATE TABLE citus_target(id int, val varchar); +CREATE TABLE citus_source(id int, val varchar); +-- Half of the source rows do not match +INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i; +INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i; +SELECT create_distributed_table('citus_target', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_target$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('citus_source', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_source$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- +-- This routine compares the target tables of Postgres and Citus and +-- returns true if they match, false if the results do not match. +-- +CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_target + FULL OUTER JOIN citus_target + USING (id, val) + WHERE pg_target.id IS NULL + OR citus_target.id IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; +-- Make sure we start with exact data in Postgres and Citus +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +-- Run the MERGE on both Postgres and Citus, and compare the final target tables +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT 
MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- ON clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by 
Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- ON clause filter on target +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- NOT MATCHED clause filter on 
source +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- Test constant filter in ON clause to check if shards are pruned +-- with restriction information +-- +-- +-- Though constant filter is present, this won't prune shards as +-- NOT MATCHED clause is present +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET 
val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- This will prune shards with restriction information as NOT MATCHED is void +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test CTE with distributed tables +CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400; +WARNING: "view pg_source_view" has dependency to "table pg_source" that is not in Citus' metadata +DETAIL: "view pg_source_view" will be created only locally +HINT: Distribute "table pg_source" first to distribute "view pg_source_view" +CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400; +BEGIN; +SEt citus.log_remote_commands to true; +WITH cte AS ( + SELECT * FROM pg_source_view +) +MERGE INTO pg_target t 
+USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +WITH cte AS ( + SELECT * FROM citus_source_view +) +MERGE INTO citus_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT 
(id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test sub-query with distributed tables +BEGIN; +SEt citus.log_remote_commands to true; +MERGE INTO pg_target t +USING (SELECT * FROM pg_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +MERGE INTO citus_target t +USING (SELECT * FROM citus_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) 
WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test PREPARE +PREPARE pg_prep(int) AS +MERGE INTO pg_target +USING (SELECT * FROM pg_source) sub +ON pg_target.id = sub.id AND pg_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + DO NOTHING; +PREPARE citus_prep(int) AS +MERGE INTO citus_target +USING (SELECT * FROM citus_source) sub +ON citus_target.id = sub.id AND citus_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + DO NOTHING; +BEGIN; +SET citus.log_remote_commands to true; +SELECT * FROM pg_target WHERE id = 500; -- before merge + id | val +--------------------------------------------------------------------- + 500 | target +(1 row) + +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- non-cached + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- cached + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +SELECT * FROM citus_target WHERE id = 500; -- before merge +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx + id | val +--------------------------------------------------------------------- + 500 | target +(1 row) + +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SELECT * FROM citus_target WHERE id = 500; -- non-cached +NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server 
postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SELECT * FROM citus_target WHERE id = 500; -- cached +NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test partitions + distributed tables +CREATE TABLE pg_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE citus_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part6 PARTITION OF 
citus_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE pg_pa_source (sid integer, delta float); +CREATE TABLE citus_pa_source (sid integer, delta float); +-- insert many rows to the source table +INSERT INTO pg_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +-- insert a few rows in the target table (odd numbered tid) +INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +SELECT create_distributed_table('citus_pa_target', 'tid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part5$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part6$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part7$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part8$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('citus_pa_source', 'sid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_pa_source$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_pa_target + FULL OUTER JOIN citus_pa_target + USING (tid, balance, val) + WHERE pg_pa_target.tid IS NULL + OR citus_pa_target.tid IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; +-- try simple MERGE +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +SELECT pa_compare_tables(); + pa_compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- same with a constant qual +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +SELECT pa_compare_tables(); + pa_compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; -- -- Error and Unsupported scenarios -- +-- try updating the distribution key column +BEGIN; +MERGE INTO target_cj t + USING source_cj1 s + ON t.tid = s.sid1 AND t.tid = 2 + WHEN MATCHED THEN + UPDATE SET tid = tid + 9, src = src || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid1, 'inserted by merge', val1); +ERROR: modifying the partition value of rows is not allowed +ROLLBACK; -- Foreign table as target MERGE INTO foreign_table USING ft_target ON (foreign_table.id = ft_target.id) @@ -1274,7 +2237,54 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet +-- Now both s1 and t1 are distributed tables +SELECT undistribute_table('t1'); +NOTICE: creating a new table for merge_schema.t1 +NOTICE: moving the data of merge_schema.t1 +NOTICE: dropping the old merge_schema.t1 +NOTICE: renaming the new table to merge_schema.t1 + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t1', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- We have a potential pitfall where a function can be invoked in +-- the MERGE conditions which can insert/update to a random shard +CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN +LANGUAGE PLPGSQL AS +$$ +BEGIN + INSERT INTO t1 VALUES (100, 100); + RETURN TRUE; +END; +$$; +-- Test preventing "ON" join condition from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET val = t1.val + s1.val; +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ROLLBACK; +-- Test preventing WHEN clause(s) from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 +WHEN MATCHED AND (merge_when_and_write()) THEN + UPDATE SET val = t1.val + s1.val; +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ROLLBACK; -- Joining on partition columns with sub-query MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.val = t1.id) -- sub.val is not a distribution column @@ -1284,7 +2294,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- Joining on partition columns with CTE WITH s1_res AS ( SELECT * FROM s1 @@ -1297,7 +2307,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- Constant Join condition WITH s1_res AS ( SELECT * FROM s1 @@ -1310,7 +2320,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- With a single WHEN clause, which causes a non-left join WITH s1_res AS ( SELECT * FROM s1 @@ -1319,7 +2329,7 @@ WITH s1_res AS ( WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- -- Reference tables -- @@ -1371,7 +2381,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- -- Postgres + Citus-Distributed table -- @@ -1413,7 +2423,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.id = t1.id) WHEN MATCHED AND sub.val = 0 
THEN @@ -1422,7 +2432,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet CREATE TABLE pg(val int); SELECT create_distributed_table('s1', 'id'); NOTICE: Copying data from local table... @@ -1443,7 +2453,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet -- Mix Postgres table in CTE WITH pg_res AS ( SELECT * FROM pg @@ -1456,7 +2466,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet -- Match more than one source row should fail same as Postgres behavior SELECT undistribute_table('t1'); NOTICE: creating a new table for merge_schema.t1 @@ -1511,6 +2521,234 @@ WHEN NOT MATCHED THEN INSERT VALUES(mv_source.id, mv_source.val); ERROR: cannot execute MERGE on relation "mv_source" DETAIL: This operation is not supported for materialized views. +-- Distributed tables *must* be colocated +CREATE TABLE dist_target(id int, val varchar); +SELECT create_distributed_table('dist_target', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_source(id int, val varchar); +SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated +-- Distributed tables *must* be joined on distribution column +CREATE TABLE dist_colocated(id int, val int); +SELECT create_distributed_table('dist_colocated', 'id', colocate_with => 'dist_target'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +MERGE INTO dist_target +USING dist_colocated +ON dist_target.id = dist_colocated.val -- val is not the distribution column +WHEN MATCHED THEN +UPDATE SET val = dist_colocated.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_colocated.id, dist_colocated.val); +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +-- Both the source and target must be distributed +MERGE INTO dist_target +USING (SELECT 100 id) AS source +ON dist_target.id = source.id AND dist_target.val = 'const' +WHEN MATCHED THEN +UPDATE SET val = 'source' +WHEN NOT MATCHED THEN +INSERT VALUES(source.id, 'source'); +ERROR: For MERGE command, both the source and target must be distributed +-- Non-hash distributed tables (append/range). 
+CREATE VIEW show_tables AS +SELECT logicalrelid, partmethod +FROM pg_dist_partition +WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass) +ORDER BY 1; +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | h + dist_source | a +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | h + dist_source | r +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +-- Both are append tables +SELECT undistribute_table('dist_target'); +NOTICE: creating a new table for merge_schema.dist_target +NOTICE: moving the data of merge_schema.dist_target +NOTICE: dropping the old merge_schema.dist_target +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE" +NOTICE: renaming the new table to merge_schema.dist_target + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table 
+--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_target', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | a + dist_source | a +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +-- Both are range tables +SELECT undistribute_table('dist_target'); +NOTICE: creating a new table for merge_schema.dist_target +NOTICE: moving the data of merge_schema.dist_target +NOTICE: dropping the old merge_schema.dist_target +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE" +NOTICE: renaming the new table to merge_schema.dist_target + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_target', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | r + dist_source | r +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead DROP SERVER foreign_server CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to user mapping for postgres on server foreign_server @@ -1519,8 +2757,9 @@ drop cascades to foreign table foreign_table NOTICE: foreign table "foreign_table_4000046" does not exist, skipping CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name, drop_shards_metadata_only := false)" PL/pgSQL function citus_drop_trigger() line XX at PERFORM +DROP FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; -NOTICE: drop cascades to 56 other objects +NOTICE: drop cascades to 75 other objects DETAIL: 
drop cascades to function insert_data() drop cascades to table pg_result drop cascades to table local_local @@ -1572,11 +2811,30 @@ drop cascades to table ft_target drop cascades to table ft_source_4000045 drop cascades to table ft_source drop cascades to extension postgres_fdw +drop cascades to table target_cj +drop cascades to table source_cj1 +drop cascades to table source_cj2 +drop cascades to table pg_target +drop cascades to table pg_source +drop cascades to table citus_target +drop cascades to table citus_source +drop cascades to function compare_tables() +drop cascades to view pg_source_view +drop cascades to view citus_source_view +drop cascades to table pg_pa_target +drop cascades to table citus_pa_target +drop cascades to table pg_pa_source +drop cascades to table citus_pa_source +drop cascades to function pa_compare_tables() drop cascades to table pg -drop cascades to table t1_4000062 -drop cascades to table s1_4000063 +drop cascades to table t1_4000110 +drop cascades to table s1_4000111 drop cascades to table t1 drop cascades to table s1 +drop cascades to table dist_colocated +drop cascades to table dist_target +drop cascades to table dist_source +drop cascades to view show_tables SELECT 1 FROM master_remove_node('localhost', :master_port); ?column? --------------------------------------------------------------------- diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 7a41b25ec..d92686b93 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -315,7 +315,7 @@ SELECT create_reference_table('tbl2'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- now, both are reference, still not supported SELECT create_reference_table('tbl1'); create_reference_table @@ -325,7 +325,7 @@ SELECT create_reference_table('tbl1'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- now, both distributed, not works SELECT undistribute_table('tbl1'); NOTICE: creating a new table for pg15.tbl1 @@ -419,14 +419,14 @@ SELECT create_distributed_table('tbl2', 'x'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- also, not inside subqueries & ctes WITH targq AS ( SELECT * FROM tbl2 ) MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- crashes on beta3, fixed on 15 stable --WITH foo AS ( -- MERGE INTO tbl1 USING tbl2 ON (true) @@ -441,7 +441,7 @@ USING tbl2 ON (true) WHEN MATCHED THEN UPDATE SET x = (SELECT count(*) FROM tbl2); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when distributed tables are joined on their distribution column -- test numeric types with negative scale CREATE TABLE numeric_negative_scale(numeric_column numeric(3,-1), orig_value int); INSERT into numeric_negative_scale SELECT x,x FROM generate_series(111, 115) x; diff --git 
a/src/test/regress/expected/pgmerge.out b/src/test/regress/expected/pgmerge.out index b90760691..0bedf356f 100644 --- a/src/test/regress/expected/pgmerge.out +++ b/src/test/regress/expected/pgmerge.out @@ -910,7 +910,15 @@ MERGE INTO wq_target t USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; -ERROR: functions used in UPDATE queries on distributed tables must not be VOLATILE +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ROLLBACK; +-- Test preventing ON condition from writing to the database +BEGIN; +MERGE INTO wq_target t +USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET balance = t.balance + s.balance; +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; @@ -1891,7 +1899,7 @@ MERGE INTO pa_target t UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT MATCHED THEN INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); -DEBUG: +DEBUG: SELECT * FROM pa_target ORDER BY tid; logts | tid | balance | val --------------------------------------------------------------------- @@ -2083,7 +2091,7 @@ WHEN MATCHED THEN UPDATE WHEN NOT MATCHED THEN INSERT (city_id, logdate, peaktemp, unitsales) VALUES (city_id, logdate, peaktemp, unitsales); -DEBUG: +DEBUG: RESET client_min_messages; SELECT tableoid::regclass, * FROM measurement ORDER BY city_id, logdate; tableoid | city_id | logdate | peaktemp | unitsales diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql index c266b5333..12294b2c9 100644 --- a/src/test/regress/sql/merge.sql +++ b/src/test/regress/sql/merge.sql @@ -19,6 +19,7 @@ SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; SET citus.explain_all_tasks to true; +SET citus.shard_replication_factor TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); CREATE TABLE source @@ -143,9 +144,33 @@ SELECT undistribute_table('source'); SELECT create_distributed_table('target', 'customer_id'); SELECT create_distributed_table('source', 'customer_id'); +-- Updates one of the row with customer_id = 30002 +SELECT * from target t WHERE t.customer_id = 30002; +-- Turn on notice to print tasks sent to nodes +SET citus.log_remote_commands to true; MERGE INTO target t USING source s - ON (t.customer_id = s.customer_id) + ON (t.customer_id = s.customer_id) AND t.customer_id = 30002 + + WHEN MATCHED AND t.order_center = 'XX' THEN + DELETE + + WHEN MATCHED THEN + UPDATE SET -- Existing customer, update the order count and last_order_id + order_count = t.order_count + 1, + last_order_id = s.order_id + + WHEN NOT MATCHED THEN + DO NOTHING; + +SET citus.log_remote_commands to false; +SELECT * from target t WHERE t.customer_id = 30002; + +-- Deletes one of the row with customer_id = 30004 +SELECT * from target t WHERE t.customer_id = 30004; +MERGE INTO target t + USING source s + ON (t.customer_id = s.customer_id) AND t.customer_id = 30004 WHEN MATCHED AND t.order_center = 'XX' THEN DELETE @@ -158,6 +183,7 @@ MERGE INTO target t WHEN NOT MATCHED THEN -- New entry, record it. 
INSERT (customer_id, last_order_id, order_center, order_count, last_order) VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time); +SELECT * from target t WHERE t.customer_id = 30004; -- -- Test MERGE with CTE as source @@ -243,11 +269,13 @@ SELECT create_distributed_table('t1', 'id'); SELECT create_distributed_table('s1', 'id'); +SELECT * FROM t1 order by id; +SET citus.log_remote_commands to true; WITH s1_res AS ( SELECT * FROM s1 ) MERGE INTO t1 - USING s1_res ON (s1_res.id = t1.id) + USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6 WHEN MATCHED AND s1_res.val = 0 THEN DELETE @@ -255,6 +283,9 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); +SET citus.log_remote_commands to false; +-- Other than id 6 everything else is a NO match, and should appear in target +SELECT * FROM t1 order by 1, 2; -- -- Test with multiple join conditions @@ -325,15 +356,21 @@ SELECT undistribute_table('s2'); SELECT create_distributed_table('t2', 'id'); SELECT create_distributed_table('s2', 'id'); +SELECT * FROM t2 ORDER BY 1; +SET citus.log_remote_commands to true; MERGE INTO t2 USING s2 -ON t2.id = s2.id AND t2.src = s2.src +ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4 WHEN MATCHED AND t2.val = 1 THEN UPDATE SET val = s2.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN - INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src); + DO NOTHING; +SET citus.log_remote_commands to false; +-- Row with id = 4 is a match for delete clause, row should be deleted +-- Row with id = 3 is a NO match, row from source will be inserted +SELECT * FROM t2 ORDER BY 1; -- -- With sub-query as the MERGE source @@ -824,10 +861,577 @@ RESET client_min_messages; SELECT * FROM ft_target; +-- +-- complex joins on the source side +-- + +-- source(join of two relations) relation is an unaliased join + +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); + +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); + +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); + +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET tid = sid2, src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test the same scenarios with distributed tables + +SELECT create_distributed_table('target_cj', 'tid'); +SELECT create_distributed_table('source_cj1', 'sid1'); +SELECT create_distributed_table('source_cj2', 'sid2'); + +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SET citus.log_remote_commands to false; +SELECT * FROM target_cj 
ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- sub-query as a source +BEGIN; +MERGE INTO target_cj t +USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub +ON t.tid = sub.sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = sub.src1, val = val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test self-join +BEGIN; +SELECT * FROM target_cj ORDER BY 1; +set citus.log_remote_commands to true; +MERGE INTO target_cj t1 +USING (SELECT * FROM target_cj) sub +ON t1.tid = sub.tid AND t1.tid = 3 +WHEN MATCHED THEN + UPDATE SET src = sub.src, val = sub.val + 100 +WHEN NOT MATCHED THEN + DO NOTHING; +set citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + + +-- Test PREPARE +PREPARE foo(int) AS +MERGE INTO target_cj target +USING (SELECT * FROM source_cj1) sub +ON target.tid = sub.sid1 AND target.tid = $1 +WHEN MATCHED THEN + UPDATE SET val = sub.val1 +WHEN NOT MATCHED THEN + DO NOTHING; + +SELECT * FROM target_cj ORDER BY 1; + +BEGIN; +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; + +SET citus.log_remote_commands to true; +SET client_min_messages TO DEBUG1; +EXECUTE foo(2); +RESET client_min_messages; + +EXECUTE foo(2); +SET citus.log_remote_commands to false; + +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test distributed tables, must be co-located and joined on distribution column. + +-- +-- We create two sets of source and target tables, one set is Postgres and the other +-- is Citus distributed. Run the _exact_ MERGE SQL on both the sets and compare the +-- final results of target tables of Postgres and Citus, the result should match. +-- This is repeated for various MERGE SQL combinations +-- +CREATE TABLE pg_target(id int, val varchar); +CREATE TABLE pg_source(id int, val varchar); +CREATE TABLE citus_target(id int, val varchar); +CREATE TABLE citus_source(id int, val varchar); + +-- Half of the source rows do not match +INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i; + +INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i; + +SELECT create_distributed_table('citus_target', 'id'); +SELECT create_distributed_table('citus_source', 'id'); + +-- +-- This routine compares the target tables of Postgres and Citus and +-- returns true if they match, false if the results do not match. 
+-- +CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_target + FULL OUTER JOIN citus_target + USING (id, val) + WHERE pg_target.id IS NULL + OR citus_target.id IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; + +-- Make sure we start with exact data in Postgres and Citus +SELECT compare_tables(); + +-- Run the MERGE on both Postgres and Citus, and compare the final target tables + +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- ON clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- ON clause filter on target +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- NOT MATCHED clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- Test constant filter in ON clause to check if shards are pruned +-- with restriction information +-- + +-- +-- Though constant filter is present, this won't prune shards as +-- NOT MATCHED clause is present +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands 
to false; +SELECT compare_tables(); +ROLLBACK; + +-- This will prune shards with restriction information as NOT MATCHED is void +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test CTE with distributed tables +CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400; +CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400; + +BEGIN; +SEt citus.log_remote_commands to true; + +WITH cte AS ( + SELECT * FROM pg_source_view +) +MERGE INTO pg_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +WITH cte AS ( + SELECT * FROM citus_source_view +) +MERGE INTO citus_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + + +-- Test sub-query with distributed tables +BEGIN; +SEt citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING (SELECT * FROM pg_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +MERGE INTO citus_target t +USING (SELECT * FROM citus_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test PREPARE +PREPARE pg_prep(int) AS +MERGE INTO pg_target +USING (SELECT * FROM pg_source) sub +ON pg_target.id = sub.id AND pg_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + DO NOTHING; + +PREPARE citus_prep(int) AS +MERGE INTO citus_target +USING (SELECT * FROM citus_source) sub +ON citus_target.id = sub.id AND citus_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + DO NOTHING; + +BEGIN; +SET citus.log_remote_commands to true; + +SELECT * FROM pg_target WHERE id = 500; -- before merge +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- non-cached +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- cached + +SELECT * FROM citus_target WHERE id = 500; -- before merge +EXECUTE citus_prep(500); +SELECT * FROM citus_target WHERE id = 500; -- non-cached +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +SELECT * FROM citus_target WHERE id = 500; -- cached + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test partitions + 
distributed tables + +CREATE TABLE pg_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE citus_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); + +CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part6 PARTITION OF citus_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT + WITH (autovacuum_enabled=off); + +CREATE TABLE pg_pa_source (sid integer, delta float); +CREATE TABLE citus_pa_source (sid integer, delta float); + +-- insert many rows to the source table +INSERT INTO pg_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +-- insert a few rows in the target table (odd numbered tid) +INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; + +SELECT create_distributed_table('citus_pa_target', 'tid'); +SELECT create_distributed_table('citus_pa_source', 'sid'); + +CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_pa_target + FULL OUTER JOIN citus_pa_target + USING (tid, balance, val) + WHERE pg_pa_target.tid IS NULL + OR citus_pa_target.tid IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; + +-- try simple MERGE +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +SELECT pa_compare_tables(); +ROLLBACK; + +-- same with a constant qual +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +SELECT pa_compare_tables(); +ROLLBACK; + -- -- Error and Unsupported scenarios -- +-- try updating the distribution key column +BEGIN; +MERGE INTO target_cj t + USING source_cj1 s + ON t.tid = s.sid1 AND t.tid = 2 + WHEN MATCHED THEN + UPDATE SET tid = tid + 9, src = src || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid1, 'inserted by merge', val1); +ROLLBACK; + -- Foreign table as target MERGE INTO foreign_table USING 
ft_target ON (foreign_table.id = ft_target.id) @@ -854,6 +1458,38 @@ MERGE INTO t1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); +-- Now both s1 and t1 are distributed tables +SELECT undistribute_table('t1'); +SELECT create_distributed_table('t1', 'id'); + +-- We have a potential pitfall where a function can be invoked in +-- the MERGE conditions which can insert/update to a random shard +CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN +LANGUAGE PLPGSQL AS +$$ +BEGIN + INSERT INTO t1 VALUES (100, 100); + RETURN TRUE; +END; +$$; + +-- Test preventing "ON" join condition from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET val = t1.val + s1.val; +ROLLBACK; + +-- Test preventing WHEN clause(s) from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 +WHEN MATCHED AND (merge_when_and_write()) THEN + UPDATE SET val = t1.val + s1.val; +ROLLBACK; + + -- Joining on partition columns with sub-query MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.val = t1.id) -- sub.val is not a distribution column @@ -997,6 +1633,104 @@ WHEN MATCHED THEN WHEN NOT MATCHED THEN INSERT VALUES(mv_source.id, mv_source.val); +-- Distributed tables *must* be colocated +CREATE TABLE dist_target(id int, val varchar); +SELECT create_distributed_table('dist_target', 'id'); +CREATE TABLE dist_source(id int, val varchar); +SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none'); + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Distributed tables *must* be joined on distribution column +CREATE TABLE dist_colocated(id int, val int); +SELECT create_distributed_table('dist_colocated', 'id', colocate_with => 'dist_target'); + +MERGE INTO dist_target +USING dist_colocated +ON dist_target.id = dist_colocated.val -- val is not the distribution column +WHEN MATCHED THEN +UPDATE SET val = dist_colocated.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_colocated.id, dist_colocated.val); + + +-- Both the source and target must be distributed +MERGE INTO dist_target +USING (SELECT 100 id) AS source +ON dist_target.id = source.id AND dist_target.val = 'const' +WHEN MATCHED THEN +UPDATE SET val = 'source' +WHEN NOT MATCHED THEN +INSERT VALUES(source.id, 'source'); + +-- Non-hash distributed tables (append/range). 
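+-- The statements below recreate the source (and later also the target) with
+-- append and range distribution; colocation is not supported for those
+-- distribution methods, so each of these MERGE commands is expected to error out.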
+CREATE VIEW show_tables AS +SELECT logicalrelid, partmethod +FROM pg_dist_partition +WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass) +ORDER BY 1; + +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_source', 'id', 'append'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_source', 'id', 'range'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Both are append tables +SELECT undistribute_table('dist_target'); +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_target', 'id', 'append'); +SELECT create_distributed_table('dist_source', 'id', 'append'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Both are range tables +SELECT undistribute_table('dist_target'); +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_target', 'id', 'range'); +SELECT create_distributed_table('dist_source', 'id', 'range'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + DROP SERVER foreign_server CASCADE; +DROP FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; SELECT 1 FROM master_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql/pgmerge.sql b/src/test/regress/sql/pgmerge.sql index 6842f516a..83bf01a68 100644 --- a/src/test/regress/sql/pgmerge.sql +++ b/src/test/regress/sql/pgmerge.sql @@ -608,6 +608,14 @@ USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; ROLLBACK; + +-- Test preventing ON condition from writing to the database +BEGIN; +MERGE INTO wq_target t +USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET balance = t.balance + s.balance; +ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; From cf5513628151410d839a469f9fcafc1ead68f912 Mon Sep 17 00:00:00 2001 From: Teja Mupparti Date: Tue, 31 Jan 2023 18:23:44 -0800 Subject: [PATCH 10/18] 1) Restrict MERGE command INSERT to the source's distribution column Fixes #6672 2) Move all MERGE related routines to a new file merge_planner.c 3) Make ConjunctionContainsColumnFilter() static again, and rearrange the code in MergeQuerySupported() 4) Restore the original format in the comments section. 5) Add big serial test. 
Implement latest set of comments --- .../distributed/planner/distributed_planner.c | 41 +- .../planner/fast_path_router_planner.c | 5 +- .../distributed/planner/merge_planner.c | 701 ++++++++++++++++++ .../planner/multi_physical_planner.c | 17 +- .../planner/multi_router_planner.c | 398 +--------- .../planner/query_pushdown_planning.c | 12 +- .../relation_restriction_equivalence.c | 68 +- src/include/distributed/distributed_planner.h | 6 - src/include/distributed/merge_planner.h | 26 + .../distributed/multi_router_planner.h | 13 +- .../relation_restriction_equivalence.h | 12 +- src/test/regress/create_schedule | 1 + src/test/regress/expected/merge.out | 444 +++++++++-- src/test/regress/expected/merge_arbitrary.out | 150 ++++ .../regress/expected/merge_arbitrary_0.out | 6 + .../expected/merge_arbitrary_create.out | 72 ++ .../expected/merge_arbitrary_create_0.out | 6 + src/test/regress/expected/pg15.out | 31 +- src/test/regress/expected/pgmerge.out | 12 +- src/test/regress/sql/merge.sql | 235 +++++- src/test/regress/sql/merge_arbitrary.sql | 133 ++++ .../regress/sql/merge_arbitrary_create.sql | 50 ++ src/test/regress/sql/pg15.sql | 23 +- src/test/regress/sql/pgmerge.sql | 6 +- src/test/regress/sql_schedule | 1 + 25 files changed, 1920 insertions(+), 549 deletions(-) create mode 100644 src/backend/distributed/planner/merge_planner.c create mode 100644 src/include/distributed/merge_planner.h create mode 100644 src/test/regress/expected/merge_arbitrary.out create mode 100644 src/test/regress/expected/merge_arbitrary_0.out create mode 100644 src/test/regress/expected/merge_arbitrary_create.out create mode 100644 src/test/regress/expected/merge_arbitrary_create_0.out create mode 100644 src/test/regress/sql/merge_arbitrary.sql create mode 100644 src/test/regress/sql/merge_arbitrary_create.sql diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 262258d7f..17b63ee0a 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -77,7 +77,7 @@ int PlannerLevel = 0; static bool ListContainsDistributedTableRTE(List *rangeTableList, bool *maybeHasForeignDistributedTable); -static bool IsUpdateOrDelete(Query *query); +static bool IsUpdateOrDeleteOrMerge(Query *query); static PlannedStmt * CreateDistributedPlannedStmt( DistributedPlanningContext *planContext); static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, @@ -153,7 +153,7 @@ distributed_planner(Query *parse, * We cannot have merge command for this path as well because * there cannot be recursively planned merge command. */ - Assert(!ContainsMergeCommandWalker((Node *) parse)); + Assert(!IsMergeQuery(parse)); needsDistributedPlanning = true; } @@ -295,39 +295,6 @@ distributed_planner(Query *parse, } -/* - * ContainsMergeCommandWalker walks over the node and finds if there are any - * Merge command (e.g., CMD_MERGE) in the node. - */ -bool -ContainsMergeCommandWalker(Node *node) -{ - #if PG_VERSION_NUM < PG_VERSION_15 - return false; - #endif - - if (node == NULL) - { - return false; - } - - if (IsA(node, Query)) - { - Query *query = (Query *) node; - if (IsMergeQuery(query)) - { - return true; - } - - return query_tree_walker((Query *) node, ContainsMergeCommandWalker, NULL, 0); - } - - return expression_tree_walker(node, ContainsMergeCommandWalker, NULL); - - return false; -} - - /* * ExtractRangeTableEntryList is a wrapper around ExtractRangeTableEntryWalker. 
* The function traverses the input query and returns all the range table @@ -631,7 +598,7 @@ IsMultiTaskPlan(DistributedPlan *distributedPlan) * IsUpdateOrDelete returns true if the query performs an update or delete. */ bool -IsUpdateOrDelete(Query *query) +IsUpdateOrDeleteOrMerge(Query *query) { return query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE || @@ -809,7 +776,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) * if it is planned as a multi shard modify query. */ if ((distributedPlan->planningError || - (IsUpdateOrDelete(planContext->originalQuery) && IsMultiTaskPlan( + (IsUpdateOrDeleteOrMerge(planContext->originalQuery) && IsMultiTaskPlan( distributedPlan))) && hasUnresolvedParams) { diff --git a/src/backend/distributed/planner/fast_path_router_planner.c b/src/backend/distributed/planner/fast_path_router_planner.c index e7d91a101..ecb62478a 100644 --- a/src/backend/distributed/planner/fast_path_router_planner.c +++ b/src/backend/distributed/planner/fast_path_router_planner.c @@ -56,6 +56,9 @@ bool EnableFastPathRouterPlanner = true; static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey); static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn, Node **distributionKeyValue); +static bool ConjunctionContainsColumnFilter(Node *node, + Var *column, + Node **distributionKeyValue); /* @@ -292,7 +295,7 @@ ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey) * * If the conjuction contains column filter which is const, distributionKeyValue is set. */ -bool +static bool ConjunctionContainsColumnFilter(Node *node, Var *column, Node **distributionKeyValue) { if (node == NULL) diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c new file mode 100644 index 000000000..03fd9e00d --- /dev/null +++ b/src/backend/distributed/planner/merge_planner.c @@ -0,0 +1,701 @@ +/*------------------------------------------------------------------------- + * + * merge_planner.c + * + * This file contains functions to help plan MERGE queries. + * + * Copyright (c) Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#include + +#include "postgres.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" + +#include "distributed/citus_clauses.h" +#include "distributed/listutils.h" +#include "distributed/merge_planner.h" +#include "distributed/multi_logical_optimizer.h" +#include "distributed/multi_router_planner.h" +#include "distributed/pg_version_constants.h" +#include "distributed/query_pushdown_planning.h" + +#if PG_VERSION_NUM >= PG_VERSION_15 + +static DeferredErrorMessage * CheckIfRTETypeIsUnsupported(Query *parse, + RangeTblEntry *rangeTableEntry); +static DeferredErrorMessage * ErrorIfDistTablesNotColocated(Query *parse, + List * + distTablesList, + PlannerRestrictionContext + * + plannerRestrictionContext); +static DeferredErrorMessage * ErrorIfMergeHasUnsupportedTables(Query *parse, + List *rangeTableList, + PlannerRestrictionContext * + restrictionContext); +static bool IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool + skipOuterVars); +static DeferredErrorMessage * InsertDistributionColumnMatchesSource(Query *query, + RangeTblEntry * + resultRte); + +static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid + resultRelationId, + FromExpr *joinTree, + Node *quals, + List *targetList, + CmdType commandType); +#endif + + +/* + * MergeQuerySupported does check for a MERGE command in the query, if it finds + * one, it will verify the below criteria + * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables + * - Distributed tables requirements in ErrorIfDistTablesNotColocated + * - Checks target-lists and functions-in-quals in TargetlistAndFunctionsSupported + */ +DeferredErrorMessage * +MergeQuerySupported(Query *originalQuery, bool multiShardQuery, + PlannerRestrictionContext *plannerRestrictionContext) +{ + /* function is void for pre-15 versions of Postgres */ + #if PG_VERSION_NUM < PG_VERSION_15 + + return NULL; + + #else + + /* For non-MERGE commands it's a no-op */ + if (!IsMergeQuery(originalQuery)) + { + return NULL; + } + + /* + * TODO: For now, we are adding an exception where any volatile or stable + * functions are not allowed in the MERGE query, but this will become too + * restrictive as this will prevent many useful and simple cases, such as, + * INSERT VALUES(ts::timestamp), bigserial column inserts etc. But without + * this restriction, we have a potential danger of some of the function(s) + * getting executed at the worker which will result in incorrect behavior. + */ + if (contain_mutable_functions((Node *) originalQuery)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not yet supported " + "in MERGE sql with distributed tables ", + NULL, NULL); + } + + List *rangeTableList = ExtractRangeTableEntryList(originalQuery); + RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery); + + /* + * Fast path queries cannot have merge command, and we prevent the remaining here. + * In Citus we have limited support for MERGE, it's allowed only if all + * the tables(target, source or any CTE) tables are are local i.e. a + * combination of Citus local and Non-Citus tables (regular Postgres tables) + * or distributed tables with some restrictions, please see header of routine + * ErrorIfDistTablesNotColocated for details. 
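+	 *
+	 * The checks below run in this order: unsupported table types and
+	 * combinations, functions used in the join qual and in each WHEN
+	 * action's qual and target list, the distribution column value used
+	 * by MERGE ... INSERT, subquery-pushdown restrictions for multi-shard
+	 * plans, and finally JOIN ... USING naming conflicts.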
+ */ + DeferredErrorMessage *deferredError = + ErrorIfMergeHasUnsupportedTables(originalQuery, + rangeTableList, + plannerRestrictionContext); + if (deferredError) + { + /* MERGE's unsupported combination, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + + Oid resultRelationId = resultRte->relid; + deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId, + originalQuery->jointree, + originalQuery->jointree-> + quals, + originalQuery->targetList, + originalQuery->commandType); + if (deferredError) + { + return deferredError; + } + + /* + * MERGE is a special case where we have multiple modify statements + * within itself. Check each INSERT/UPDATE/DELETE individually. + */ + MergeAction *action = NULL; + foreach_ptr(action, originalQuery->mergeActionList) + { + Assert(originalQuery->returningList == NULL); + deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId, + originalQuery->jointree, + action->qual, + action->targetList, + action->commandType); + if (deferredError) + { + /* MERGE's unsupported scenario, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + } + + deferredError = + InsertDistributionColumnMatchesSource(originalQuery, resultRte); + if (deferredError) + { + /* MERGE's unsupported scenario, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + + if (multiShardQuery) + { + deferredError = + DeferErrorIfUnsupportedSubqueryPushdown(originalQuery, + plannerRestrictionContext); + if (deferredError) + { + return deferredError; + } + } + + if (HasDangerousJoinUsing(originalQuery->rtable, (Node *) originalQuery->jointree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "a join with USING causes an internal naming " + "conflict, use ON instead", NULL, NULL); + } + + return NULL; + + #endif +} + + +/* + * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is + * permitted on special relations, such as materialized view, returns true only if + * it's a "source" relation. + */ +bool +IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) +{ + if (!IsMergeQuery(parse)) + { + return false; + } + + /* Fetch the MERGE target relation */ + RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); + + /* Is it a target relation? */ + if (targetRte->relid == rte->relid) + { + return false; + } + + return true; +} + + +#if PG_VERSION_NUM >= PG_VERSION_15 + +/* + * ErrorIfDistTablesNotColocated Checks to see if + * + * - There are a minimum of two distributed tables (source and a target). + * - All the distributed tables are indeed colocated. + * + * If any of the conditions are not met, it raises an exception. 
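+ *
+ * The requirement that the distributed tables are also joined on their
+ * distribution columns is enforced separately, for multi-shard MERGE
+ * commands, via the subquery-pushdown checks
+ * (RestrictionEquivalenceForPartitionKeys()).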
+ */ +static DeferredErrorMessage * +ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList, + PlannerRestrictionContext * + plannerRestrictionContext) +{ + /* All MERGE tables must be distributed */ + if (list_length(distTablesList) < 2) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, both the source and target " + "must be distributed", NULL, NULL); + } + + /* All distributed tables must be colocated */ + if (!AllRelationsInRTEListColocated(distTablesList)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, all the distributed tables " + "must be colocated", NULL, NULL); + } + + return NULL; +} + + +/* + * ErrorIfRTETypeIsUnsupported Checks for types of tables that are not supported, such + * as, reference tables, append-distributed tables and materialized view as target relation. + * Routine returns NULL for the supported types, error message for everything else. + */ +static DeferredErrorMessage * +CheckIfRTETypeIsUnsupported(Query *parse, RangeTblEntry *rangeTableEntry) +{ + if (rangeTableEntry->relkind == RELKIND_MATVIEW || + rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) + { + /* Materialized view or Foreign table as target is not allowed */ + if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) + { + /* Non target relation is ok */ + return NULL; + } + else + { + /* Usually we don't reach this exception as the Postgres parser catches it */ + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "MERGE command is not allowed on " + "relation type(relkind:%c)", + rangeTableEntry->relkind); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + } + + if (rangeTableEntry->relkind != RELKIND_RELATION && + rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) + { + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) " + "in MERGE command", rangeTableEntry->relkind); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + + Assert(rangeTableEntry->relid != 0); + + /* Reference tables are not supported yet */ + if (IsCitusTableType(rangeTableEntry->relid, REFERENCE_TABLE)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported on reference " + "tables yet", NULL, NULL); + } + + /* Append/Range tables are not supported */ + if (IsCitusTableType(rangeTableEntry->relid, APPEND_DISTRIBUTED) || + IsCitusTableType(rangeTableEntry->relid, RANGE_DISTRIBUTED)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, all the distributed tables " + "must be colocated, for append/range distribution, " + "colocation is not supported", NULL, + "Consider using hash distribution instead"); + } + + return NULL; +} + + +/* + * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE + * present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus + * tables (regular Postgres tables), or distributed tables with some restrictions, please + * see header of routine ErrorIfDistTablesNotColocated for details, raises an exception + * for all other combinations. 
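+ *
+ * In short, the supported combinations are: only regular Postgres tables
+ * and/or Citus local tables, or only distributed tables (which must be
+ * colocated).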
+ */ +static DeferredErrorMessage * +ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList, + PlannerRestrictionContext *restrictionContext) +{ + List *distTablesList = NIL; + bool foundLocalTables = false; + + RangeTblEntry *rangeTableEntry = NULL; + foreach_ptr(rangeTableEntry, rangeTableList) + { + Oid relationId = rangeTableEntry->relid; + + switch (rangeTableEntry->rtekind) + { + case RTE_RELATION: + { + /* Check the relation type */ + break; + } + + case RTE_SUBQUERY: + case RTE_FUNCTION: + case RTE_TABLEFUNC: + case RTE_VALUES: + case RTE_JOIN: + case RTE_CTE: + { + /* Skip them as base table(s) will be checked */ + continue; + } + + /* + * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, + * such as, trigger data; until we find a genuine use case, raise an + * exception. + * RTE_RESULT is a node added by the planner and we shouldn't + * encounter it in the parse tree. + */ + case RTE_NAMEDTUPLESTORE: + case RTE_RESULT: + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported with " + "Tuplestores and results", + NULL, NULL); + } + + default: + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command: Unrecognized range table entry.", + NULL, NULL); + } + } + + /* RTE Relation can be of various types, check them now */ + + /* skip the regular views as they are replaced with subqueries */ + if (rangeTableEntry->relkind == RELKIND_VIEW) + { + continue; + } + + DeferredErrorMessage *errorMessage = + CheckIfRTETypeIsUnsupported(parse, rangeTableEntry); + if (errorMessage) + { + return errorMessage; + } + + /* + * For now, save all distributed tables, later (below) we will + * check for supported combination(s). + */ + if (IsCitusTableType(relationId, DISTRIBUTED_TABLE)) + { + distTablesList = lappend(distTablesList, rangeTableEntry); + continue; + } + + /* Regular Postgres tables and Citus local tables are allowed */ + if (!IsCitusTable(relationId) || + IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) + { + foundLocalTables = true; + continue; + } + + /* Any other Citus table type missing ? */ + } + + /* Ensure all tables are indeed local */ + if (foundLocalTables && list_length(distTablesList) == 0) + { + /* All the tables are local, supported */ + return NULL; + } + else if (foundLocalTables && list_length(distTablesList) > 0) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported with " + "combination of distributed/local tables yet", + NULL, NULL); + } + + /* Ensure all distributed tables are indeed co-located */ + return ErrorIfDistTablesNotColocated(parse, + distTablesList, + restrictionContext); +} + + +/* + * IsPartitionColumnInMerge returns true if the given column is a partition column. + * The function uses FindReferencedTableColumn to find the original relation + * id and column that the column expression refers to. It then checks whether + * that column is a partition column of the relation. + * + * Also, the function returns always false for reference tables given that + * reference tables do not have partition column. + * + * If skipOuterVars is true, then it doesn't process the outervars. 
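+ *
+ * In the MERGE planner this is used to verify that the value inserted into
+ * the target's distribution column is itself the distribution column of the
+ * (colocated) source relation.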
+ */ +bool +IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool + skipOuterVars) +{ + bool isDistributionColumn = false; + Var *column = NULL; + RangeTblEntry *relationRTE = NULL; + + /* ParentQueryList is same as the original query for MERGE */ + FindReferencedTableColumn(columnExpression, list_make1(query), query, &column, + &relationRTE, + skipOuterVars); + Oid relationId = relationRTE ? relationRTE->relid : InvalidOid; + if (relationId != InvalidOid && column != NULL) + { + Var *distributionColumn = DistPartitionKey(relationId); + + /* not all distributed tables have partition column */ + if (distributionColumn != NULL && column->varattno == + distributionColumn->varattno) + { + isDistributionColumn = true; + } + } + + return isDistributionColumn; +} + + +/* + * InsertDistributionColumnMatchesSource check to see if MERGE is inserting a + * value into the target which is not from the source table, if so, it + * raises an exception. + * Note: Inserting random values other than the joined column values will + * result in unexpected behaviour of rows ending up in incorrect shards, to + * prevent such mishaps, we disallow such inserts here. + */ +static DeferredErrorMessage * +InsertDistributionColumnMatchesSource(Query *query, RangeTblEntry *resultRte) +{ + Assert(IsMergeQuery(query)); + + if (!IsCitusTableType(resultRte->relid, DISTRIBUTED_TABLE)) + { + return NULL; + } + + bool foundDistributionColumn = false; + MergeAction *action = NULL; + foreach_ptr(action, query->mergeActionList) + { + /* Skip MATCHED clause as INSERTS are not allowed in it*/ + if (action->matched) + { + continue; + } + + /* NOT MATCHED can have either INSERT or DO NOTHING */ + if (action->commandType == CMD_NOTHING) + { + return NULL; + } + + if (action->targetList == NIL) + { + /* INSERT DEFAULT VALUES is not allowed */ + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot perform MERGE INSERT with DEFAULTS", + NULL, NULL); + } + + Assert(action->commandType == CMD_INSERT); + Var *targetKey = PartitionColumn(resultRte->relid, 1); + + TargetEntry *targetEntry = NULL; + foreach_ptr(targetEntry, action->targetList) + { + AttrNumber originalAttrNo = targetEntry->resno; + + /* skip processing of target table non-partition columns */ + if (originalAttrNo != targetKey->varattno) + { + continue; + } + + foundDistributionColumn = true; + + if (IsA(targetEntry->expr, Var)) + { + if (IsDistributionColumnInMergeSource(targetEntry->expr, query, true)) + { + return NULL; + } + else + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must use the source table " + "distribution column value", + NULL, NULL); + } + } + else + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must refer a source column " + "for distribution column ", + NULL, NULL); + } + } + + if (!foundDistributionColumn) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must have distribution column as value", + NULL, NULL); + } + } + + return NULL; +} + + +/* + * MergeQualAndTargetListFunctionsSupported Checks WHEN/ON clause actions to see what functions + * are allowed, if we are updating distribution column, etc. 
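+ *
+ * Specifically, it rejects updates to the distribution column, VOLATILE
+ * functions, STABLE functions called with column (Var) arguments, mutable
+ * functions inside CASE/COALESCE, field indirection into composite types,
+ * and WHERE CURRENT OF cursors.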
+ */ +static DeferredErrorMessage * +MergeQualAndTargetListFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, + Node *quals, + List *targetList, CmdType commandType) +{ + uint32 rangeTableId = 1; + Var *distributionColumn = NULL; + if (IsCitusTable(resultRelationId) && HasDistributionKey(resultRelationId)) + { + distributionColumn = PartitionColumn(resultRelationId, rangeTableId); + } + + ListCell *targetEntryCell = NULL; + bool hasVarArgument = false; /* A STABLE function is passed a Var argument */ + bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */ + foreach(targetEntryCell, targetList) + { + TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); + + /* skip resjunk entries: UPDATE adds some for ctid, etc. */ + if (targetEntry->resjunk) + { + continue; + } + + bool targetEntryDistributionColumn = false; + AttrNumber targetColumnAttrNumber = InvalidAttrNumber; + + if (distributionColumn) + { + if (commandType == CMD_UPDATE) + { + /* + * Note that it is not possible to give an alias to + * UPDATE table SET ... + */ + if (targetEntry->resname) + { + targetColumnAttrNumber = get_attnum(resultRelationId, + targetEntry->resname); + if (targetColumnAttrNumber == distributionColumn->varattno) + { + targetEntryDistributionColumn = true; + } + } + } + } + + if (targetEntryDistributionColumn && + TargetEntryChangesValue(targetEntry, distributionColumn, joinTree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "updating the distribution column is not " + "allowed in MERGE actions", + NULL, NULL); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + CitusIsVolatileFunction)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "functions used in MERGE actions on distributed " + "tables must not be VOLATILE", + NULL, NULL); + } + + if (MasterIrreducibleExpression((Node *) targetEntry->expr, + &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + NodeIsFieldStore)) + { + /* DELETE cannot do field indirection already */ + Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "inserting or modifying composite type fields is not " + "supported", NULL, + "Use the column name to insert or update the composite " + "type as a single value"); + } + } + + + /* + * Check the condition, convert list of expressions into expression tree for further processing + */ + if (quals) + { + if (IsA(quals, List)) + { + quals = (Node *) make_ands_explicit((List *) quals); + } + + if (FindNodeMatchingCheckFunction((Node *) quals, CitusIsVolatileFunction)) + { + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "functions used in the %s clause of MERGE " + "queries on distributed tables must not be VOLATILE", + (commandType == CMD_MERGE) ? 
"ON" : "WHEN"); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + else if (MasterIrreducibleExpression(quals, &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + } + + if (hasVarArgument) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "STABLE functions used in MERGE queries " + "cannot be called with column references", + NULL, NULL); + } + + if (hasBadCoalesce) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not allowed in CASE or " + "COALESCE statements", + NULL, NULL); + } + + if (quals != NULL && nodeTag(quals) == T_CurrentOfExpr) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot run MERGE actions with cursors", + NULL, NULL); + } + + return NULL; +} + + +#endif diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index b30dddeb7..be6caf0e2 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -2225,17 +2225,14 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId, } /* - * For left joins we don't care about the shards pruned for - * the right hand side. If the right hand side would prune - * to a smaller set we should still send it to all tables - * of the left hand side. However if the right hand side is - * bigger than the left hand side we don't have to send the - * query to any shard that is not matching anything on the - * left hand side. + * For left joins we don't care about the shards pruned for the right hand side. + * If the right hand side would prune to a smaller set we should still send it to + * all tables of the left hand side. However if the right hand side is bigger than + * the left hand side we don't have to send the query to any shard that is not + * matching anything on the left hand side. * - * Instead we will simply skip any RelationRestriction if it - * is an OUTER join and the table is part of the non-outer - * side of the join. + * Instead we will simply skip any RelationRestriction if it is an OUTER join and + * the table is part of the non-outer side of the join. 
*/ if (IsInnerTableOfOuterJoin(relationRestriction)) { diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 99beff2c8..407aeaf65 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -33,6 +33,7 @@ #include "distributed/intermediate_result_pruning.h" #include "distributed/metadata_utility.h" #include "distributed/coordinator_protocol.h" +#include "distributed/merge_planner.h" #include "distributed/metadata_cache.h" #include "distributed/multi_executor.h" #include "distributed/multi_join_order.h" @@ -125,21 +126,15 @@ static bool IsTidColumn(Node *node); static DeferredErrorMessage * ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery, Oid *distributedTableId); -static bool NodeIsFieldStore(Node *node); -static DeferredErrorMessage * MultiShardUpdateDeleteMergeSupported(Query *originalQuery, - PlannerRestrictionContext - * - plannerRestrictionContext); +static DeferredErrorMessage * MultiShardUpdateDeleteSupported(Query *originalQuery, + PlannerRestrictionContext + * + plannerRestrictionContext); static DeferredErrorMessage * SingleShardUpdateDeleteSupported(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext); -static bool HasDangerousJoinUsing(List *rtableList, Node *jtnode); -static bool MasterIrreducibleExpression(Node *expression, bool *varArgument, - bool *badCoalesce); static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state); static bool MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context); -static bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, - FromExpr *joinTree); static Job * RouterInsertJob(Query *originalQuery); static void ErrorIfNoShardsExist(CitusTableCacheEntry *cacheEntry); static DeferredErrorMessage * DeferErrorIfModifyView(Query *queryTree); @@ -179,12 +174,8 @@ static void ReorderTaskPlacementsByTaskAssignmentPolicy(Job *job, static bool ModifiesLocalTableWithRemoteCitusLocalTable(List *rangeTableList); static DeferredErrorMessage * DeferErrorIfUnsupportedLocalTableJoin(List *rangeTableList); static bool IsLocallyAccessibleCitusLocalTable(Oid relationId); -static DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, - FromExpr *joinTree, - Node *quals, - List *targetList, - CmdType commandType, - List *returningList); + + /* * CreateRouterPlan attempts to create a router executor plan for the given * SELECT statement. ->planningError is set if planning fails. @@ -521,7 +512,7 @@ IsTidColumn(Node *node) * updating distribution column, etc. * Note: This subset of checks are repeated for each MERGE modify action. */ -static DeferredErrorMessage * +DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, Node *quals, List *targetList, CmdType commandType, List *returningList) @@ -897,92 +888,13 @@ IsLocallyAccessibleCitusLocalTable(Oid relationId) /* * NodeIsFieldStore returns true if given Node is a FieldStore object. 
*/ -static bool +bool NodeIsFieldStore(Node *node) { return node && IsA(node, FieldStore); } -/* - * MergeQuerySupported does check for a MERGE command in the query, if it finds - * one, it will verify the below criteria - * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables - * - Distributed tables requirements in ErrorIfDistTablesNotColocated - * - Checks target-lists and functions-in-quals in TargetlistAndFunctionsSupported - */ -static DeferredErrorMessage * -MergeQuerySupported(Query *originalQuery, - PlannerRestrictionContext *plannerRestrictionContext) -{ - /* For non-MERGE commands it's a no-op */ - if (!QueryHasMergeCommand(originalQuery)) - { - return NULL; - } - - List *rangeTableList = ExtractRangeTableEntryList(originalQuery); - RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery); - - /* - * Fast path queries cannot have merge command, and we prevent the remaining here. - * In Citus we have limited support for MERGE, it's allowed only if all - * the tables(target, source or any CTE) tables are are local i.e. a - * combination of Citus local and Non-Citus tables (regular Postgres tables) - * or distributed tables with some restrictions, please see header of routine - * ErrorIfDistTablesNotColocated for details. - */ - DeferredErrorMessage *deferredError = - ErrorIfMergeHasUnsupportedTables(originalQuery, - rangeTableList, - plannerRestrictionContext); - if (deferredError) - { - return deferredError; - } - - Oid resultRelationId = resultRte->relid; - deferredError = - TargetlistAndFunctionsSupported(resultRelationId, - originalQuery->jointree, - originalQuery->jointree->quals, - originalQuery->targetList, - originalQuery->commandType, - originalQuery->returningList); - if (deferredError) - { - return deferredError; - } - - #if PG_VERSION_NUM >= PG_VERSION_15 - - /* - * MERGE is a special case where we have multiple modify statements - * within itself. Check each INSERT/UPDATE/DELETE individually. - */ - MergeAction *action = NULL; - foreach_ptr(action, originalQuery->mergeActionList) - { - Assert(originalQuery->returningList == NULL); - deferredError = - TargetlistAndFunctionsSupported(resultRelationId, - originalQuery->jointree, - action->qual, - action->targetList, - action->commandType, - originalQuery->returningList); - if (deferredError) - { - return deferredError; - } - } - - #endif - - return NULL; -} - - /* * ModifyQuerySupported returns NULL if the query only contains supported * features, otherwise it returns an error description. @@ -998,14 +910,11 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer PlannerRestrictionContext *plannerRestrictionContext) { Oid distributedTableId = InvalidOid; - DeferredErrorMessage *error = MergeQuerySupported(originalQuery, + DeferredErrorMessage *error = MergeQuerySupported(originalQuery, multiShardQuery, plannerRestrictionContext); if (error) { - /* - * For MERGE, we do not do recursive plannning, simply bail out. 
- */ - RaiseDeferredError(error, ERROR); + return error; } error = ModifyPartialQuerySupported(queryTree, multiShardQuery, &distributedTableId); @@ -1178,13 +1087,13 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer } } - if (commandType != CMD_INSERT) + if (commandType != CMD_INSERT && commandType != CMD_MERGE) { DeferredErrorMessage *errorMessage = NULL; if (multiShardQuery) { - errorMessage = MultiShardUpdateDeleteMergeSupported( + errorMessage = MultiShardUpdateDeleteSupported( originalQuery, plannerRestrictionContext); } @@ -1365,12 +1274,12 @@ ErrorIfOnConflictNotSupported(Query *queryTree) /* - * MultiShardUpdateDeleteMergeSupported returns the error message if the update/delete is + * MultiShardUpdateDeleteSupported returns the error message if the update/delete is * not pushdownable, otherwise it returns NULL. */ static DeferredErrorMessage * -MultiShardUpdateDeleteMergeSupported(Query *originalQuery, - PlannerRestrictionContext *plannerRestrictionContext) +MultiShardUpdateDeleteSupported(Query *originalQuery, + PlannerRestrictionContext *plannerRestrictionContext) { DeferredErrorMessage *errorMessage = NULL; RangeTblEntry *resultRangeTable = ExtractResultRelationRTE(originalQuery); @@ -1401,8 +1310,9 @@ MultiShardUpdateDeleteMergeSupported(Query *originalQuery, } else { - errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(originalQuery, - plannerRestrictionContext); + errorMessage = DeferErrorIfUnsupportedSubqueryPushdown( + originalQuery, + plannerRestrictionContext); } return errorMessage; @@ -1442,7 +1352,7 @@ SingleShardUpdateDeleteSupported(Query *originalQuery, * HasDangerousJoinUsing search jointree for unnamed JOIN USING. Check the * implementation of has_dangerous_join_using in ruleutils. */ -static bool +bool HasDangerousJoinUsing(List *rtableList, Node *joinTreeNode) { if (IsA(joinTreeNode, RangeTblRef)) @@ -1546,7 +1456,7 @@ IsMergeQuery(Query *query) * which do, but for now we just error out. That makes both the code and user-education * easier. */ -static bool +bool MasterIrreducibleExpression(Node *expression, bool *varArgument, bool *badCoalesce) { WalkerState data; @@ -1694,7 +1604,7 @@ MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context) * expression is a value that is implied by the qualifiers of the join * tree, or the target entry sets a different column. */ -static bool +bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, FromExpr *joinTree) { bool isColumnValueChanged = true; @@ -1965,8 +1875,8 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon if (*planningError) { /* - * For MERGE, we do _not_ plan anything other than Router job, let's - * not continue further down the lane in distributed planning, simply + * For MERGE, we do _not_ plan any other router job than the MERGE job itself, + * let's not continue further down the lane in distributed planning, simply * bail out. */ if (IsMergeQuery(originalQuery)) @@ -4056,263 +3966,3 @@ CompareInsertValuesByShardId(const void *leftElement, const void *rightElement) } } } - - -/* - * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is - * permitted on special relations, such as materialized view, returns true only if - * it's a "source" relation. - */ -bool -IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) -{ - if (!IsMergeQuery(parse)) - { - return false; - } - - RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); - - /* Is it a target relation? 
*/ - if (targetRte->relid == rte->relid) - { - return false; - } - - return true; -} - - -/* - * ErrorIfDistTablesNotColocated Checks to see if - * - * - There are a minimum of two distributed tables (source and a target). - * - All the distributed tables are indeed colocated. - * - MERGE relations are joined on the distribution column - * MERGE .. USING .. ON target.dist_key = source.dist_key - * - * If any of the conditions are not met, it raises an exception. - */ -static DeferredErrorMessage * -ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList, - PlannerRestrictionContext *plannerRestrictionContext) -{ - /* All MERGE tables must be distributed */ - if (list_length(distTablesList) < 2) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "For MERGE command, both the source and target " - "must be distributed", NULL, NULL); - } - - /* All distributed tables must be colocated */ - if (!AllRelationsInListColocated(distTablesList, RANGETABLE_ENTRY)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "For MERGE command, all the distributed tables " - "must be colocated", NULL, NULL); - } - - /* Are source and target tables joined on distribution column? */ - if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command is only supported when distributed " - "tables are joined on their distribution column", - NULL, NULL); - } - - return NULL; -} - - -/* - * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE - * present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus - * tables (regular Postgres tables), or distributed tables with some restrictions, please - * see header of routine ErrorIfDistTablesNotColocated for details, raises an exception - * for all other combinations. - */ -static DeferredErrorMessage * -ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList, - PlannerRestrictionContext *restrictionContext) -{ - List *distTablesList = NIL; - bool foundLocalTables = false; - - RangeTblEntry *rangeTableEntry = NULL; - foreach_ptr(rangeTableEntry, rangeTableList) - { - Oid relationId = rangeTableEntry->relid; - - switch (rangeTableEntry->rtekind) - { - case RTE_RELATION: - { - /* Check the relation type */ - break; - } - - case RTE_SUBQUERY: - case RTE_FUNCTION: - case RTE_TABLEFUNC: - case RTE_VALUES: - case RTE_JOIN: - case RTE_CTE: - { - /* Skip them as base table(s) will be checked */ - continue; - } - - /* - * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, - * such as, trigger data; until we find a genuine use case, raise an - * exception. - * RTE_RESULT is a node added by the planner and we shouldn't - * encounter it in the parse tree. 
- */ - case RTE_NAMEDTUPLESTORE: - case RTE_RESULT: - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command is not supported with " - "Tuplestores and results", - NULL, NULL); - } - - default: - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command: Unrecognized range table entry.", - NULL, NULL); - } - } - - /* RTE Relation can be of various types, check them now */ - - /* skip the regular views as they are replaced with subqueries */ - if (rangeTableEntry->relkind == RELKIND_VIEW) - { - continue; - } - - if (rangeTableEntry->relkind == RELKIND_MATVIEW || - rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) - { - /* Materialized view or Foreign table as target is not allowed */ - if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) - { - /* Non target relation is ok */ - continue; - } - else - { - /* Usually we don't reach this exception as the Postgres parser catches it */ - StringInfo errorMessage = makeStringInfo(); - appendStringInfo(errorMessage, - "MERGE command is not allowed on " - "relation type(relkind:%c)", rangeTableEntry->relkind); - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorMessage->data, - NULL, NULL); - } - } - - if (rangeTableEntry->relkind != RELKIND_RELATION && - rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) - { - StringInfo errorMessage = makeStringInfo(); - appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) " - "in MERGE command", rangeTableEntry->relkind); - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorMessage->data, - NULL, NULL); - } - - Assert(rangeTableEntry->relid != 0); - - /* Reference tables are not supported yet */ - if (IsCitusTableType(relationId, REFERENCE_TABLE)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command is not supported on reference " - "tables yet", NULL, NULL); - } - - /* Append/Range tables are not supported */ - if (IsCitusTableType(relationId, APPEND_DISTRIBUTED) || - IsCitusTableType(relationId, RANGE_DISTRIBUTED)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "For MERGE command, all the distributed tables " - "must be colocated, for append/range distribution, " - "colocation is not supported", NULL, - "Consider using hash distribution instead"); - } - - /* - * For now, save all distributed tables, later (below) we will - * check for supported combination(s). - */ - if (IsCitusTableType(relationId, DISTRIBUTED_TABLE)) - { - distTablesList = lappend(distTablesList, rangeTableEntry); - continue; - } - - /* Regular Postgres tables and Citus local tables are allowed */ - if (!IsCitusTable(relationId) || - IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) - { - foundLocalTables = true; - continue; - } - - /* Any other Citus table type missing ? */ - } - - /* Ensure all tables are indeed local */ - if (foundLocalTables && list_length(distTablesList) == 0) - { - /* All the tables are local, supported */ - return NULL; - } - else if (foundLocalTables && list_length(distTablesList) > 0) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "MERGE command is not supported with " - "combination of distributed/local tables yet", - NULL, NULL); - } - - /* Ensure all distributed tables are indeed co-located */ - return ErrorIfDistTablesNotColocated(parse, distTablesList, restrictionContext); -} - - -/* - * QueryHasMergeCommand walks over the query tree and returns false if there - * is no Merge command (e.g., CMD_MERGE), true otherwise. 
- */ -static bool -QueryHasMergeCommand(Query *queryTree) -{ - /* function is void for pre-15 versions of Postgres */ - #if PG_VERSION_NUM < PG_VERSION_15 - return false; - #else - - /* - * Postgres currently doesn't support Merge queries inside subqueries and - * ctes, but lets be defensive and do query tree walk anyway. - * - * We do not call this path for fast-path queries to avoid this additional - * overhead. - */ - if (!ContainsMergeCommandWalker((Node *) queryTree)) - { - /* No MERGE found */ - return false; - } - - return true; - #endif -} diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 5cae19497..cbe6a3606 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -591,10 +591,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, } else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) { + StringInfo errorMessage = makeStringInfo(); + bool isMergeCmd = IsMergeQuery(originalQuery); + appendStringInfo(errorMessage, + "%s" + "only supported when all distributed tables are " + "co-located and joined on their distribution columns", + isMergeCmd ? "MERGE command is " : "complex joins are "); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "complex joins are only supported when all distributed tables are " - "co-located and joined on their distribution columns", - NULL, NULL); + errorMessage->data, NULL, NULL); } /* we shouldn't allow reference tables in the FROM clause when the query has sublinks */ diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index f76a95d26..5c91ee79c 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -151,6 +151,9 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass secondClass); static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex, Index *partitionKeyIndex); +static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext * + restrictionContext); +static bool AllRelationsInListColocated(List *relationList); static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node); static JoinRestrictionContext * FilterJoinRestrictionContext( JoinRestrictionContext *joinRestrictionContext, Relids @@ -381,8 +384,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery, return false; } - if (!AllRelationsInListColocated(restrictionContext->relationRestrictionList, - RESTRICTION_CONTEXT)) + if (!AllRelationsInRestrictionContextColocated(restrictionContext)) { /* distribution columns are equal, but tables are not co-located */ return false; @@ -1918,34 +1920,56 @@ FindQueryContainingRTEIdentityInternal(Node *node, /* - * AllRelationsInListColocated determines whether all of the relations in the - * given list are co-located. - * Note: The list can be of dofferent types, which is specified by ListEntryType + * AllRelationsInRestrictionContextColocated determines whether all of the relations in the + * given relation restrictions list are co-located. 
*/ -bool -AllRelationsInListColocated(List *relationList, ListEntryType entryType) +static bool +AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext) { - void *varPtr = NULL; - RangeTblEntry *rangeTableEntry = NULL; RelationRestriction *relationRestriction = NULL; - int initialColocationId = INVALID_COLOCATION_ID; + List *relationIdList = NIL; /* check whether all relations exists in the main restriction list */ - foreach_ptr(varPtr, relationList) + foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList) { - Oid relationId = InvalidOid; + relationIdList = lappend_oid(relationIdList, relationRestriction->relationId); + } - if (entryType == RANGETABLE_ENTRY) - { - rangeTableEntry = (RangeTblEntry *) varPtr; - relationId = rangeTableEntry->relid; - } - else if (entryType == RESTRICTION_CONTEXT) - { - relationRestriction = (RelationRestriction *) varPtr; - relationId = relationRestriction->relationId; - } + return AllRelationsInListColocated(relationIdList); +} + +/* + * AllRelationsInRTEListColocated determines whether all of the relations in the + * given RangeTableEntry list are co-located. + */ +bool +AllRelationsInRTEListColocated(List *rangeTableEntryList) +{ + RangeTblEntry *rangeTableEntry = NULL; + List *relationIdList = NIL; + + foreach_ptr(rangeTableEntry, rangeTableEntryList) + { + relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid); + } + + return AllRelationsInListColocated(relationIdList); +} + + +/* + * AllRelationsInListColocated determines whether all of the relations in the + * given list are co-located. + */ +static bool +AllRelationsInListColocated(List *relationList) +{ + int initialColocationId = INVALID_COLOCATION_ID; + Oid relationId = InvalidOid; + + foreach_oid(relationId, relationList) + { if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { continue; diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index 19bd9f0c2..412859449 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -255,10 +255,4 @@ extern struct DistributedPlan * CreateDistributedPlan(uint64 planId, PlannerRestrictionContext * plannerRestrictionContext); -extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte); -extern bool ConjunctionContainsColumnFilter(Node *node, - Var *column, - Node **distributionKeyValue); -extern bool ContainsMergeCommandWalker(Node *node); - #endif /* DISTRIBUTED_PLANNER_H */ diff --git a/src/include/distributed/merge_planner.h b/src/include/distributed/merge_planner.h new file mode 100644 index 000000000..243be14d0 --- /dev/null +++ b/src/include/distributed/merge_planner.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * merge_planner.h + * + * Declarations for public functions and types related to router planning. + * + * Copyright (c) Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#ifndef MERGE_PLANNER_H +#define MERGE_PLANNER_H + +#include "c.h" + +#include "nodes/parsenodes.h" +#include "distributed/distributed_planner.h" +#include "distributed/errormessage.h" + +extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte); +extern DeferredErrorMessage * MergeQuerySupported(Query *originalQuery, + bool multiShardQuery, + PlannerRestrictionContext * + plannerRestrictionContext); +#endif /* MERGE_PLANNER_H */ diff --git a/src/include/distributed/multi_router_planner.h b/src/include/distributed/multi_router_planner.h index 07d160865..698a0fd60 100644 --- a/src/include/distributed/multi_router_planner.h +++ b/src/include/distributed/multi_router_planner.h @@ -100,6 +100,17 @@ extern PlannedStmt * FastPathPlanner(Query *originalQuery, Query *parse, ParamLi extern bool FastPathRouterQuery(Query *query, Node **distributionKeyValue); extern bool JoinConditionIsOnFalse(List *relOptInfo); extern Oid ResultRelationOidForQuery(Query *query); - +extern DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, + FromExpr *joinTree, + Node *quals, + List *targetList, + CmdType commandType, + List *returningList); +extern bool NodeIsFieldStore(Node *node); +extern bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, + FromExpr *joinTree); +extern bool MasterIrreducibleExpression(Node *expression, bool *varArgument, + bool *badCoalesce); +extern bool HasDangerousJoinUsing(List *rtableList, Node *jtnode); #endif /* MULTI_ROUTER_PLANNER_H */ diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index 4fd9c7015..e0e716c7e 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -17,15 +17,6 @@ #define SINGLE_RTE_INDEX 1 -/* - * Represents the pointer type that's being passed in the list. 
- */ -typedef enum ListEntryType -{ - RANGETABLE_ENTRY, /* RangeTblEntry */ - RESTRICTION_CONTEXT /* RelationRestriction */ -} ListEntryType; - extern bool AllDistributionKeysInQueryAreEqual(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext); @@ -63,6 +54,5 @@ extern RelationRestrictionContext * FilterRelationRestrictionContext( RelationRestrictionContext *relationRestrictionContext, Relids queryRteIdentities); -extern bool AllRelationsInListColocated(List *relationList, ListEntryType entryType); - +extern bool AllRelationsInRTEListColocated(List *rangeTableEntryList); #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ diff --git a/src/test/regress/create_schedule b/src/test/regress/create_schedule index 82dfa2475..db2ae92be 100644 --- a/src/test/regress/create_schedule +++ b/src/test/regress/create_schedule @@ -13,3 +13,4 @@ test: arbitrary_configs_truncate_create test: arbitrary_configs_truncate_cascade_create test: arbitrary_configs_truncate_partition_create test: arbitrary_configs_alter_table_add_constraint_without_name_create +test: merge_arbitrary_create diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 02671acd0..e2b3aea65 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -17,8 +17,9 @@ CREATE SCHEMA merge_schema; SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; -SET citus.explain_all_tasks to true; +SET citus.explain_all_tasks TO true; SET citus.shard_replication_factor TO 1; +SET citus.max_adaptive_executor_pool_size TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ?column? 
@@ -268,6 +269,29 @@ SELECT * from target t WHERE t.customer_id = 30004; --------------------------------------------------------------------- (0 rows) +-- Updating distribution column is allowed if the operation is a no-op +SELECT * from target t WHERE t.customer_id = 30000; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022 +(1 row) + +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = 30000; +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = t.customer_id; +SELECT * from target t WHERE t.customer_id = 30000; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022 +(1 row) + -- -- Test MERGE with CTE as source -- @@ -310,7 +334,6 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (pg_res.id, pg_res.val); --- Two rows with id 2 and val incremented, id 3, and id 1 is deleted SELECT * FROM t1 order by id; id | val --------------------------------------------------------------------- @@ -1200,7 +1223,8 @@ END; $$ language plpgsql volatile; CREATE TABLE fn_target(id int, data varchar); MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -1216,29 +1240,22 @@ SELECT citus_add_local_table_to_metadata('fn_target'); (1 row) -SELECT create_distributed_table('dist_table', 'id'); -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. 
-HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.dist_table$$) - create_distributed_table +SELECT citus_add_local_table_to_metadata('dist_table'); + citus_add_local_table_to_metadata --------------------------------------------------------------------- (1 row) SET client_min_messages TO DEBUG1; MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT VALUES(fn_source.id, fn_source.source); -DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT id, source FROM merge_schema.f_dist() f(id integer, source character varying) -DEBUG: -DEBUG: Plan XXX query after replacing subqueries and CTEs: MERGE INTO merge_schema.fn_target USING (SELECT intermediate_result.id, intermediate_result.source FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, source character varying)) fn_source ON (fn_source.id OPERATOR(pg_catalog.=) fn_target.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT (id, data) VALUES (fn_source.id, fn_source.source) -DEBUG: +DEBUG: RESET client_min_messages; SELECT * INTO fn_local FROM fn_target ORDER BY 1 ; -- Should be equal @@ -1959,7 +1976,7 @@ ON pg_target.id = sub.id AND pg_target.id = $1 WHEN MATCHED THEN UPDATE SET val = 'Updated by prepare using ' || sub.val WHEN NOT MATCHED THEN - DO NOTHING; + INSERT VALUES (sub.id, sub.val); PREPARE citus_prep(int) AS MERGE INTO citus_target USING (SELECT * FROM citus_source) sub @@ -1967,15 +1984,20 @@ ON citus_target.id = sub.id AND citus_target.id = $1 WHEN MATCHED THEN UPDATE SET val = 'Updated by prepare using ' || sub.val WHEN NOT MATCHED THEN - DO NOTHING; + INSERT VALUES (sub.id, sub.val); BEGIN; -SET citus.log_remote_commands to true; SELECT * FROM pg_target WHERE id = 500; -- before merge id | val --------------------------------------------------------------------- 500 | target (1 row) +SELECT count(*) FROM pg_target; -- before merge + count +--------------------------------------------------------------------- + 251 +(1 row) + EXECUTE pg_prep(500); SELECT * FROM pg_target WHERE id = 500; -- non-cached id | val @@ -1994,18 +2016,33 @@ SELECT * FROM pg_target WHERE id = 500; -- cached 500 | Updated by prepare using source (1 row) +SELECT count(*) FROM pg_target; -- cached + count +--------------------------------------------------------------------- + 3245 +(1 row) + SELECT * FROM citus_target WHERE id = 500; -- before merge -NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | val --------------------------------------------------------------------- 500 | target (1 row) +SELECT count(*) FROM citus_target; -- before merge + count +--------------------------------------------------------------------- + 251 +(1 row) + +SET citus.log_remote_commands to true; EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, 
citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx SELECT * FROM citus_target WHERE id = 500; -- non-cached NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) @@ -2016,29 +2053,63 @@ DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx (1 row) EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) 
AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx 
connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) 
WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx EXECUTE citus_prep(500); -NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT 
citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; SELECT * FROM citus_target WHERE id = 500; -- cached -NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | val --------------------------------------------------------------------- 500 | Updated by prepare using source (1 row) -SET citus.log_remote_commands to false; +SELECT count(*) FROM citus_target; -- cached + count +--------------------------------------------------------------------- + 3245 +(1 row) + SELECT compare_tables(); compare_tables --------------------------------------------------------------------- @@ -2165,9 +2236,263 @@ SELECT pa_compare_tables(); (1 row) ROLLBACK; +CREATE TABLE source_json( id integer, z int, d jsonb); +CREATE TABLE target_json( id integer, z int, d jsonb); +INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i; +SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_json$$) + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +-- single shard query given source_json is filtered and Postgres is smart to pushdown +-- filter to the target_json as well +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING (SELECT * FROM source_json WHERE id = 1) sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 1 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 1 | 5 | +(1 row) + +-- zero shard query as filters do not match +--SELECT public.coordinator_plan($Q$ +--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +--USING (SELECT * FROM source_json WHERE id = 1) sdn +--ON sda.id = sdn.id AND sda.id = 2 +--WHEN NOT matched THEN +-- INSERT (id, z) VALUES (sdn.id, 5); +--$Q$); +--SELECT * FROM target_json ORDER BY 1; +-- join for source_json is happening at a different place +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z) +ON sda.id = s1.id AND s1.id = s2.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (s2.id, 5); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 4 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 0 | 5 | + 1 | 5 | + 2 | 5 | + 3 | 5 | + 4 | 5 | + 5 | 5 | +(6 rows) + +-- update JSON column +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET d = '{"a" : 5}'; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 4 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 0 | 5 | {"a": 5} + 1 | 5 | {"a": 5} + 2 | 5 | {"a": 5} + 3 | 5 | {"a": 5} + 4 | 5 | {"a": 5} + 5 | 5 | {"a": 5} +(6 rows) + +CREATE FUNCTION immutable_hash(int) RETURNS int +AS 'SELECT hashtext( ($1 + $1)::text);' +LANGUAGE SQL +IMMUTABLE +RETURNS NULL ON NULL INPUT; +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET z = immutable_hash(sdn.z); +-- Test bigserial +CREATE TABLE source_serial (id integer, z int, d bigserial); +CREATE TABLE target_serial (id integer, z int, d bigserial); +INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i; +SELECT create_distributed_table('source_serial', 'id'), + create_distributed_table('target_serial', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_serial$$) + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +MERGE INTO target_serial sda +USING source_serial sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (id, z); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +SELECT count(*) from source_serial; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(*) from target_serial; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(distinct d) from source_serial; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(distinct d) from target_serial; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Test set operations +CREATE TABLE target_set(t1 int, t2 int); +CREATE TABLE source_set(s1 int, s2 int); +SELECT create_distributed_table('target_set', 't1'), + create_distributed_table('source_set', 's1'); + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +INSERT INTO target_set VALUES(1, 0); +INSERT INTO source_set VALUES(1, 1); +INSERT INTO source_set VALUES(2, 2); +MERGE INTO target_set +USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 100 +WHEN NOT MATCHED THEN + INSERT VALUES(foo.s1); +SELECT * FROM target_set ORDER BY 1, 2; + t1 | t2 +--------------------------------------------------------------------- + 1 | 100 + 2 | +(2 rows) + -- -- Error and Unsupported scenarios -- +MERGE INTO target_set +USING (SELECT s1,s2 FROM source_set UNION SELECT s2,s1 FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +MERGE INTO target_set +USING (SELECT 2 as s3, source_set.* FROM (SELECT * FROM source_set LIMIT 1) as foo LEFT JOIN source_set USING( s1)) AS foo +ON target_set.t1 = foo.s1 +WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 +WHEN NOT MATCHED THEN INSERT VALUES(s1, s3); +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query +-- modifying CTE not supported +EXPLAIN +WITH cte_1 AS (DELETE FROM target_json) +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +-- Grouping sets not supported +MERGE INTO citus_target t +USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +ERROR: cannot push down this subquery +DETAIL: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP +WITH subq AS +( +SELECT 
count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val) +) +MERGE INTO citus_target t +USING subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +ERROR: cannot push down this subquery +DETAIL: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP +-- try inserting unmatched distribution column value +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT DEFAULT VALUES; +ERROR: cannot perform MERGE INSERT with DEFAULTS +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT VALUES(10000); +ERROR: MERGE INSERT must refer a source column for distribution column +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(1000); +ERROR: MERGE INSERT must refer a source column for distribution column +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(s.val); +ERROR: MERGE INSERT must use the source table distribution column value +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (val) VALUES(s.val); +ERROR: MERGE INSERT must have distribution column as value -- try updating the distribution key column BEGIN; MERGE INTO target_cj t @@ -2177,7 +2502,7 @@ MERGE INTO target_cj t UPDATE SET tid = tid + 9, src = src || ' updated by merge' WHEN NOT MATCHED THEN INSERT VALUES (sid1, 'inserted by merge', val1); -ERROR: modifying the partition value of rows is not allowed +ERROR: updating the distribution column is not allowed in MERGE actions ROLLBACK; -- Foreign table as target MERGE INTO foreign_table @@ -2269,13 +2594,31 @@ BEGIN RETURN TRUE; END; $$; +-- Test functions executing in MERGE statement. This is to prevent the functions from +-- doing a random sql, which may be executed in a remote node or modifying the target +-- relation which will have unexpected/suprising results. 
+MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON + t1.id = s1.id AND s1.id = 2 + WHEN matched THEN + UPDATE SET id = s1.id, val = random(); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +-- Test STABLE function +CREATE FUNCTION add_s(integer, integer) RETURNS integer +AS 'select $1 + $2;' +LANGUAGE SQL +STABLE RETURNS NULL ON NULL INPUT; +MERGE INTO t1 +USING s1 ON t1.id = s1.id +WHEN NOT MATCHED THEN + INSERT VALUES(s1.id, add_s(s1.val, 2)); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables -- Test preventing "ON" join condition from writing to the database BEGIN; MERGE INTO t1 USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) WHEN MATCHED THEN UPDATE SET val = t1.val + s1.val; -ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; -- Test preventing WHEN clause(s) from writing to the database BEGIN; @@ -2283,7 +2626,7 @@ MERGE INTO t1 USING s1 ON t1.id = s1.id AND t1.id = 2 WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET val = t1.val + s1.val; -ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; -- Joining on partition columns with sub-query MERGE INTO t1 @@ -2294,7 +2637,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- Joining on partition columns with CTE WITH s1_res AS ( SELECT * FROM s1 @@ -2307,7 +2650,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- Constant Join condition WITH s1_res AS ( SELECT * FROM s1 @@ -2320,7 +2663,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- With a single WHEN clause, which causes a non-left join WITH s1_res AS ( SELECT * FROM s1 @@ -2329,7 +2672,7 @@ WITH s1_res AS ( WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- -- Reference tables -- @@ -2559,7 +2902,7 @@ WHEN MATCHED THEN UPDATE SET val = dist_colocated.val WHEN NOT MATCHED THEN INSERT VALUES(dist_colocated.id, dist_colocated.val); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables 
are co-located and joined on their distribution columns -- Both the source and target must be distributed MERGE INTO dist_target USING (SELECT 100 id) AS source @@ -2752,14 +3095,14 @@ HINT: Consider using hash distribution instead DROP SERVER foreign_server CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to user mapping for postgres on server foreign_server -drop cascades to foreign table foreign_table_4000046 +drop cascades to foreign table foreign_table_4000043 drop cascades to foreign table foreign_table -NOTICE: foreign table "foreign_table_4000046" does not exist, skipping +NOTICE: foreign table "foreign_table_4000043" does not exist, skipping CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name, drop_shards_metadata_only := false)" PL/pgSQL function citus_drop_trigger() line XX at PERFORM DROP FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; -NOTICE: drop cascades to 75 other objects +NOTICE: drop cascades to 84 other objects DETAIL: drop cascades to function insert_data() drop cascades to table pg_result drop cascades to table local_local @@ -2801,14 +3144,15 @@ drop cascades to table mv_target drop cascades to table mv_source_table drop cascades to materialized view mv_source drop cascades to table mv_local -drop cascades to table dist_table +drop cascades to table dist_table_4000041 drop cascades to function f_dist() drop cascades to table fn_target_4000040 drop cascades to table fn_result drop cascades to table fn_target +drop cascades to table dist_table drop cascades to table fn_local drop cascades to table ft_target -drop cascades to table ft_source_4000045 +drop cascades to table ft_source_4000042 drop cascades to table ft_source drop cascades to extension postgres_fdw drop cascades to table target_cj @@ -2826,9 +3170,17 @@ drop cascades to table citus_pa_target drop cascades to table pg_pa_source drop cascades to table citus_pa_source drop cascades to function pa_compare_tables() +drop cascades to table source_json +drop cascades to table target_json +drop cascades to function immutable_hash(integer) +drop cascades to table source_serial +drop cascades to table target_serial +drop cascades to table target_set +drop cascades to table source_set +drop cascades to function add_s(integer,integer) drop cascades to table pg -drop cascades to table t1_4000110 -drop cascades to table s1_4000111 +drop cascades to table t1_4000131 +drop cascades to table s1_4000132 drop cascades to table t1 drop cascades to table s1 drop cascades to table dist_colocated diff --git a/src/test/regress/expected/merge_arbitrary.out b/src/test/regress/expected/merge_arbitrary.out new file mode 100644 index 000000000..345ac1410 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary.out @@ -0,0 +1,150 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif +SET search_path TO merge_arbitrary_schema; +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM 
target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test PREPARE +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +INSERT INTO prept VALUES(100, 0); +INSERT INTO preps VALUES(100, 0); +INSERT INTO preps VALUES(200, 0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +-- sixth time +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +-- Should have the counter as 14 (7 * 2) +SELECT * FROM prept; + t1 | t2 +--------------------------------------------------------------------- + 100 | 14 +(1 row) + +-- Test local tables +INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause +INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause +INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause +INSERT INTO t1 VALUES(1, 0); -- Will be deleted +INSERT INTO t1 VALUES(2, 0); -- Will be updated +INSERT INTO t1 VALUES(5, 0); -- Will be intact +PREPARE local(int, int) AS +WITH s1_res AS ( + SELECT * FROM s1 +) +MERGE INTO t1 + USING s1_res ON (s1_res.id = t1.id) + WHEN MATCHED AND s1_res.val = $1 THEN + DELETE + WHEN MATCHED THEN + UPDATE SET val = t1.val + $2 + WHEN NOT MATCHED THEN + INSERT (id, val) VALUES (s1_res.id, s1_res.val); +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(5 rows) + +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +-- sixth time +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(5 rows) + +ROLLBACK; diff --git a/src/test/regress/expected/merge_arbitrary_0.out b/src/test/regress/expected/merge_arbitrary_0.out new file mode 100644 index 000000000..a7e3fbf20 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_0.out @@ -0,0 +1,6 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q diff --git 
a/src/test/regress/expected/merge_arbitrary_create.out b/src/test/regress/expected/merge_arbitrary_create.out new file mode 100644 index 000000000..9b2444f17 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_create.out @@ -0,0 +1,72 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif +DROP SCHEMA IF EXISTS merge_arbitrary_schema CASCADE; +CREATE SCHEMA merge_arbitrary_schema; +SET search_path TO merge_arbitrary_schema; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 6000000; +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); +SELECT create_distributed_table('target_cj', 'tid'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj1', 'sid1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj2', 'sid2'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE prept(t1 int, t2 int); +CREATE TABLE preps(s1 int, s2 int); +SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +-- Citus local tables +CREATE TABLE t1(id int, val int); +CREATE TABLE s1(id int, val int); +SELECT citus_add_local_table_to_metadata('t1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_local_table_to_metadata('s1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/merge_arbitrary_create_0.out b/src/test/regress/expected/merge_arbitrary_create_0.out new file mode 100644 index 000000000..a7e3fbf20 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_create_0.out @@ -0,0 +1,6 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index d92686b93..7fc102dbb 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -419,29 +419,36 @@ SELECT create_distributed_table('tbl2', 'x'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- also, not inside subqueries & ctes WITH targq AS ( SELECT * FROM tbl2 ) MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is only 
supported when distributed tables are joined on their distribution column --- crashes on beta3, fixed on 15 stable ---WITH foo AS ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) SELECT * FROM foo; ---COPY ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) TO stdout; +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +WITH foo AS ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) SELECT * FROM foo; +ERROR: MERGE not supported in WITH query +COPY ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) TO stdout; +ERROR: MERGE not supported in COPY +MERGE INTO tbl1 t +USING tbl2 +ON (true) +WHEN MATCHED THEN + DO NOTHING; +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns MERGE INTO tbl1 t USING tbl2 ON (true) WHEN MATCHED THEN UPDATE SET x = (SELECT count(*) FROM tbl2); -ERROR: MERGE command is only supported when distributed tables are joined on their distribution column +ERROR: updating the distribution column is not allowed in MERGE actions -- test numeric types with negative scale CREATE TABLE numeric_negative_scale(numeric_column numeric(3,-1), orig_value int); INSERT into numeric_negative_scale SELECT x,x FROM generate_series(111, 115) x; diff --git a/src/test/regress/expected/pgmerge.out b/src/test/regress/expected/pgmerge.out index 0bedf356f..8a74336a0 100644 --- a/src/test/regress/expected/pgmerge.out +++ b/src/test/regress/expected/pgmerge.out @@ -910,7 +910,7 @@ MERGE INTO wq_target t USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; -ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; -- Test preventing ON condition from writing to the database BEGIN; @@ -918,7 +918,7 @@ MERGE INTO wq_target t USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) WHEN MATCHED THEN UPDATE SET balance = t.balance + s.balance; -ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; @@ -1893,13 +1893,15 @@ INSERT INTO pa_target SELECT '2017-02-28', id, id * 100, 'initial' FROM generate SET client_min_messages TO DEBUG1; BEGIN; MERGE INTO pa_target t - USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s + USING (SELECT * FROM pa_source WHERE sid < 10) s + --USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s ON t.tid = s.sid WHEN MATCHED THEN UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT MATCHED THEN - INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); -DEBUG: + INSERT VALUES ('2017-01-15', sid, delta, 'inserted by merge'); +DEBUG: + --INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); SELECT * FROM pa_target ORDER BY tid; logts | tid | balance | val --------------------------------------------------------------------- diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql index 12294b2c9..ded90b69c 100644 --- a/src/test/regress/sql/merge.sql +++ 
b/src/test/regress/sql/merge.sql @@ -18,8 +18,9 @@ CREATE SCHEMA merge_schema; SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; -SET citus.explain_all_tasks to true; +SET citus.explain_all_tasks TO true; SET citus.shard_replication_factor TO 1; +SET citus.max_adaptive_executor_pool_size TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); CREATE TABLE source @@ -185,6 +186,21 @@ MERGE INTO target t VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time); SELECT * from target t WHERE t.customer_id = 30004; +-- Updating distribution column is allowed if the operation is a no-op +SELECT * from target t WHERE t.customer_id = 30000; +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = 30000; + +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = t.customer_id; +SELECT * from target t WHERE t.customer_id = 30000; + -- -- Test MERGE with CTE as source -- @@ -223,7 +239,6 @@ MERGE INTO t1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (pg_res.id, pg_res.val); --- Two rows with id 2 and val incremented, id 3, and id 1 is deleted SELECT * FROM t1 order by id; SELECT * INTO merge_result FROM t1 order by id; @@ -777,7 +792,8 @@ $$ language plpgsql volatile; CREATE TABLE fn_target(id int, data varchar); MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -790,11 +806,12 @@ SELECT * INTO fn_result FROM fn_target ORDER BY 1 ; -- Clean the slate TRUNCATE TABLE fn_target; SELECT citus_add_local_table_to_metadata('fn_target'); -SELECT create_distributed_table('dist_table', 'id'); +SELECT citus_add_local_table_to_metadata('dist_table'); SET client_min_messages TO DEBUG1; MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -1287,7 +1304,7 @@ ON pg_target.id = sub.id AND pg_target.id = $1 WHEN MATCHED THEN UPDATE SET val = 'Updated by prepare using ' || sub.val WHEN NOT MATCHED THEN - DO NOTHING; + INSERT VALUES (sub.id, sub.val); PREPARE citus_prep(int) AS MERGE INTO citus_target @@ -1296,12 +1313,12 @@ ON citus_target.id = sub.id AND citus_target.id = $1 WHEN MATCHED THEN UPDATE SET val = 'Updated by prepare using ' || sub.val WHEN NOT MATCHED THEN - DO NOTHING; + INSERT VALUES (sub.id, sub.val); BEGIN; -SET citus.log_remote_commands to true; SELECT * FROM pg_target WHERE id = 500; -- before merge +SELECT count(*) FROM pg_target; -- before merge EXECUTE pg_prep(500); SELECT * FROM pg_target WHERE id = 500; -- non-cached EXECUTE pg_prep(500); @@ -1310,8 +1327,11 @@ EXECUTE pg_prep(500); EXECUTE pg_prep(500); EXECUTE pg_prep(500); SELECT * FROM pg_target WHERE id = 500; -- cached +SELECT count(*) FROM pg_target; -- cached SELECT * FROM citus_target WHERE id = 500; -- before merge +SELECT count(*) FROM citus_target; -- before merge +SET citus.log_remote_commands to true; EXECUTE citus_prep(500); SELECT * FROM citus_target WHERE id = 500; -- non-cached EXECUTE citus_prep(500); 
@@ -1319,9 +1339,10 @@ EXECUTE citus_prep(500); EXECUTE citus_prep(500); EXECUTE citus_prep(500); EXECUTE citus_prep(500); -SELECT * FROM citus_target WHERE id = 500; -- cached - SET citus.log_remote_commands to false; +SELECT * FROM citus_target WHERE id = 500; -- cached +SELECT count(*) FROM citus_target; -- cached + SELECT compare_tables(); ROLLBACK; @@ -1417,10 +1438,185 @@ MERGE INTO citus_pa_target t SELECT pa_compare_tables(); ROLLBACK; +CREATE TABLE source_json( id integer, z int, d jsonb); +CREATE TABLE target_json( id integer, z int, d jsonb); + +INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i; + +SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id'); + +-- single shard query given source_json is filtered and Postgres is smart to pushdown +-- filter to the target_json as well +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING (SELECT * FROM source_json WHERE id = 1) sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +$Q$); +SELECT * FROM target_json ORDER BY 1; + +-- zero shard query as filters do not match +--SELECT public.coordinator_plan($Q$ +--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +--USING (SELECT * FROM source_json WHERE id = 1) sdn +--ON sda.id = sdn.id AND sda.id = 2 +--WHEN NOT matched THEN +-- INSERT (id, z) VALUES (sdn.id, 5); +--$Q$); +--SELECT * FROM target_json ORDER BY 1; + +-- join for source_json is happening at a different place +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z) +ON sda.id = s1.id AND s1.id = s2.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (s2.id, 5); +$Q$); +SELECT * FROM target_json ORDER BY 1; + +-- update JSON column +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET d = '{"a" : 5}'; +$Q$); +SELECT * FROM target_json ORDER BY 1; + +CREATE FUNCTION immutable_hash(int) RETURNS int +AS 'SELECT hashtext( ($1 + $1)::text);' +LANGUAGE SQL +IMMUTABLE +RETURNS NULL ON NULL INPUT; + +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET z = immutable_hash(sdn.z); + +-- Test bigserial +CREATE TABLE source_serial (id integer, z int, d bigserial); +CREATE TABLE target_serial (id integer, z int, d bigserial); +INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i; +SELECT create_distributed_table('source_serial', 'id'), + create_distributed_table('target_serial', 'id'); + +MERGE INTO target_serial sda +USING source_serial sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (id, z); + +SELECT count(*) from source_serial; +SELECT count(*) from target_serial; + +SELECT count(distinct d) from source_serial; +SELECT count(distinct d) from target_serial; + +-- Test set operations +CREATE TABLE target_set(t1 int, t2 int); +CREATE TABLE source_set(s1 int, s2 int); + +SELECT create_distributed_table('target_set', 't1'), + create_distributed_table('source_set', 's1'); + +INSERT INTO target_set VALUES(1, 0); +INSERT INTO source_set VALUES(1, 1); +INSERT INTO source_set VALUES(2, 2); + +MERGE INTO target_set +USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 100 +WHEN NOT MATCHED THEN + INSERT 
VALUES(foo.s1); +SELECT * FROM target_set ORDER BY 1, 2; + -- -- Error and Unsupported scenarios -- +MERGE INTO target_set +USING (SELECT s1,s2 FROM source_set UNION SELECT s2,s1 FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +MERGE INTO target_set +USING (SELECT 2 as s3, source_set.* FROM (SELECT * FROM source_set LIMIT 1) as foo LEFT JOIN source_set USING( s1)) AS foo +ON target_set.t1 = foo.s1 +WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 +WHEN NOT MATCHED THEN INSERT VALUES(s1, s3); + + +-- modifying CTE not supported +EXPLAIN +WITH cte_1 AS (DELETE FROM target_json) +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); + +-- Grouping sets not supported +MERGE INTO citus_target t +USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +WITH subq AS +( +SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val) +) +MERGE INTO citus_target t +USING subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +-- try inserting unmatched distribution column value +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT DEFAULT VALUES; + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT VALUES(10000); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(1000); + +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(s.val); + +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (val) VALUES(s.val); + -- try updating the distribution key column BEGIN; MERGE INTO target_cj t @@ -1473,6 +1669,25 @@ BEGIN END; $$; +-- Test functions executing in MERGE statement. This is to prevent the functions from +-- doing a random sql, which may be executed in a remote node or modifying the target +-- relation which will have unexpected/suprising results. 
+MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON + t1.id = s1.id AND s1.id = 2 + WHEN matched THEN + UPDATE SET id = s1.id, val = random(); + +-- Test STABLE function +CREATE FUNCTION add_s(integer, integer) RETURNS integer +AS 'select $1 + $2;' +LANGUAGE SQL +STABLE RETURNS NULL ON NULL INPUT; + +MERGE INTO t1 +USING s1 ON t1.id = s1.id +WHEN NOT MATCHED THEN + INSERT VALUES(s1.id, add_s(s1.val, 2)); + -- Test preventing "ON" join condition from writing to the database BEGIN; MERGE INTO t1 diff --git a/src/test/regress/sql/merge_arbitrary.sql b/src/test/regress/sql/merge_arbitrary.sql new file mode 100644 index 000000000..17b7d4f90 --- /dev/null +++ b/src/test/regress/sql/merge_arbitrary.sql @@ -0,0 +1,133 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif + +SET search_path TO merge_arbitrary_schema; +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); + +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); + +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test PREPARE +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); + +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +INSERT INTO prept VALUES(100, 0); + +INSERT INTO preps VALUES(100, 0); +INSERT INTO preps VALUES(200, 0); + +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); + +-- sixth time +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); + +-- Should have the counter as 14 (7 * 2) +SELECT * FROM prept; + +-- Test local tables +INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause +INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause +INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause + +INSERT INTO t1 VALUES(1, 0); -- Will be deleted +INSERT INTO t1 VALUES(2, 0); -- Will be updated +INSERT INTO t1 VALUES(5, 0); -- Will be intact + +PREPARE local(int, int) AS +WITH s1_res AS ( + SELECT * FROM s1 +) +MERGE INTO t1 + USING s1_res ON (s1_res.id = t1.id) + + WHEN MATCHED AND s1_res.val = $1 THEN + DELETE + WHEN MATCHED THEN + UPDATE SET val = t1.val + $2 + WHEN NOT MATCHED THEN + INSERT (id, val) VALUES (s1_res.id, s1_res.val); + +BEGIN; +EXECUTE local(0, 1); +SELECT * 
FROM t1 order by id; +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +-- sixth time +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; +ROLLBACK; diff --git a/src/test/regress/sql/merge_arbitrary_create.sql b/src/test/regress/sql/merge_arbitrary_create.sql new file mode 100644 index 000000000..edf9b0d9d --- /dev/null +++ b/src/test/regress/sql/merge_arbitrary_create.sql @@ -0,0 +1,50 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif + +DROP SCHEMA IF EXISTS merge_arbitrary_schema CASCADE; +CREATE SCHEMA merge_arbitrary_schema; +SET search_path TO merge_arbitrary_schema; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 6000000; +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); + +SELECT create_distributed_table('target_cj', 'tid'); +SELECT create_distributed_table('source_cj1', 'sid1'); +SELECT create_distributed_table('source_cj2', 'sid2'); + +CREATE TABLE prept(t1 int, t2 int); +CREATE TABLE preps(s1 int, s2 int); + +SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); + +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +-- Citus local tables +CREATE TABLE t1(id int, val int); +CREATE TABLE s1(id int, val int); + +SELECT citus_add_local_table_to_metadata('t1'); +SELECT citus_add_local_table_to_metadata('s1'); diff --git a/src/test/regress/sql/pg15.sql b/src/test/regress/sql/pg15.sql index 121b41f86..ac8062c65 100644 --- a/src/test/regress/sql/pg15.sql +++ b/src/test/regress/sql/pg15.sql @@ -269,16 +269,21 @@ WITH targq AS ( MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; --- crashes on beta3, fixed on 15 stable ---WITH foo AS ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) SELECT * FROM foo; +WITH foo AS ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) SELECT * FROM foo; ---COPY ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) TO stdout; +COPY ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) TO stdout; + +MERGE INTO tbl1 t +USING tbl2 +ON (true) +WHEN MATCHED THEN + DO NOTHING; MERGE INTO tbl1 t USING tbl2 diff --git a/src/test/regress/sql/pgmerge.sql b/src/test/regress/sql/pgmerge.sql index 83bf01a68..9b828f27e 100644 --- a/src/test/regress/sql/pgmerge.sql +++ b/src/test/regress/sql/pgmerge.sql @@ -1172,12 +1172,14 @@ INSERT INTO pa_target SELECT '2017-02-28', id, id * 100, 'initial' FROM generate SET client_min_messages TO DEBUG1; BEGIN; MERGE INTO pa_target t - USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s + USING (SELECT * FROM pa_source WHERE sid < 10) s + --USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s ON t.tid = s.sid WHEN MATCHED THEN UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT 
MATCHED THEN - INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); + INSERT VALUES ('2017-01-15', sid, delta, 'inserted by merge'); + --INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); SELECT * FROM pa_target ORDER BY tid; ROLLBACK; RESET client_min_messages; diff --git a/src/test/regress/sql_schedule b/src/test/regress/sql_schedule index f07f7af9a..272a84eff 100644 --- a/src/test/regress/sql_schedule +++ b/src/test/regress/sql_schedule @@ -14,3 +14,4 @@ test: arbitrary_configs_truncate test: arbitrary_configs_truncate_cascade test: arbitrary_configs_truncate_partition test: arbitrary_configs_alter_table_add_constraint_without_name +test: merge_arbitrary From ea3093bdb67888ba30ef77dc777f3785edada87d Mon Sep 17 00:00:00 2001 From: aykut-bozkurt <51649454+aykut-bozkurt@users.noreply.github.com> Date: Mon, 20 Mar 2023 12:06:31 +0300 Subject: [PATCH 11/18] Make workerCount configurable for regression tests (#6764) Make worker count flexible in our regression tests instead of hardcoding it to 2 workers. --- src/test/regress/Makefile | 14 ++++++++------ src/test/regress/citus_tests/run_test.py | 19 +++++++++++++++++-- src/test/regress/pg_regress_multi.pl | 4 +++- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index 368f8f8c5..d9700df80 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -117,29 +117,31 @@ check-minimal-mx: all -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS) check-custom-schedule: all - $(pg_regress_multi_check) --load-extension=citus \ + $(pg_regress_multi_check) --load-extension=citus --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-failure-custom-schedule: all - $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ + $(pg_regress_multi_check) --load-extension=citus --mitmproxy --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-isolation-custom-schedule: all $(isolation_test_files) - $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + $(pg_regress_multi_check) --load-extension=citus --isolationtester --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-custom-schedule-vg: all $(pg_regress_multi_check) --load-extension=citus \ - --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ + --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --worker-count=$(WORKERCOUNT) \ + --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-failure-custom-schedule-vg: all $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ - --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ + --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --worker-count=$(WORKERCOUNT) \ + --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-isolation-custom-schedule-vg: all $(isolation_test_files) - $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + 
$(pg_regress_multi_check) --load-extension=citus --isolationtester --worker-count=$(WORKERCOUNT) \ --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index 3daac8b6a..9c901785c 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -73,10 +73,11 @@ if __name__ == "__main__": schedule: Optional[str] direct_extra_tests: list[str] - def __init__(self, schedule, extra_tests=None, repeatable=True): + def __init__(self, schedule, extra_tests=None, repeatable=True, worker_count=2): self.schedule = schedule self.direct_extra_tests = extra_tests or [] self.repeatable = repeatable + self.worker_count = worker_count def extra_tests(self): all_deps = OrderedDict() @@ -180,6 +181,15 @@ if __name__ == "__main__": return "base_schedule" return "minimal_schedule" + # we run the tests with 2 workers by default. + # If we find any dependency which requires more workers, we update the worker count. + def worker_count_for(test_name): + if test_name in deps: + return deps[test_name].worker_count + return 2 + + test_worker_count = max(worker_count_for(test_file_name), 2) + if test_file_name in deps: dependencies = deps[test_file_name] elif schedule_line_is_upgrade_after(test_schedule_line): @@ -204,6 +214,7 @@ if __name__ == "__main__": with open(tmp_schedule_path, "a") as myfile: for dependency in dependencies.extra_tests(): myfile.write(f"test: {dependency}\n") + test_worker_count = max(worker_count_for(dependency), test_worker_count) repetition_cnt = args["repeat"] if repetition_cnt > 1 and not dependencies.repeatable: @@ -224,7 +235,11 @@ if __name__ == "__main__": make_recipe += "-vg" # prepare command to run tests - test_command = f"make -C {regress_dir} {make_recipe} SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" + test_command = ( + f"make -C {regress_dir} {make_recipe} " + f"WORKERCOUNT={test_worker_count} " + f"SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" + ) # run test command n times try: diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index 119e6a758..af594c1d4 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -49,6 +49,7 @@ sub Usage() print " --pg_ctl-timeout Timeout for pg_ctl\n"; print " --connection-timeout Timeout for connecting to worker nodes\n"; print " --mitmproxy Start a mitmproxy for one of the workers\n"; + print " --worker-count Number of workers in Citus cluster (default: 2)\n"; exit 1; } @@ -84,6 +85,7 @@ my $mitmFifoPath = catfile($TMP_CHECKDIR, "mitmproxy.fifo"); my $conninfo = ""; my $publicWorker1Host = "localhost"; my $publicWorker2Host = "localhost"; +my $workerCount = 2; my $serversAreShutdown = "TRUE"; my $usingWindows = 0; @@ -116,6 +118,7 @@ GetOptions( 'conninfo=s' => \$conninfo, 'worker-1-public-hostname=s' => \$publicWorker1Host, 'worker-2-public-hostname=s' => \$publicWorker2Host, + 'worker-count=i' => \$workerCount, 'help' => sub { Usage() }); my $fixopen = "$bindir/postgres.fixopen"; @@ -318,7 +321,6 @@ my $mitmPort = 9060; # Set some default configuration options my $masterPort = 57636; -my $workerCount = 2; my @workerHosts = (); my @workerPorts = (); From aa33988c6edd66ec9ae9e8a0e27ce34599eaf76b Mon Sep 17 00:00:00 2001 From: 
aykut-bozkurt <51649454+aykut-bozkurt@users.noreply.github.com> Date: Tue, 21 Mar 2023 00:58:12 +0300 Subject: [PATCH 12/18] fix pip lock file (#6766) ci/fix_styles.sh were complaining about `black` and `isort` packages are not found even if I `pipenv install --dev` due to broken lock file. I regenerated the lock file and now it works fine. We also wanted to upgrade required python version for the pipfile. --- .circleci/config.yml | 2 +- src/test/regress/Pipfile.lock | 215 ++++++++++++++++++---------------- 2 files changed, 115 insertions(+), 102 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8f2d86f15..d0db414ce 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-v89059f9' + default: '-v4b2ae97' pg13_version: type: string default: '13.10' diff --git a/src/test/regress/Pipfile.lock b/src/test/regress/Pipfile.lock index 8bf8715ea..709254d77 100644 --- a/src/test/regress/Pipfile.lock +++ b/src/test/regress/Pipfile.lock @@ -219,32 +219,32 @@ }, "cryptography": { "hashes": [ - "sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4", - "sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f", - "sha256:4789d1e3e257965e960232345002262ede4d094d1a19f4d3b52e48d4d8f3b885", - "sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502", - "sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41", - "sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965", - "sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e", - "sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc", - "sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad", - "sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505", - "sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388", - "sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6", - "sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2", - "sha256:c5caeb8188c24888c90b5108a441c106f7faa4c4c075a2bcae438c6e8ca73cef", - "sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac", - "sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695", - "sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6", - "sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336", - "sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0", - "sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c", - "sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106", - "sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a", - "sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8" + "sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1", + "sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7", + "sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06", + "sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84", + "sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915", + "sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074", + "sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5", + "sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3", + 
"sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9", + "sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3", + "sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011", + "sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536", + "sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a", + "sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f", + "sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480", + "sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac", + "sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0", + "sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108", + "sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828", + "sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354", + "sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612", + "sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3", + "sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97" ], "index": "pypi", - "version": "==39.0.1" + "version": "==39.0.2" }, "docopt": { "hashes": [ @@ -255,11 +255,11 @@ }, "exceptiongroup": { "hashes": [ - "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e", - "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23" + "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e", + "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785" ], "markers": "python_version < '3.11'", - "version": "==1.1.0" + "version": "==1.1.1" }, "execnet": { "hashes": [ @@ -271,11 +271,11 @@ }, "filelock": { "hashes": [ - "sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de", - "sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d" + "sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce", + "sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182" ], "index": "pypi", - "version": "==3.9.0" + "version": "==3.10.0" }, "flask": { "hashes": [ @@ -420,60 +420,71 @@ }, "msgpack": { "hashes": [ - "sha256:002b5c72b6cd9b4bafd790f364b8480e859b4712e91f43014fe01e4f957b8467", - "sha256:0a68d3ac0104e2d3510de90a1091720157c319ceeb90d74f7b5295a6bee51bae", - "sha256:0df96d6eaf45ceca04b3f3b4b111b86b33785683d682c655063ef8057d61fd92", - "sha256:0dfe3947db5fb9ce52aaea6ca28112a170db9eae75adf9339a1aec434dc954ef", - "sha256:0e3590f9fb9f7fbc36df366267870e77269c03172d086fa76bb4eba8b2b46624", - "sha256:11184bc7e56fd74c00ead4f9cc9a3091d62ecb96e97653add7a879a14b003227", - "sha256:112b0f93202d7c0fef0b7810d465fde23c746a2d482e1e2de2aafd2ce1492c88", - "sha256:1276e8f34e139aeff1c77a3cefb295598b504ac5314d32c8c3d54d24fadb94c9", - "sha256:1576bd97527a93c44fa856770197dec00d223b0b9f36ef03f65bac60197cedf8", - "sha256:1e91d641d2bfe91ba4c52039adc5bccf27c335356055825c7f88742c8bb900dd", - "sha256:26b8feaca40a90cbe031b03d82b2898bf560027160d3eae1423f4a67654ec5d6", - "sha256:2999623886c5c02deefe156e8f869c3b0aaeba14bfc50aa2486a0415178fce55", - "sha256:2a2df1b55a78eb5f5b7d2a4bb221cd8363913830145fad05374a80bf0877cb1e", - "sha256:2bb8cdf50dd623392fa75525cce44a65a12a00c98e1e37bf0fb08ddce2ff60d2", - "sha256:2cc5ca2712ac0003bcb625c96368fd08a0f86bbc1a5578802512d87bc592fe44", - "sha256:35bc0faa494b0f1d851fd29129b2575b2e26d41d177caacd4206d81502d4c6a6", - "sha256:3c11a48cf5e59026ad7cb0dc29e29a01b5a66a3e333dc11c04f7e991fc5510a9", - 
"sha256:449e57cc1ff18d3b444eb554e44613cffcccb32805d16726a5494038c3b93dab", - "sha256:462497af5fd4e0edbb1559c352ad84f6c577ffbbb708566a0abaaa84acd9f3ae", - "sha256:4733359808c56d5d7756628736061c432ded018e7a1dff2d35a02439043321aa", - "sha256:48f5d88c99f64c456413d74a975bd605a9b0526293218a3b77220a2c15458ba9", - "sha256:49565b0e3d7896d9ea71d9095df15b7f75a035c49be733051c34762ca95bbf7e", - "sha256:4ab251d229d10498e9a2f3b1e68ef64cb393394ec477e3370c457f9430ce9250", - "sha256:4d5834a2a48965a349da1c5a79760d94a1a0172fbb5ab6b5b33cbf8447e109ce", - "sha256:4dea20515f660aa6b7e964433b1808d098dcfcabbebeaaad240d11f909298075", - "sha256:545e3cf0cf74f3e48b470f68ed19551ae6f9722814ea969305794645da091236", - "sha256:63e29d6e8c9ca22b21846234913c3466b7e4ee6e422f205a2988083de3b08cae", - "sha256:6916c78f33602ecf0509cc40379271ba0f9ab572b066bd4bdafd7434dee4bc6e", - "sha256:6a4192b1ab40f8dca3f2877b70e63799d95c62c068c84dc028b40a6cb03ccd0f", - "sha256:6c9566f2c39ccced0a38d37c26cc3570983b97833c365a6044edef3574a00c08", - "sha256:76ee788122de3a68a02ed6f3a16bbcd97bc7c2e39bd4d94be2f1821e7c4a64e6", - "sha256:7760f85956c415578c17edb39eed99f9181a48375b0d4a94076d84148cf67b2d", - "sha256:77ccd2af37f3db0ea59fb280fa2165bf1b096510ba9fe0cc2bf8fa92a22fdb43", - "sha256:81fc7ba725464651190b196f3cd848e8553d4d510114a954681fd0b9c479d7e1", - "sha256:85f279d88d8e833ec015650fd15ae5eddce0791e1e8a59165318f371158efec6", - "sha256:9667bdfdf523c40d2511f0e98a6c9d3603be6b371ae9a238b7ef2dc4e7a427b0", - "sha256:a75dfb03f8b06f4ab093dafe3ddcc2d633259e6c3f74bb1b01996f5d8aa5868c", - "sha256:ac5bd7901487c4a1dd51a8c58f2632b15d838d07ceedaa5e4c080f7190925bff", - "sha256:aca0f1644d6b5a73eb3e74d4d64d5d8c6c3d577e753a04c9e9c87d07692c58db", - "sha256:b17be2478b622939e39b816e0aa8242611cc8d3583d1cd8ec31b249f04623243", - "sha256:c1683841cd4fa45ac427c18854c3ec3cd9b681694caf5bff04edb9387602d661", - "sha256:c23080fdeec4716aede32b4e0ef7e213c7b1093eede9ee010949f2a418ced6ba", - "sha256:d5b5b962221fa2c5d3a7f8133f9abffc114fe218eb4365e40f17732ade576c8e", - "sha256:d603de2b8d2ea3f3bcb2efe286849aa7a81531abc52d8454da12f46235092bcb", - "sha256:e83f80a7fec1a62cf4e6c9a660e39c7f878f603737a0cdac8c13131d11d97f52", - "sha256:eb514ad14edf07a1dbe63761fd30f89ae79b42625731e1ccf5e1f1092950eaa6", - "sha256:eba96145051ccec0ec86611fe9cf693ce55f2a3ce89c06ed307de0e085730ec1", - "sha256:ed6f7b854a823ea44cf94919ba3f727e230da29feb4a99711433f25800cf747f", - "sha256:f0029245c51fd9473dc1aede1160b0a29f4a912e6b1dd353fa6d317085b219da", - "sha256:f5d869c18f030202eb412f08b28d2afeea553d6613aee89e200d7aca7ef01f5f", - "sha256:fb62ea4b62bfcb0b380d5680f9a4b3f9a2d166d9394e9bbd9666c0ee09a3645c", - "sha256:fcb8a47f43acc113e24e910399376f7277cf8508b27e5b88499f053de6b115a8" + "sha256:06f5174b5f8ed0ed919da0e62cbd4ffde676a374aba4020034da05fab67b9164", + "sha256:0c05a4a96585525916b109bb85f8cb6511db1c6f5b9d9cbcbc940dc6b4be944b", + "sha256:137850656634abddfb88236008339fdaba3178f4751b28f270d2ebe77a563b6c", + "sha256:17358523b85973e5f242ad74aa4712b7ee560715562554aa2134d96e7aa4cbbf", + "sha256:18334484eafc2b1aa47a6d42427da7fa8f2ab3d60b674120bce7a895a0a85bdd", + "sha256:1835c84d65f46900920b3708f5ba829fb19b1096c1800ad60bae8418652a951d", + "sha256:1967f6129fc50a43bfe0951c35acbb729be89a55d849fab7686004da85103f1c", + "sha256:1ab2f3331cb1b54165976a9d976cb251a83183631c88076613c6c780f0d6e45a", + "sha256:1c0f7c47f0087ffda62961d425e4407961a7ffd2aa004c81b9c07d9269512f6e", + "sha256:20a97bf595a232c3ee6d57ddaadd5453d174a52594bf9c21d10407e2a2d9b3bd", + "sha256:20c784e66b613c7f16f632e7b5e8a1651aa5702463d61394671ba07b2fc9e025", + 
"sha256:266fa4202c0eb94d26822d9bfd7af25d1e2c088927fe8de9033d929dd5ba24c5", + "sha256:28592e20bbb1620848256ebc105fc420436af59515793ed27d5c77a217477705", + "sha256:288e32b47e67f7b171f86b030e527e302c91bd3f40fd9033483f2cacc37f327a", + "sha256:3055b0455e45810820db1f29d900bf39466df96ddca11dfa6d074fa47054376d", + "sha256:332360ff25469c346a1c5e47cbe2a725517919892eda5cfaffe6046656f0b7bb", + "sha256:362d9655cd369b08fda06b6657a303eb7172d5279997abe094512e919cf74b11", + "sha256:366c9a7b9057e1547f4ad51d8facad8b406bab69c7d72c0eb6f529cf76d4b85f", + "sha256:36961b0568c36027c76e2ae3ca1132e35123dcec0706c4b7992683cc26c1320c", + "sha256:379026812e49258016dd84ad79ac8446922234d498058ae1d415f04b522d5b2d", + "sha256:382b2c77589331f2cb80b67cc058c00f225e19827dbc818d700f61513ab47bea", + "sha256:476a8fe8fae289fdf273d6d2a6cb6e35b5a58541693e8f9f019bfe990a51e4ba", + "sha256:48296af57cdb1d885843afd73c4656be5c76c0c6328db3440c9601a98f303d87", + "sha256:4867aa2df9e2a5fa5f76d7d5565d25ec76e84c106b55509e78c1ede0f152659a", + "sha256:4c075728a1095efd0634a7dccb06204919a2f67d1893b6aa8e00497258bf926c", + "sha256:4f837b93669ce4336e24d08286c38761132bc7ab29782727f8557e1eb21b2080", + "sha256:4f8d8b3bf1ff2672567d6b5c725a1b347fe838b912772aa8ae2bf70338d5a198", + "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9", + "sha256:5494ea30d517a3576749cad32fa27f7585c65f5f38309c88c6d137877fa28a5a", + "sha256:55b56a24893105dc52c1253649b60f475f36b3aa0fc66115bffafb624d7cb30b", + "sha256:56a62ec00b636583e5cb6ad313bbed36bb7ead5fa3a3e38938503142c72cba4f", + "sha256:57e1f3528bd95cc44684beda696f74d3aaa8a5e58c816214b9046512240ef437", + "sha256:586d0d636f9a628ddc6a17bfd45aa5b5efaf1606d2b60fa5d87b8986326e933f", + "sha256:5cb47c21a8a65b165ce29f2bec852790cbc04936f502966768e4aae9fa763cb7", + "sha256:6c4c68d87497f66f96d50142a2b73b97972130d93677ce930718f68828b382e2", + "sha256:821c7e677cc6acf0fd3f7ac664c98803827ae6de594a9f99563e48c5a2f27eb0", + "sha256:916723458c25dfb77ff07f4c66aed34e47503b2eb3188b3adbec8d8aa6e00f48", + "sha256:9e6ca5d5699bcd89ae605c150aee83b5321f2115695e741b99618f4856c50898", + "sha256:9f5ae84c5c8a857ec44dc180a8b0cc08238e021f57abdf51a8182e915e6299f0", + "sha256:a2b031c2e9b9af485d5e3c4520f4220d74f4d222a5b8dc8c1a3ab9448ca79c57", + "sha256:a61215eac016f391129a013c9e46f3ab308db5f5ec9f25811e811f96962599a8", + "sha256:a740fa0e4087a734455f0fc3abf5e746004c9da72fbd541e9b113013c8dc3282", + "sha256:a9985b214f33311df47e274eb788a5893a761d025e2b92c723ba4c63936b69b1", + "sha256:ab31e908d8424d55601ad7075e471b7d0140d4d3dd3272daf39c5c19d936bd82", + "sha256:ac9dd47af78cae935901a9a500104e2dea2e253207c924cc95de149606dc43cc", + "sha256:addab7e2e1fcc04bd08e4eb631c2a90960c340e40dfc4a5e24d2ff0d5a3b3edb", + "sha256:b1d46dfe3832660f53b13b925d4e0fa1432b00f5f7210eb3ad3bb9a13c6204a6", + "sha256:b2de4c1c0538dcb7010902a2b97f4e00fc4ddf2c8cda9749af0e594d3b7fa3d7", + "sha256:b5ef2f015b95f912c2fcab19c36814963b5463f1fb9049846994b007962743e9", + "sha256:b72d0698f86e8d9ddf9442bdedec15b71df3598199ba33322d9711a19f08145c", + "sha256:bae7de2026cbfe3782c8b78b0db9cbfc5455e079f1937cb0ab8d133496ac55e1", + "sha256:bf22a83f973b50f9d38e55c6aade04c41ddda19b00c4ebc558930d78eecc64ed", + "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c", + "sha256:c396e2cc213d12ce017b686e0f53497f94f8ba2b24799c25d913d46c08ec422c", + "sha256:cb5aaa8c17760909ec6cb15e744c3ebc2ca8918e727216e79607b7bbce9c8f77", + "sha256:cdc793c50be3f01106245a61b739328f7dccc2c648b501e237f0699fe1395b81", + "sha256:d25dd59bbbbb996eacf7be6b4ad082ed7eacc4e8f3d2df1ba43822da9bfa122a", + 
"sha256:e42b9594cc3bf4d838d67d6ed62b9e59e201862a25e9a157019e171fbe672dd3", + "sha256:e57916ef1bd0fee4f21c4600e9d1da352d8816b52a599c46460e93a6e9f17086", + "sha256:ed40e926fa2f297e8a653c954b732f125ef97bdd4c889f243182299de27e2aa9", + "sha256:ef8108f8dedf204bb7b42994abf93882da1159728a2d4c5e82012edd92c9da9f", + "sha256:f933bbda5a3ee63b8834179096923b094b76f0c7a73c1cfe8f07ad608c58844b", + "sha256:fe5c63197c55bce6385d9aee16c4d0641684628f63ace85f73571e65ad1c1e8d" ], - "version": "==1.0.4" + "version": "==1.0.5" }, "packaging": { "hashes": [ @@ -578,7 +589,7 @@ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", "version": "==2.4.7" }, "pyperclip": { @@ -589,11 +600,11 @@ }, "pytest": { "hashes": [ - "sha256:c7c6ca206e93355074ae32f7403e8ea12163b1163c976fee7d4d84027c162be5", - "sha256:d45e0952f3727241918b8fd0f376f5ff6b301cc0777c6f9a556935c92d8a7d42" + "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e", + "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4" ], "index": "pypi", - "version": "==7.2.1" + "version": "==7.2.2" }, "pytest-asyncio": { "hashes": [ @@ -613,11 +624,11 @@ }, "pytest-xdist": { "hashes": [ - "sha256:336098e3bbd8193276867cc87db8b22903c3927665dff9d1ac8684c02f597b68", - "sha256:fa10f95a2564cd91652f2d132725183c3b590d9fdcdec09d3677386ecf4c1ce9" + "sha256:1849bd98d8b242b948e472db7478e090bf3361912a8fed87992ed94085f54727", + "sha256:37290d161638a20b672401deef1cba812d110ac27e35d213f091d15b8beb40c9" ], "index": "pypi", - "version": "==3.2.0" + "version": "==3.2.1" }, "ruamel.yaml": { "hashes": [ @@ -657,14 +668,16 @@ "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a", "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71", "sha256:d5e51e2901ec2366b79f16c2299a03e74ba4531ddcfacc1416639c557aef0ad8", + "sha256:da538167284de58a52109a9b89b8f6a53ff8437dd6dc26d33b57bf6699153122", "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7", "sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80", "sha256:ecdf1a604009bd35c674b9225a8fa609e0282d9b896c03dd441a91e5f53b534e", "sha256:efa08d63ef03d079dcae1dfe334f6c8847ba8b645d08df286358b1f5293d24ab", "sha256:f01da5790e95815eb5a8a138508c01c758e5f5bc0ce4286c4f7028b8dd7ac3d0", - "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646" + "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646", + "sha256:f6d3d39611ac2e4f62c3128a9eed45f19a6608670c5a2f4f07f24e8de3441d38" ], - "markers": "python_version < '3.10' and platform_python_implementation == 'CPython'", + "markers": "platform_python_implementation == 'CPython' and python_version < '3.10'", "version": "==0.2.7" }, "sortedcontainers": { @@ -842,11 +855,11 @@ }, "flake8-bugbear": { "hashes": [ - "sha256:39259814a83f33c8409417ee12dd4050c9c0bb4c8707c12fc18ae62b2f3ddee1", - "sha256:f136bd0ca2684f101168bba2310dec541e11aa6b252260c17dcf58d18069a740" + "sha256:beb5c7efcd7ccc2039ef66a77bb8db925e7be3531ff1cb4d0b7030d0e2113d72", + "sha256:e3e7f74c8a49ad3794a7183353026dabd68c74030d5f46571f84c1fb0eb79363" ], "index": "pypi", - "version": "==23.2.13" + "version": "==23.3.12" }, "isort": { "hashes": [ @@ -882,19 +895,19 @@ }, "pathspec": { "hashes": [ - 
"sha256:3a66eb970cbac598f9e5ccb5b2cf58930cd8e3ed86d393d541eaf2d8b1705229", - "sha256:64d338d4e0914e91c1792321e6907b5a593f1ab1851de7fc269557a21b30ebbc" + "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687", + "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293" ], "markers": "python_version >= '3.7'", - "version": "==0.11.0" + "version": "==0.11.1" }, "platformdirs": { "hashes": [ - "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9", - "sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567" + "sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa", + "sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8" ], "markers": "python_version >= '3.7'", - "version": "==3.0.0" + "version": "==3.1.1" }, "pycodestyle": { "hashes": [ From aa465b6de1702fce7e2b5a0e577d085e838c2209 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 21 Mar 2023 14:04:07 +0300 Subject: [PATCH 13/18] Decide what to do with router planner error at one place (#6781) --- .../distributed/planner/distributed_planner.c | 36 +++++++------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 17b63ee0a..866f7353a 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -944,18 +944,6 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina distributedPlan = CreateModifyPlan(originalQuery, query, plannerRestrictionContext); } - - /* the functions above always return a plan, possibly with an error */ - Assert(distributedPlan); - - if (distributedPlan->planningError == NULL) - { - return distributedPlan; - } - else - { - RaiseDeferredError(distributedPlan->planningError, DEBUG2); - } } else { @@ -968,18 +956,18 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina distributedPlan = CreateRouterPlan(originalQuery, query, plannerRestrictionContext); - if (distributedPlan->planningError == NULL) - { - return distributedPlan; - } - else - { - /* - * For debugging it's useful to display why query was not - * router plannable. - */ - RaiseDeferredError(distributedPlan->planningError, DEBUG2); - } + } + + /* the functions above always return a plan, possibly with an error */ + Assert(distributedPlan); + + if (distributedPlan->planningError == NULL) + { + return distributedPlan; + } + else + { + RaiseDeferredError(distributedPlan->planningError, DEBUG2); } if (hasUnresolvedParams) From 2713e015d695db8fcca5644abe6ce2e0d0e05d17 Mon Sep 17 00:00:00 2001 From: Ahmet Gedemenli Date: Tue, 21 Mar 2023 16:34:52 +0300 Subject: [PATCH 14/18] Check before logicalrep for rebalancer, error if needed (#6754) DESCRIPTION: Check before logicalrep for rebalancer, error if needed Check if we can use logical replication or not, in case of shard transfer mode = auto, before executing the shard moves. If we can't, error out. Before this PR, we used to error out in the middle of shard moves: ```sql set citus.shard_count = 4; -- just to get the error sooner select citus_remove_node('localhost',9702); create table t1 (a int primary key); select create_distributed_table('t1','a'); create table t2 (a bigint); select create_distributed_table('t2','a'); select citus_add_node('localhost',9702); select rebalance_table_shards(); NOTICE: Moving shard 102008 from localhost:9701 to localhost:9702 ... 
NOTICE: Moving shard 102009 from localhost:9701 to localhost:9702 ... NOTICE: Moving shard 102012 from localhost:9701 to localhost:9702 ... ERROR: cannot use logical replication to transfer shards of the relation t2 since it doesn't have a REPLICA IDENTITY or PRIMARY KEY ``` Now we check and error out in the beginning, without moving the shards. fixes: #6727 --- .../distributed/operations/shard_rebalancer.c | 44 ++++++++++++++++--- .../regress/expected/shard_rebalancer.out | 34 +++++++++++++- src/test/regress/sql/shard_rebalancer.sql | 15 +++++++ 3 files changed, 85 insertions(+), 8 deletions(-) diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index baed8b0d5..c5282202e 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -1818,10 +1818,10 @@ static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) { char transferMode = LookupShardTransferMode(shardReplicationModeOid); - EnsureReferenceTablesExistOnAllNodesExtended(transferMode); if (list_length(options->relationIdList) == 0) { + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); return; } @@ -1836,6 +1836,25 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) List *placementUpdateList = GetRebalanceSteps(options); + if (transferMode == TRANSFER_MODE_AUTOMATIC) + { + /* + * If the shard transfer mode is set to auto, we should check beforehand + * if we are able to use logical replication to transfer shards or not. + * We throw an error if any of the tables do not have a replica identity, which + * is required for logical replication to replicate UPDATE and DELETE commands. + */ + PlacementUpdateEvent *placementUpdate = NULL; + foreach_ptr(placementUpdate, placementUpdateList) + { + Oid relationId = RelationIdForShard(placementUpdate->shardId); + List *colocatedTableList = ColocatedTableList(relationId); + VerifyTablesHaveReplicaIdentity(colocatedTableList); + } + } + + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); + if (list_length(placementUpdateList) == 0) { return; @@ -1916,12 +1935,6 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo EnsureTableOwner(colocatedTableId); } - if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) - { - /* make sure that all tables included in the rebalance have a replica identity*/ - VerifyTablesHaveReplicaIdentity(colocatedTableList); - } - List *placementUpdateList = GetRebalanceSteps(options); if (list_length(placementUpdateList) == 0) @@ -1930,6 +1943,23 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo return 0; } + if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) + { + /* + * If the shard transfer mode is set to auto, we should check beforehand + * if we are able to use logical replication to transfer shards or not. + * We throw an error if any of the tables do not have a replica identity, which + * is required for logical replication to replicate UPDATE and DELETE commands. 
+ */ + PlacementUpdateEvent *placementUpdate = NULL; + foreach_ptr(placementUpdate, placementUpdateList) + { + relationId = RelationIdForShard(placementUpdate->shardId); + List *colocatedTables = ColocatedTableList(relationId); + VerifyTablesHaveReplicaIdentity(colocatedTables); + } + } + DropOrphanedResourcesInSeparateTransaction(); /* find the name of the shard transfer mode to interpolate in the scheduled command */ diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index 2146d67f1..1dea3b442 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -1482,7 +1482,6 @@ SELECT * from master_drain_node('localhost', :worker_2_port); ERROR: cannot use logical replication to transfer shards of the relation colocated_rebalance_test since it doesn't have a REPLICA IDENTITY or PRIMARY KEY DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. -CONTEXT: while executing command on localhost:xxxxx -- Make sure shouldhaveshards is false select shouldhaveshards from pg_dist_node where nodeport = :worker_2_port; shouldhaveshards @@ -2714,6 +2713,39 @@ SELECT sh.logicalrelid, pl.nodeport (5 rows) DROP TABLE single_shard_colocation_1a, single_shard_colocation_1b, single_shard_colocation_1c, single_shard_colocation_2a, single_shard_colocation_2b CASCADE; +-- verify we detect if one of the tables do not have a replica identity or primary key +-- and error out in case of shard transfer mode = auto +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +create table table_with_primary_key (a int primary key); +select create_distributed_table('table_with_primary_key','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +create table table_without_primary_key (a bigint); +select create_distributed_table('table_without_primary_key','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +select rebalance_table_shards(); +ERROR: cannot use logical replication to transfer shards of the relation table_without_primary_key since it doesn't have a REPLICA IDENTITY or PRIMARY KEY +DROP TABLE table_with_primary_key, table_without_primary_key; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO OFF; REVOKE ALL ON SCHEMA public FROM testrole; diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql index dbbc94732..da4259f5b 100644 --- a/src/test/regress/sql/shard_rebalancer.sql +++ b/src/test/regress/sql/shard_rebalancer.sql @@ -1497,6 +1497,21 @@ SELECT sh.logicalrelid, pl.nodeport DROP TABLE single_shard_colocation_1a, single_shard_colocation_1b, single_shard_colocation_1c, single_shard_colocation_2a, single_shard_colocation_2b CASCADE; +-- verify we detect if one of the tables do not have a replica identity or primary key +-- and error out in case of shard transfer mode = auto +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); + +create table table_with_primary_key (a int primary key); +select create_distributed_table('table_with_primary_key','a'); +create table table_without_primary_key (a bigint); +select create_distributed_table('table_without_primary_key','a'); + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); +select rebalance_table_shards(); + +DROP TABLE table_with_primary_key, table_without_primary_key; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO OFF; REVOKE ALL ON SCHEMA public FROM testrole; From 4960ced1759c38955cdc05ab60b505f4b3b0f408 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Wed, 22 Mar 2023 10:49:08 +0300 Subject: [PATCH 15/18] Add an arbitrary config test heavily based on multi_router_planner_fast_path.sql (#6782) This would be useful for testing #6773. This is because, given that #6773 only adds support for router / fast-path queries, theoretically almost all the tests that we have in that test file should work for null-shard-key tables too (and they indeed do). I deliberately did not replace multi_router_planner_fast_path.sql with the one that I'm adding into arbitrary configs because we might still want to see when we're able to go through fast-path planning for the usual distributed tables (the ones that have a shard key). 
--- src/test/regress/create_schedule | 1 + .../expected/arbitrary_configs_router.out | 1561 +++++++++++++++++ .../arbitrary_configs_router_create.out | 121 ++ .../regress/sql/arbitrary_configs_router.sql | 634 +++++++ .../sql/arbitrary_configs_router_create.sql | 118 ++ src/test/regress/sql_schedule | 1 + 6 files changed, 2436 insertions(+) create mode 100644 src/test/regress/expected/arbitrary_configs_router.out create mode 100644 src/test/regress/expected/arbitrary_configs_router_create.out create mode 100644 src/test/regress/sql/arbitrary_configs_router.sql create mode 100644 src/test/regress/sql/arbitrary_configs_router_create.sql diff --git a/src/test/regress/create_schedule b/src/test/regress/create_schedule index db2ae92be..e301678b9 100644 --- a/src/test/regress/create_schedule +++ b/src/test/regress/create_schedule @@ -14,3 +14,4 @@ test: arbitrary_configs_truncate_cascade_create test: arbitrary_configs_truncate_partition_create test: arbitrary_configs_alter_table_add_constraint_without_name_create test: merge_arbitrary_create +test: arbitrary_configs_router_create diff --git a/src/test/regress/expected/arbitrary_configs_router.out b/src/test/regress/expected/arbitrary_configs_router.out new file mode 100644 index 000000000..a42b955cc --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_router.out @@ -0,0 +1,1561 @@ +SET search_path TO arbitrary_configs_router; +SET client_min_messages TO WARNING; +-- test simple select for a single row +SELECT * FROM articles_hash WHERE author_id = 10 AND id = 50; + id | author_id | title | word_count +--------------------------------------------------------------------- + 50 | 10 | anjanette | 19519 +(1 row) + +-- get all titles by a single author +SELECT title FROM articles_hash WHERE author_id = 10; + title +--------------------------------------------------------------------- + aggrandize + absentness + andelee + attemper + anjanette +(5 rows) + +-- try ordering them by word count +SELECT title, word_count FROM articles_hash + WHERE author_id = 10 + ORDER BY word_count DESC NULLS LAST; + title | word_count +--------------------------------------------------------------------- + anjanette | 19519 + aggrandize | 17277 + attemper | 14976 + andelee | 6363 + absentness | 1820 +(5 rows) + +-- look at last two articles by an author +SELECT title, id FROM articles_hash + WHERE author_id = 5 + ORDER BY id + LIMIT 2; + title | id +--------------------------------------------------------------------- + aruru | 5 + adversa | 15 +(2 rows) + +-- find all articles by two authors in same shard +-- but plan is not fast path router plannable due to +-- two distribution columns in the query +SELECT title, author_id FROM articles_hash + WHERE author_id = 7 OR author_id = 8 + ORDER BY author_id ASC, id; + title | author_id +--------------------------------------------------------------------- + aseptic | 7 + auriga | 7 + arsenous | 7 + archduchies | 7 + abeyance | 7 + agatized | 8 + assembly | 8 + aerophyte | 8 + anatine | 8 + alkylic | 8 +(10 rows) + +-- having clause is supported if it goes to a single shard +-- and single dist. 
key on the query +SELECT author_id, sum(word_count) AS corpus_size FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id + HAVING sum(word_count) > 1000 + ORDER BY sum(word_count) DESC; + author_id | corpus_size +--------------------------------------------------------------------- + 1 | 35894 +(1 row) + +-- fast path planner only support = operator +SELECT * FROM articles_hash WHERE author_id <= 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +SELECT * FROM articles_hash WHERE author_id IN (1, 3) ORDER BY 1,2,3,4; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 43 | 3 | affixal | 12723 +(10 rows) + +-- queries with CTEs cannot go through fast-path planning +WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) +SELECT * FROM first_author; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- two CTE joins also cannot go through fast-path planning +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + id | author_id | id | title +--------------------------------------------------------------------- + 1 | 1 | 1 | arsenous + 11 | 1 | 11 | alamo + 21 | 1 | 21 | arcading + 31 | 1 | 31 | athwartships + 41 | 1 | 41 | aznavour +(5 rows) + +-- this is a different case where each CTE is recursively planned and those goes +-- through the fast-path router planner, but the top level join is not +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + id | author_id | id | title +--------------------------------------------------------------------- +(0 rows) + +-- recursive CTEs are also cannot go through fast +-- path planning +WITH RECURSIVE hierarchy as ( + SELECT *, 1 AS level + FROM company_employees + WHERE company_id = 1 and manager_id = 0 + UNION + SELECT ce.*, (h.level+1) + FROM hierarchy h JOIN company_employees ce + ON (h.employee_id = ce.manager_id AND + h.company_id = ce.company_id AND + ce.company_id = 1)) +SELECT * FROM hierarchy WHERE LEVEL <= 2; + company_id | employee_id | manager_id | level +--------------------------------------------------------------------- + 1 | 1 | 0 | 1 + 1 | 2 | 1 | 2 + 1 | 3 | 1 | 2 +(3 rows) + +WITH update_article AS ( + UPDATE articles_hash SET word_count = 10 WHERE id = 1 AND word_count = 9 RETURNING * +) +SELECT * FROM update_article; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +WITH delete_article AS ( + DELETE FROM articles_hash WHERE id = 1 AND word_count = 10 RETURNING * +) +SELECT * FROM delete_article; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- grouping sets are 
supported via fast-path +SELECT + id, substring(title, 2, 1) AS subtitle, count(*) + FROM articles_hash + WHERE author_id = 1 + GROUP BY GROUPING SETS ((id),(subtitle)) + ORDER BY id, subtitle; + id | subtitle | count +--------------------------------------------------------------------- + 1 | | 1 + 11 | | 1 + 21 | | 1 + 31 | | 1 + 41 | | 1 + | l | 1 + | r | 2 + | t | 1 + | z | 1 +(9 rows) + +-- queries which involve functions in FROM clause are not supported via fast path planning +SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1; + id | author_id | title | word_count | position +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 | 3 + 11 | 1 | alamo | 1347 | 3 + 21 | 1 | arcading | 5890 | 3 + 31 | 1 | athwartships | 7271 | 3 + 41 | 1 | aznavour | 11814 | 3 +(5 rows) + +-- sublinks are not supported via fast path planning +SELECT * FROM articles_hash +WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2) +ORDER BY articles_hash.id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 2 | 2 | abducing | 13642 + 12 | 2 | archiblast | 18185 + 22 | 2 | antipope | 2728 + 32 | 2 | amazon | 11342 + 42 | 2 | ausable | 15885 +(5 rows) + +-- subqueries are not supported via fast path planning +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; + id | word_count +--------------------------------------------------------------------- + 50 | 19519 + 14 | 19094 + 48 | 18610 + 12 | 18185 + 46 | 17702 +(5 rows) + +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test +WHERE test.id = articles_hash.id and articles_hash.author_id = 1 +ORDER BY articles_hash.id; + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +-- simple lookup query just works +SELECT * + FROM articles_hash + WHERE author_id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- below query hits a single shard but with multiple filters +-- so cannot go via fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 OR author_id = 17; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- rename the output columns +SELECT id as article_id, word_count * id as random_value + FROM articles_hash + WHERE author_id = 1; + article_id | random_value +--------------------------------------------------------------------- + 1 | 9572 + 11 | 14817 + 21 | 123690 + 31 | 225401 + 41 | 484374 +(5 rows) + +-- joins do not go through fast-path planning +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + LIMIT 3; + first_author | second_word_count +--------------------------------------------------------------------- + 10 | 17277 + 10 | 1820 + 10 | 6363 +(3 rows) + +-- 
single shard select with limit goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 3; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 +(3 rows) + +-- single shard select with limit + offset goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 2 + OFFSET 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 +(2 rows) + +-- single shard select with limit + offset + order by goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id desc + LIMIT 2 + OFFSET 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 31 | 1 | athwartships | 7271 + 21 | 1 | arcading | 5890 +(2 rows) + +-- single shard select with group by on non-partition column goes through fast-path planning +SELECT id + FROM articles_hash + WHERE author_id = 1 + GROUP BY id + ORDER BY id; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- single shard select with distinct goes through fast-path planning +SELECT DISTINCT id + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- single shard aggregate goes through fast-path planning +SELECT avg(word_count) + FROM articles_hash + WHERE author_id = 2; + avg +--------------------------------------------------------------------- + 12356.400000000000 +(1 row) + +-- max, min, sum, count goes through fast-path planning +SELECT max(word_count) as max, min(word_count) as min, + sum(word_count) as sum, count(word_count) as cnt + FROM articles_hash + WHERE author_id = 2; + max | min | sum | cnt +--------------------------------------------------------------------- + 18185 | 2728 | 61782 | 5 +(1 row) + +-- queries with aggregates and group by goes through fast-path planning +SELECT max(word_count) + FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id; + max +--------------------------------------------------------------------- + 11814 +(1 row) + +-- set operations are not supported via fast-path planning +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 3 +) AS combination +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 43 | 3 | affixal | 12723 +(10 rows) + +-- function calls in the target list is supported via fast path +SELECT LEFT(title, 1) FROM articles_hash WHERE author_id = 1; + left +--------------------------------------------------------------------- + a + a + a + a + a +(5 rows) + +-- top-level union queries are supported through recursive planning +-- unions in subqueries are not supported via fast-path planning +SELECT * FROM ( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 1)) uu +ORDER BY 1, 2 +LIMIT 5; + id | author_id | 
title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Test various filtering options for router plannable check +-- cannot go through fast-path if there is +-- explicit coercion +SELECT * + FROM articles_hash + WHERE author_id = 1::bigint; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- can go through fast-path if there is +-- implicit coercion +-- This doesn't work see the related issue +-- reported https://github.com/citusdata/citus/issues/2605 +-- SELECT * +-- FROM articles_hash +-- WHERE author_id = 1.0; +SELECT * + FROM articles_hash + WHERE author_id = 68719476736; -- this is bigint + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 and author_id >= 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 or id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- goes through fast-path planning because +-- the dist. key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = 1 or id = 41); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 41 | 1 | aznavour | 11814 +(2 rows) + +-- this time there is an OR clause which prevents +-- router planning at all +SELECT * + FROM articles_hash + WHERE author_id = 1 and id = 1 or id = 41; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 41 | 1 | aznavour | 11814 +(2 rows) + +-- goes through fast-path planning because +-- the dist. 
key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = random()::int * 0); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- not router plannable due to function call on the right side +SELECT * + FROM articles_hash + WHERE author_id = (random()::int * 0 + 1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(-1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE 1 = abs(author_id); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(author_id - 2); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- the function is not on the dist. 
key, so qualify as +-- fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = abs(id - 2)); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 +(1 row) + +-- not router plannable due to is true +SELECT * + FROM articles_hash + WHERE (author_id = 1) is true; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- router plannable, (boolean expression) = true is collapsed to (boolean expression) +SELECT * + FROM articles_hash + WHERE (author_id = 1) = true; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- some more complex quals +SELECT count(*) FROM articles_hash WHERE (author_id = 15) AND (id = 1 OR word_count > 5); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (author_id = 15) OR (id = 1 AND word_count > 5); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) OR (author_id = 1 AND word_count > 5); + count +--------------------------------------------------------------------- + 6 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 OR word_count > 5); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 AND (word_count > 5 OR id = 2)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 OR author_id = 2)); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 AND author_id = 2)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND ((word_count > 5 OR title ilike 'b%' ) AND (author_id = 2 AND word_count > 50))); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- fast-path router plannable, between operator is on another column +SELECT * + FROM articles_hash + WHERE (author_id = 1) and id between 0 and 20; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 +(2 rows) + +-- fast-path router plannable, partition column expression is and'ed to rest +SELECT * + FROM articles_hash + WHERE (author_id = 1) and (id = 1 or id = 31) and title like '%s'; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 31 | 1 | athwartships | 7271 +(2 rows) + +-- fast-path router plannable, order is changed +SELECT * + FROM articles_hash + WHERE (id = 1 or id = 31) and title like '%s' and (author_id = 1); + id | author_id | title | word_count 
+--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 31 | 1 | athwartships | 7271 +(2 rows) + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1) and (word_count < 3000 or word_count > 8000); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 41 | 1 | aznavour | 11814 +(3 rows) + +-- window functions are supported with fast-path router plannable +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5; + prev | title | word_count +--------------------------------------------------------------------- + | afrasia | 864 + afrasia | adversa | 3164 + adversa | antehall | 7707 + antehall | aminate | 9089 + aminate | aruru | 11389 +(5 rows) + +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5 + ORDER BY word_count DESC; + prev | title | word_count +--------------------------------------------------------------------- + aminate | aruru | 11389 + antehall | aminate | 9089 + adversa | antehall | 7707 + afrasia | adversa | 3164 + | afrasia | 864 +(5 rows) + +SELECT id, MIN(id) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + id | min +--------------------------------------------------------------------- + 11 | 11 + 21 | 11 + 31 | 11 + 1 | 1 + 41 | 1 +(5 rows) + +SELECT id, word_count, AVG(word_count) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + id | word_count | avg +--------------------------------------------------------------------- + 11 | 1347 | 1347.0000000000000000 + 21 | 5890 | 3618.5000000000000000 + 31 | 7271 | 4836.0000000000000000 + 1 | 9572 | 6020.0000000000000000 + 41 | 11814 | 7178.8000000000000000 +(5 rows) + +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1; + word_count | rank +--------------------------------------------------------------------- + 1347 | 1 + 5890 | 2 + 7271 | 3 + 9572 | 4 + 11814 | 5 +(5 rows) + +-- some more tests on complex target lists +SELECT DISTINCT ON (author_id, id) author_id, id, + MIN(id) over (order by avg(word_count)) * AVG(id * 5.2 + (1.0/max(word_count))) over (order by max(word_count)) as t1, + count(*) FILTER (WHERE title LIKE 'al%') as cnt_with_filter, + count(*) FILTER (WHERE '0300030' LIKE '%3%') as cnt_with_filter_2, + avg(case when id > 2 then char_length(word_count::text) * (id * strpos(word_count::text, '1')) end) as case_cnt, + COALESCE(strpos(avg(word_count)::text, '1'), 20) + FROM articles_hash as aliased_table + WHERE author_id = 1 + GROUP BY author_id, id + HAVING count(DISTINCT title) > 0 + ORDER BY author_id, id, sum(word_count) - avg(char_length(title)) DESC, COALESCE(array_upper(ARRAY[max(id)],1) * 5,0) DESC; + author_id | id | t1 | cnt_with_filter | cnt_with_filter_2 | case_cnt | coalesce +--------------------------------------------------------------------- + 1 | 1 | 
83.20028854345579490574 | 0 | 1 | | 0 + 1 | 11 | 629.20816629547141796586 | 1 | 1 | 44.0000000000000000 | 1 + 1 | 21 | 915.20501693381380745499 | 0 | 1 | 0.00000000000000000000 | 0 + 1 | 31 | 1201.20384890897723321000 | 0 | 1 | 496.0000000000000000 | 4 + 1 | 41 | 109.200247763831844321405335 | 0 | 1 | 205.0000000000000000 | 1 +(5 rows) + +-- where false queries are router plannable but not fast-path +SELECT * + FROM articles_hash + WHERE false; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and false; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and 1=0; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +SELECT * + FROM articles_hash + WHERE null and author_id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- we cannot qualify dist_key = X operator Y via +-- fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 2 | 2 | abducing | 13642 + 12 | 2 | archiblast | 18185 + 22 | 2 | antipope | 2728 + 32 | 2 | amazon | 11342 + 42 | 2 | ausable | 15885 +(5 rows) + +-- where false with immutable function returning false +-- goes through fast-path +SELECT * + FROM articles_hash a + WHERE a.author_id = 10 and int4eq(1, 2); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- partition_column is null clause does not prune out any shards, +-- all shards remain after shard pruning, not router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id is null; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- partition_column equals to null clause prunes out all shards +-- no shards after shard pruning, router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id = null; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- union/difference /intersection with where false +-- this query was not originally router plannable, addition of 1=0 +-- makes it router plannable but not fast-path +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- same with the above, but with WHERE false +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination WHERE false +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- window functions with where false +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY 
word_count) + FROM articles_hash + WHERE author_id = 1 and 1=0; + word_count | rank +--------------------------------------------------------------------- +(0 rows) + +-- complex query hitting a single shard and a fast-path +SELECT + count(DISTINCT CASE + WHEN + word_count > 100 + THEN + id + ELSE + NULL + END) as c + FROM + articles_hash + WHERE + author_id = 5; + c +--------------------------------------------------------------------- + 5 +(1 row) + +-- queries inside transactions can be fast-path router plannable +BEGIN; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +END; +-- queries inside read-only transactions can be fast-path router plannable +SET TRANSACTION READ ONLY; +WARNING: SET TRANSACTION can only be used in transaction blocks +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +END; +WARNING: there is no transaction in progress +-- cursor queries are fast-path router plannable +BEGIN; +DECLARE test_cursor CURSOR FOR + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +FETCH test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 +(1 row) + +FETCH ALL test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(4 rows) + +FETCH test_cursor; -- fetch one row after the last + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +FETCH BACKWARD test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 41 | 1 | aznavour | 11814 +(1 row) + +END; +-- queries inside copy can be router plannable +COPY ( + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id) TO STDOUT; +1 1 arsenous 9572 +11 1 alamo 1347 +21 1 arcading 5890 +31 1 athwartships 7271 +41 1 aznavour 11814 +-- table creation queries inside can be fast-path router plannable +CREATE TEMP TABLE temp_articles_hash as + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +-- fast-path router plannable queries may include filter for aggregates +SELECT count(*), count(*) FILTER (WHERE id < 3) + FROM articles_hash + WHERE author_id = 1; + count | count +--------------------------------------------------------------------- + 5 | 1 +(1 row) + +-- prepare queries can be router plannable +PREPARE author_1_articles as + SELECT * + FROM articles_hash + WHERE author_id = 1; +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count 
+--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- parametric prepare queries can be router plannable +PREPARE author_articles(int) as + SELECT * + FROM articles_hash + WHERE author_id = $1; +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE 
author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +PREPARE author_articles_update(int) AS + UPDATE articles_hash SET title = 'test' WHERE author_id = $1; +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +-- we don't want too many details. though we're omitting +-- "DETAIL: distribution column value:", we see it acceptable +-- since the query results verifies the correctness +\set VERBOSITY terse +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + 
id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +\set VERBOSITY default +-- insert .. 
select via coordinator could also +-- use fast-path queries +PREPARE insert_sel(int, int) AS +INSERT INTO articles_hash + SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0; +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +-- one final interesting preperad statement +-- where one of the filters is on the target list +PREPARE fast_path_agg_filter(int, int) AS + SELECT + count(*) FILTER (WHERE word_count=$1) + FROM + articles_hash + WHERE author_id = $2; +EXECUTE fast_path_agg_filter(1,1); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(2,2); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(3,3); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(4,4); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(5,5); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(6,6); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- views internally become subqueries, so not fast-path router query +SELECT * FROM test_view; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- materialized views can be created for fast-path router plannable queries +CREATE MATERIALIZED VIEW mv_articles_hash_empty AS + SELECT * FROM articles_hash WHERE author_id = 1; +SELECT * FROM mv_articles_hash_empty; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +SELECT id + FROM articles_hash + WHERE author_id = 1; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +INSERT INTO articles_hash VALUES (51, 1, 'amateus', 1814), (52, 1, 'second amateus', 2824); +-- verify insert is successfull (not router plannable and executable) +SELECT id + FROM articles_hash + WHERE author_id = 1; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 + 51 + 52 +(7 rows) + +SELECT count(*) FROM collections_list WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FROM collections_list_1 WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FROM collections_list_2 WHERE key = 4; + count +--------------------------------------------------------------------- + 0 +(1 row) + +UPDATE collections_list SET value = 15 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FILTER (where value = 15) FROM collections_list_1 WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 
row) + +SELECT count(*) FILTER (where value = 15) FROM collections_list_2 WHERE key = 4; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- test INSERT using values from generate_series() and repeat() functions +INSERT INTO authors_reference (id, name) VALUES (generate_series(1, 10), repeat('Migjeni', 3)); +SELECT * FROM authors_reference ORDER BY 1, 2; + id | name +--------------------------------------------------------------------- + 1 | MigjeniMigjeniMigjeni + 2 | MigjeniMigjeniMigjeni + 3 | MigjeniMigjeniMigjeni + 4 | MigjeniMigjeniMigjeni + 5 | MigjeniMigjeniMigjeni + 6 | MigjeniMigjeniMigjeni + 7 | MigjeniMigjeniMigjeni + 8 | MigjeniMigjeniMigjeni + 9 | MigjeniMigjeniMigjeni + 10 | MigjeniMigjeniMigjeni +(10 rows) + diff --git a/src/test/regress/expected/arbitrary_configs_router_create.out b/src/test/regress/expected/arbitrary_configs_router_create.out new file mode 100644 index 000000000..74dfbf4f3 --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_router_create.out @@ -0,0 +1,121 @@ +CREATE SCHEMA arbitrary_configs_router; +SET search_path TO arbitrary_configs_router; +CREATE TABLE articles_hash ( + id bigint NOT NULL, + author_id bigint NOT NULL, + title varchar(20) NOT NULL, + word_count integer +); +SELECT create_distributed_table('articles_hash', 'author_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE authors_reference (id int, name text); +SELECT create_reference_table('authors_reference'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- create a bunch of test data +INSERT INTO articles_hash VALUES (1, 1, 'arsenous', 9572), (2, 2, 'abducing', 13642),( 3, 3, 'asternal', 10480),( 4, 4, 'altdorfer', 14551),( 5, 5, 'aruru', 11389), + (6, 6, 'atlases', 15459),(7, 7, 'aseptic', 12298),( 8, 8, 'agatized', 16368),(9, 9, 'alligate', 438), + (10, 10, 'aggrandize', 17277),(11, 1, 'alamo', 1347),(12, 2, 'archiblast', 18185), + (13, 3, 'aseyev', 2255),(14, 4, 'andesite', 19094),(15, 5, 'adversa', 3164), + (16, 6, 'allonym', 2),(17, 7, 'auriga', 4073),(18, 8, 'assembly', 911),(19, 9, 'aubergiste', 4981), + (20, 10, 'absentness', 1820),(21, 1, 'arcading', 5890),(22, 2, 'antipope', 2728),(23, 3, 'abhorring', 6799), + (24, 4, 'audacious', 3637),(25, 5, 'antehall', 7707),(26, 6, 'abington', 4545),(27, 7, 'arsenous', 8616), + (28, 8, 'aerophyte', 5454),(29, 9, 'amateur', 9524),(30, 10, 'andelee', 6363),(31, 1, 'athwartships', 7271), + (32, 2, 'amazon', 11342),(33, 3, 'autochrome', 8180),(34, 4, 'amnestied', 12250),(35, 5, 'aminate', 9089), + (36, 6, 'ablation', 13159),(37, 7, 'archduchies', 9997),(38, 8, 'anatine', 14067),(39, 9, 'anchises', 10906), + (40, 10, 'attemper', 14976),(41, 1, 'aznavour', 11814),(42, 2, 'ausable', 15885),(43, 3, 'affixal', 12723), + (44, 4, 'anteport', 16793),(45, 5, 'afrasia', 864),(46, 6, 'atlanta', 17702),(47, 7, 'abeyance', 1772), + (48, 8, 'alkylic', 18610),(49, 9, 'anyone', 2681),(50, 10, 'anjanette', 19519); +CREATE TABLE company_employees (company_id int, employee_id int, manager_id int); +SELECT create_distributed_table('company_employees', 'company_id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO company_employees values(1, 1, 0); +INSERT INTO company_employees values(1, 2, 1); +INSERT INTO company_employees values(1, 3, 1); +INSERT INTO company_employees values(1, 
4, 2); +INSERT INTO company_employees values(1, 5, 4); +INSERT INTO company_employees values(3, 1, 0); +INSERT INTO company_employees values(3, 15, 1); +INSERT INTO company_employees values(3, 3, 1); +-- finally, some tests with partitioned tables +CREATE TABLE collections_list ( + key bigint, + ts timestamptz, + collection_id integer, + value numeric +) PARTITION BY LIST (collection_id ); +CREATE TABLE collections_list_1 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 1 ); +CREATE TABLE collections_list_2 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 2 ); +SELECT create_distributed_table('collections_list', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO collections_list SELECT i % 10, now(), (i % 2) + 1, i*i FROM generate_series(0, 50)i; +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id() RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = 1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id(int) RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = $1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; +-- check that function returning setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count() RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = 1; + +END; +$$ LANGUAGE plpgsql; +-- check that function returning setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count(int) RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = $1; + +END; +$$ LANGUAGE plpgsql; +-- Suppress the warning that tells that the view won't be distributed +-- because it depends on a local table. +-- +-- This only happens when running PostgresConfig. +SET client_min_messages TO ERROR; +CREATE VIEW test_view AS + SELECT * FROM articles_hash WHERE author_id = 1; diff --git a/src/test/regress/sql/arbitrary_configs_router.sql b/src/test/regress/sql/arbitrary_configs_router.sql new file mode 100644 index 000000000..f59c5fa4a --- /dev/null +++ b/src/test/regress/sql/arbitrary_configs_router.sql @@ -0,0 +1,634 @@ +SET search_path TO arbitrary_configs_router; + +SET client_min_messages TO WARNING; + +-- test simple select for a single row +SELECT * FROM articles_hash WHERE author_id = 10 AND id = 50; + +-- get all titles by a single author +SELECT title FROM articles_hash WHERE author_id = 10; + +-- try ordering them by word count +SELECT title, word_count FROM articles_hash + WHERE author_id = 10 + ORDER BY word_count DESC NULLS LAST; + +-- look at last two articles by an author +SELECT title, id FROM articles_hash + WHERE author_id = 5 + ORDER BY id + LIMIT 2; + +-- find all articles by two authors in same shard +-- but plan is not fast path router plannable due to +-- two distribution columns in the query +SELECT title, author_id FROM articles_hash + WHERE author_id = 7 OR author_id = 8 + ORDER BY author_id ASC, id; + +-- having clause is supported if it goes to a single shard +-- and single dist. 
key on the query +SELECT author_id, sum(word_count) AS corpus_size FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id + HAVING sum(word_count) > 1000 + ORDER BY sum(word_count) DESC; + +-- fast path planner only support = operator +SELECT * FROM articles_hash WHERE author_id <= 1; +SELECT * FROM articles_hash WHERE author_id IN (1, 3) ORDER BY 1,2,3,4; + +-- queries with CTEs cannot go through fast-path planning +WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) +SELECT * FROM first_author; + +-- two CTE joins also cannot go through fast-path planning +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + +-- this is a different case where each CTE is recursively planned and those goes +-- through the fast-path router planner, but the top level join is not +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + +-- recursive CTEs are also cannot go through fast +-- path planning +WITH RECURSIVE hierarchy as ( + SELECT *, 1 AS level + FROM company_employees + WHERE company_id = 1 and manager_id = 0 + UNION + SELECT ce.*, (h.level+1) + FROM hierarchy h JOIN company_employees ce + ON (h.employee_id = ce.manager_id AND + h.company_id = ce.company_id AND + ce.company_id = 1)) +SELECT * FROM hierarchy WHERE LEVEL <= 2; + +WITH update_article AS ( + UPDATE articles_hash SET word_count = 10 WHERE id = 1 AND word_count = 9 RETURNING * +) +SELECT * FROM update_article; + +WITH delete_article AS ( + DELETE FROM articles_hash WHERE id = 1 AND word_count = 10 RETURNING * +) +SELECT * FROM delete_article; + +-- grouping sets are supported via fast-path +SELECT + id, substring(title, 2, 1) AS subtitle, count(*) + FROM articles_hash + WHERE author_id = 1 + GROUP BY GROUPING SETS ((id),(subtitle)) + ORDER BY id, subtitle; + +-- queries which involve functions in FROM clause are not supported via fast path planning +SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1; + +-- sublinks are not supported via fast path planning +SELECT * FROM articles_hash +WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2) +ORDER BY articles_hash.id; + +-- subqueries are not supported via fast path planning +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; + +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test +WHERE test.id = articles_hash.id and articles_hash.author_id = 1 +ORDER BY articles_hash.id; + +-- simple lookup query just works +SELECT * + FROM articles_hash + WHERE author_id = 1; + +-- below query hits a single shard but with multiple filters +-- so cannot go via fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 OR author_id = 17; + +-- rename the output columns +SELECT id as article_id, word_count * id as random_value + FROM articles_hash + WHERE author_id = 1; + +-- joins do not go through fast-path planning +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + 
LIMIT 3; + +-- single shard select with limit goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 3; + +-- single shard select with limit + offset goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 2 + OFFSET 1; + +-- single shard select with limit + offset + order by goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id desc + LIMIT 2 + OFFSET 1; + +-- single shard select with group by on non-partition column goes through fast-path planning +SELECT id + FROM articles_hash + WHERE author_id = 1 + GROUP BY id + ORDER BY id; + +-- single shard select with distinct goes through fast-path planning +SELECT DISTINCT id + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + +-- single shard aggregate goes through fast-path planning +SELECT avg(word_count) + FROM articles_hash + WHERE author_id = 2; + +-- max, min, sum, count goes through fast-path planning +SELECT max(word_count) as max, min(word_count) as min, + sum(word_count) as sum, count(word_count) as cnt + FROM articles_hash + WHERE author_id = 2; + + +-- queries with aggregates and group by goes through fast-path planning +SELECT max(word_count) + FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id; + + +-- set operations are not supported via fast-path planning +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 3 +) AS combination +ORDER BY id; + +-- function calls in the target list is supported via fast path +SELECT LEFT(title, 1) FROM articles_hash WHERE author_id = 1; + + +-- top-level union queries are supported through recursive planning + +-- unions in subqueries are not supported via fast-path planning +SELECT * FROM ( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 1)) uu +ORDER BY 1, 2 +LIMIT 5; + + +-- Test various filtering options for router plannable check + +-- cannot go through fast-path if there is +-- explicit coercion +SELECT * + FROM articles_hash + WHERE author_id = 1::bigint; + +-- can go through fast-path if there is +-- implicit coercion +-- This doesn't work see the related issue +-- reported https://github.com/citusdata/citus/issues/2605 +-- SELECT * +-- FROM articles_hash +-- WHERE author_id = 1.0; + +SELECT * + FROM articles_hash + WHERE author_id = 68719476736; -- this is bigint + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 and author_id >= 1; + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 or id = 1; + +-- goes through fast-path planning because +-- the dist. key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = 1 or id = 41); + +-- this time there is an OR clause which prevents +-- router planning at all +SELECT * + FROM articles_hash + WHERE author_id = 1 and id = 1 or id = 41; + +-- goes through fast-path planning because +-- the dist. 
key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = random()::int * 0); + +-- not router plannable due to function call on the right side +SELECT * + FROM articles_hash + WHERE author_id = (random()::int * 0 + 1); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(-1); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE 1 = abs(author_id); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(author_id - 2); + +-- the function is not on the dist. key, so qualify as +-- fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = abs(id - 2)); + +-- not router plannable due to is true +SELECT * + FROM articles_hash + WHERE (author_id = 1) is true; + +-- router plannable, (boolean expression) = true is collapsed to (boolean expression) +SELECT * + FROM articles_hash + WHERE (author_id = 1) = true; + +-- some more complex quals +SELECT count(*) FROM articles_hash WHERE (author_id = 15) AND (id = 1 OR word_count > 5); +SELECT count(*) FROM articles_hash WHERE (author_id = 15) OR (id = 1 AND word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) OR (author_id = 1 AND word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 OR word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 AND (word_count > 5 OR id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 OR author_id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 AND author_id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND ((word_count > 5 OR title ilike 'b%' ) AND (author_id = 2 AND word_count > 50))); + +-- fast-path router plannable, between operator is on another column +SELECT * + FROM articles_hash + WHERE (author_id = 1) and id between 0 and 20; + +-- fast-path router plannable, partition column expression is and'ed to rest +SELECT * + FROM articles_hash + WHERE (author_id = 1) and (id = 1 or id = 31) and title like '%s'; + +-- fast-path router plannable, order is changed +SELECT * + FROM articles_hash + WHERE (id = 1 or id = 31) and title like '%s' and (author_id = 1); + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1); + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1) and (word_count < 3000 or word_count > 8000); + +-- window functions are supported with fast-path router plannable +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5; + +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5 + ORDER BY word_count DESC; + +SELECT id, MIN(id) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + +SELECT id, word_count, AVG(word_count) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1; + +-- some more tests on complex target lists +SELECT DISTINCT ON (author_id, id) author_id, id, + MIN(id) over 
(order by avg(word_count)) * AVG(id * 5.2 + (1.0/max(word_count))) over (order by max(word_count)) as t1, + count(*) FILTER (WHERE title LIKE 'al%') as cnt_with_filter, + count(*) FILTER (WHERE '0300030' LIKE '%3%') as cnt_with_filter_2, + avg(case when id > 2 then char_length(word_count::text) * (id * strpos(word_count::text, '1')) end) as case_cnt, + COALESCE(strpos(avg(word_count)::text, '1'), 20) + FROM articles_hash as aliased_table + WHERE author_id = 1 + GROUP BY author_id, id + HAVING count(DISTINCT title) > 0 + ORDER BY author_id, id, sum(word_count) - avg(char_length(title)) DESC, COALESCE(array_upper(ARRAY[max(id)],1) * 5,0) DESC; + +-- where false queries are router plannable but not fast-path +SELECT * + FROM articles_hash + WHERE false; + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and false; + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and 1=0; + +SELECT * + FROM articles_hash + WHERE null and author_id = 1; + +-- we cannot qualify dist_key = X operator Y via +-- fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + 1; + +-- where false with immutable function returning false +-- goes through fast-path +SELECT * + FROM articles_hash a + WHERE a.author_id = 10 and int4eq(1, 2); + +-- partition_column is null clause does not prune out any shards, +-- all shards remain after shard pruning, not router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id is null; + +-- partition_column equals to null clause prunes out all shards +-- no shards after shard pruning, router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id = null; + +-- union/difference /intersection with where false +-- this query was not originally router plannable, addition of 1=0 +-- makes it router plannable but not fast-path +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination +ORDER BY id; + +-- same with the above, but with WHERE false +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination WHERE false +ORDER BY id; + +-- window functions with where false +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1 and 1=0; + +-- complex query hitting a single shard and a fast-path +SELECT + count(DISTINCT CASE + WHEN + word_count > 100 + THEN + id + ELSE + NULL + END) as c + FROM + articles_hash + WHERE + author_id = 5; +-- queries inside transactions can be fast-path router plannable +BEGIN; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +END; + +-- queries inside read-only transactions can be fast-path router plannable +SET TRANSACTION READ ONLY; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +END; + +-- cursor queries are fast-path router plannable +BEGIN; +DECLARE test_cursor CURSOR FOR + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +FETCH test_cursor; +FETCH ALL test_cursor; +FETCH test_cursor; -- fetch one row after the last +FETCH BACKWARD test_cursor; +END; + +-- queries inside copy can be router plannable +COPY ( + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id) TO STDOUT; + +-- table creation queries inside can be fast-path router plannable +CREATE TEMP TABLE temp_articles_hash as + SELECT * + 
FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + +-- fast-path router plannable queries may include filter for aggregates +SELECT count(*), count(*) FILTER (WHERE id < 3) + FROM articles_hash + WHERE author_id = 1; + +-- prepare queries can be router plannable +PREPARE author_1_articles as + SELECT * + FROM articles_hash + WHERE author_id = 1; + +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; + +-- parametric prepare queries can be router plannable +PREPARE author_articles(int) as + SELECT * + FROM articles_hash + WHERE author_id = $1; + +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); + +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); + +PREPARE author_articles_update(int) AS + UPDATE articles_hash SET title = 'test' WHERE author_id = $1; + +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); + +-- we don't want too many details. though we're omitting +-- "DETAIL: distribution column value:", we see it acceptable +-- since the query results verifies the correctness +\set VERBOSITY terse + +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); + +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); + +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); + +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); + +\set VERBOSITY default + +-- insert .. 
select via coordinator could also +-- use fast-path queries +PREPARE insert_sel(int, int) AS +INSERT INTO articles_hash + SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0; + +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); + +-- one final interesting preperad statement +-- where one of the filters is on the target list +PREPARE fast_path_agg_filter(int, int) AS + SELECT + count(*) FILTER (WHERE word_count=$1) + FROM + articles_hash + WHERE author_id = $2; + +EXECUTE fast_path_agg_filter(1,1); +EXECUTE fast_path_agg_filter(2,2); +EXECUTE fast_path_agg_filter(3,3); +EXECUTE fast_path_agg_filter(4,4); +EXECUTE fast_path_agg_filter(5,5); +EXECUTE fast_path_agg_filter(6,6); + +-- views internally become subqueries, so not fast-path router query +SELECT * FROM test_view; + +-- materialized views can be created for fast-path router plannable queries +CREATE MATERIALIZED VIEW mv_articles_hash_empty AS + SELECT * FROM articles_hash WHERE author_id = 1; +SELECT * FROM mv_articles_hash_empty; + + +SELECT id + FROM articles_hash + WHERE author_id = 1; + +INSERT INTO articles_hash VALUES (51, 1, 'amateus', 1814), (52, 1, 'second amateus', 2824); + +-- verify insert is successfull (not router plannable and executable) +SELECT id + FROM articles_hash + WHERE author_id = 1; + +SELECT count(*) FROM collections_list WHERE key = 4; +SELECT count(*) FROM collections_list_1 WHERE key = 4; +SELECT count(*) FROM collections_list_2 WHERE key = 4; +UPDATE collections_list SET value = 15 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list_1 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list_2 WHERE key = 4; + +-- test INSERT using values from generate_series() and repeat() functions +INSERT INTO authors_reference (id, name) VALUES (generate_series(1, 10), repeat('Migjeni', 3)); +SELECT * FROM authors_reference ORDER BY 1, 2; diff --git a/src/test/regress/sql/arbitrary_configs_router_create.sql b/src/test/regress/sql/arbitrary_configs_router_create.sql new file mode 100644 index 000000000..956100c7e --- /dev/null +++ b/src/test/regress/sql/arbitrary_configs_router_create.sql @@ -0,0 +1,118 @@ +CREATE SCHEMA arbitrary_configs_router; +SET search_path TO arbitrary_configs_router; + +CREATE TABLE articles_hash ( + id bigint NOT NULL, + author_id bigint NOT NULL, + title varchar(20) NOT NULL, + word_count integer +); + +SELECT create_distributed_table('articles_hash', 'author_id'); + +CREATE TABLE authors_reference (id int, name text); +SELECT create_reference_table('authors_reference'); + +-- create a bunch of test data +INSERT INTO articles_hash VALUES (1, 1, 'arsenous', 9572), (2, 2, 'abducing', 13642),( 3, 3, 'asternal', 10480),( 4, 4, 'altdorfer', 14551),( 5, 5, 'aruru', 11389), + (6, 6, 'atlases', 15459),(7, 7, 'aseptic', 12298),( 8, 8, 'agatized', 16368),(9, 9, 'alligate', 438), + (10, 10, 'aggrandize', 17277),(11, 1, 'alamo', 1347),(12, 2, 'archiblast', 18185), + (13, 3, 'aseyev', 2255),(14, 4, 'andesite', 19094),(15, 5, 'adversa', 3164), + (16, 6, 'allonym', 2),(17, 7, 'auriga', 4073),(18, 8, 'assembly', 911),(19, 9, 'aubergiste', 4981), + (20, 10, 'absentness', 1820),(21, 1, 'arcading', 5890),(22, 2, 'antipope', 2728),(23, 3, 'abhorring', 6799), + (24, 4, 'audacious', 3637),(25, 5, 'antehall', 7707),(26, 6, 'abington', 4545),(27, 7, 
'arsenous', 8616), + (28, 8, 'aerophyte', 5454),(29, 9, 'amateur', 9524),(30, 10, 'andelee', 6363),(31, 1, 'athwartships', 7271), + (32, 2, 'amazon', 11342),(33, 3, 'autochrome', 8180),(34, 4, 'amnestied', 12250),(35, 5, 'aminate', 9089), + (36, 6, 'ablation', 13159),(37, 7, 'archduchies', 9997),(38, 8, 'anatine', 14067),(39, 9, 'anchises', 10906), + (40, 10, 'attemper', 14976),(41, 1, 'aznavour', 11814),(42, 2, 'ausable', 15885),(43, 3, 'affixal', 12723), + (44, 4, 'anteport', 16793),(45, 5, 'afrasia', 864),(46, 6, 'atlanta', 17702),(47, 7, 'abeyance', 1772), + (48, 8, 'alkylic', 18610),(49, 9, 'anyone', 2681),(50, 10, 'anjanette', 19519); + +CREATE TABLE company_employees (company_id int, employee_id int, manager_id int); + +SELECT create_distributed_table('company_employees', 'company_id', 'hash'); + +INSERT INTO company_employees values(1, 1, 0); +INSERT INTO company_employees values(1, 2, 1); +INSERT INTO company_employees values(1, 3, 1); +INSERT INTO company_employees values(1, 4, 2); +INSERT INTO company_employees values(1, 5, 4); + +INSERT INTO company_employees values(3, 1, 0); +INSERT INTO company_employees values(3, 15, 1); +INSERT INTO company_employees values(3, 3, 1); + +-- finally, some tests with partitioned tables +CREATE TABLE collections_list ( + key bigint, + ts timestamptz, + collection_id integer, + value numeric +) PARTITION BY LIST (collection_id ); + +CREATE TABLE collections_list_1 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 1 ); + +CREATE TABLE collections_list_2 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 2 ); + +SELECT create_distributed_table('collections_list', 'key'); +INSERT INTO collections_list SELECT i % 10, now(), (i % 2) + 1, i*i FROM generate_series(0, 50)i; + +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id() RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = 1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; + +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id(int) RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = $1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; + +-- check that function returning setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count() RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = 1; + +END; +$$ LANGUAGE plpgsql; + +-- check that function returning setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count(int) RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = $1; + +END; +$$ LANGUAGE plpgsql; + +-- Suppress the warning that tells that the view won't be distributed +-- because it depends on a local table. +-- +-- This only happens when running PostgresConfig. 
+SET client_min_messages TO ERROR; +CREATE VIEW test_view AS + SELECT * FROM articles_hash WHERE author_id = 1; diff --git a/src/test/regress/sql_schedule b/src/test/regress/sql_schedule index 272a84eff..9538f1482 100644 --- a/src/test/regress/sql_schedule +++ b/src/test/regress/sql_schedule @@ -15,3 +15,4 @@ test: arbitrary_configs_truncate_cascade test: arbitrary_configs_truncate_partition test: arbitrary_configs_alter_table_add_constraint_without_name test: merge_arbitrary +test: arbitrary_configs_router From e1f1d63050899e189faac395440faa187a8ca4e0 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Wed, 22 Mar 2023 15:15:23 +0300 Subject: [PATCH 16/18] Rename AllRelations.. functions to AllDistributedRelations.. (#6789) Because they're only interested in distributed tables. Even more, this replaces HasDistributionKey() check with IsCitusTableType(DISTRIBUTED_TABLE) because this doesn't make a difference on main and sounds slightly more intuitive. Plus, this would also allow safely using this function in https://github.com/citusdata/citus/pull/6773. --- .../distributed/planner/merge_planner.c | 2 +- .../relation_restriction_equivalence.c | 41 +++++++++++-------- .../relation_restriction_equivalence.h | 2 +- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c index 03fd9e00d..4839d5725 100644 --- a/src/backend/distributed/planner/merge_planner.c +++ b/src/backend/distributed/planner/merge_planner.c @@ -229,7 +229,7 @@ ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList, } /* All distributed tables must be colocated */ - if (!AllRelationsInRTEListColocated(distTablesList)) + if (!AllDistributedRelationsInRTEListColocated(distTablesList)) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, "For MERGE command, all the distributed tables " diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 5c91ee79c..3fa3068dc 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -151,9 +151,10 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass secondClass); static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex, Index *partitionKeyIndex); -static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext * - restrictionContext); -static bool AllRelationsInListColocated(List *relationList); +static bool AllDistributedRelationsInRestrictionContextColocated( + RelationRestrictionContext * + restrictionContext); +static bool AllDistributedRelationsInListColocated(List *relationList); static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node); static JoinRestrictionContext * FilterJoinRestrictionContext( JoinRestrictionContext *joinRestrictionContext, Relids @@ -384,7 +385,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery, return false; } - if (!AllRelationsInRestrictionContextColocated(restrictionContext)) + if (!AllDistributedRelationsInRestrictionContextColocated(restrictionContext)) { /* distribution columns are equal, but tables are not co-located */ return false; @@ -1920,11 +1921,12 @@ FindQueryContainingRTEIdentityInternal(Node *node, /* - * AllRelationsInRestrictionContextColocated determines whether all of the relations in the - * given relation restrictions list are co-located. 
+ * AllDistributedRelationsInRestrictionContextColocated determines whether all of the + * distributed relations in the given relation restrictions list are co-located. */ static bool -AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext) +AllDistributedRelationsInRestrictionContextColocated( + RelationRestrictionContext *restrictionContext) { RelationRestriction *relationRestriction = NULL; List *relationIdList = NIL; @@ -1935,16 +1937,16 @@ AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictio relationIdList = lappend_oid(relationIdList, relationRestriction->relationId); } - return AllRelationsInListColocated(relationIdList); + return AllDistributedRelationsInListColocated(relationIdList); } /* - * AllRelationsInRTEListColocated determines whether all of the relations in the - * given RangeTableEntry list are co-located. + * AllDistributedRelationsInRTEListColocated determines whether all of the + * distributed relations in the given RangeTableEntry list are co-located. */ bool -AllRelationsInRTEListColocated(List *rangeTableEntryList) +AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList) { RangeTblEntry *rangeTableEntry = NULL; List *relationIdList = NIL; @@ -1954,24 +1956,31 @@ AllRelationsInRTEListColocated(List *rangeTableEntryList) relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid); } - return AllRelationsInListColocated(relationIdList); + return AllDistributedRelationsInListColocated(relationIdList); } /* - * AllRelationsInListColocated determines whether all of the relations in the - * given list are co-located. + * AllDistributedRelationsInListColocated determines whether all of the + * distributed relations in the given list are co-located. */ static bool -AllRelationsInListColocated(List *relationList) +AllDistributedRelationsInListColocated(List *relationList) { int initialColocationId = INVALID_COLOCATION_ID; Oid relationId = InvalidOid; foreach_oid(relationId, relationList) { - if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) + if (!IsCitusTable(relationId)) { + /* not interested in Postgres tables */ + continue; + } + + if (!IsCitusTableType(relationId, DISTRIBUTED_TABLE)) + { + /* not interested in non-distributed tables */ continue; } diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index e0e716c7e..07b6348d9 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -54,5 +54,5 @@ extern RelationRestrictionContext * FilterRelationRestrictionContext( RelationRestrictionContext *relationRestrictionContext, Relids queryRteIdentities); -extern bool AllRelationsInRTEListColocated(List *rangeTableEntryList); +extern bool AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList); #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ From da7db53c87af143ec6e4240261928f57d549d85f Mon Sep 17 00:00:00 2001 From: Teja Mupparti Date: Tue, 21 Mar 2023 14:45:03 -0700 Subject: [PATCH 17/18] Refactor some of the planning code to accomodate a new planning path for MERGE SQL --- .../distributed/planner/distributed_planner.c | 155 +++++++++++++----- .../distributed/planner/merge_planner.c | 17 ++ src/include/distributed/merge_planner.h | 5 + 3 files changed, 135 insertions(+), 42 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c 
index 866f7353a..eb9e21786 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -34,6 +34,7 @@ #include "distributed/intermediate_results.h" #include "distributed/listutils.h" #include "distributed/coordinator_protocol.h" +#include "distributed/merge_planner.h" #include "distributed/metadata_cache.h" #include "distributed/multi_executor.h" #include "distributed/distributed_planner.h" @@ -68,6 +69,17 @@ #include "utils/syscache.h" +/* RouterPlanType is used to determine the router plan to invoke */ +typedef enum RouterPlanType +{ + INSERT_SELECT_INTO_CITUS_TABLE, + INSERT_SELECT_INTO_LOCAL_TABLE, + DML_QUERY, + SELECT_QUERY, + MERGE_QUERY, + REPLAN_WITH_BOUND_PARAMETERS +} RouterPlanType; + static List *plannerRestrictionContextList = NIL; int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */ static uint64 NextPlanId = 1; @@ -129,6 +141,9 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext static RTEListProperties * GetRTEListProperties(List *rangeTableList); static List * TranslatedVars(PlannerInfo *root, int relationIndex); static void WarnIfListHasForeignDistributedTable(List *rangeTableList); +static RouterPlanType GetRouterPlanType(Query *query, + Query *originalQuery, + bool hasUnresolvedParams); /* Distributed planner hook */ @@ -881,6 +896,51 @@ TryCreateDistributedPlannedStmt(PlannedStmt *localPlan, } +/* + * GetRouterPlanType checks the parse tree to return appropriate plan type. + */ +static RouterPlanType +GetRouterPlanType(Query *query, Query *originalQuery, bool hasUnresolvedParams) +{ + if (!IsModifyCommand(originalQuery)) + { + return SELECT_QUERY; + } + + Oid targetRelationId = ModifyQueryResultRelationId(query); + + EnsureModificationsCanRunOnRelation(targetRelationId); + EnsurePartitionTableNotReplicated(targetRelationId); + + /* Check the type of modification being done */ + + if (InsertSelectIntoCitusTable(originalQuery)) + { + if (hasUnresolvedParams) + { + return REPLAN_WITH_BOUND_PARAMETERS; + } + return INSERT_SELECT_INTO_CITUS_TABLE; + } + else if (InsertSelectIntoLocalTable(originalQuery)) + { + if (hasUnresolvedParams) + { + return REPLAN_WITH_BOUND_PARAMETERS; + } + return INSERT_SELECT_INTO_LOCAL_TABLE; + } + else if (IsMergeQuery(originalQuery)) + { + return MERGE_QUERY; + } + else + { + return DML_QUERY; + } +} + + /* * CreateDistributedPlan generates a distributed plan for a query. * It goes through 3 steps: @@ -898,64 +958,71 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina DistributedPlan *distributedPlan = NULL; bool hasCtes = originalQuery->cteList != NIL; - if (IsModifyCommand(originalQuery)) + /* Step 1: Try router planner */ + + RouterPlanType routerPlan = GetRouterPlanType(query, originalQuery, + hasUnresolvedParams); + + switch (routerPlan) { - Oid targetRelationId = ModifyQueryResultRelationId(query); - - EnsureModificationsCanRunOnRelation(targetRelationId); - - EnsurePartitionTableNotReplicated(targetRelationId); - - if (InsertSelectIntoCitusTable(originalQuery)) + case INSERT_SELECT_INTO_CITUS_TABLE: { - if (hasUnresolvedParams) - { - /* - * Unresolved parameters can cause performance regressions in - * INSERT...SELECT when the partition column is a parameter - * because we don't perform any additional pruning in the executor. 
- */ - return NULL; - } - distributedPlan = - CreateInsertSelectPlan(planId, originalQuery, plannerRestrictionContext, + CreateInsertSelectPlan(planId, + originalQuery, + plannerRestrictionContext, boundParams); + break; } - else if (InsertSelectIntoLocalTable(originalQuery)) + + case INSERT_SELECT_INTO_LOCAL_TABLE: { - if (hasUnresolvedParams) - { - /* - * Unresolved parameters can cause performance regressions in - * INSERT...SELECT when the partition column is a parameter - * because we don't perform any additional pruning in the executor. - */ - return NULL; - } distributedPlan = - CreateInsertSelectIntoLocalTablePlan(planId, originalQuery, boundParams, + CreateInsertSelectIntoLocalTablePlan(planId, + originalQuery, + boundParams, hasUnresolvedParams, plannerRestrictionContext); + break; } - else + + case DML_QUERY: { /* modifications are always routed through the same planner/executor */ distributedPlan = CreateModifyPlan(originalQuery, query, plannerRestrictionContext); + break; } - } - else - { - /* - * For select queries we, if router executor is enabled, first try to - * plan the query as a router query. If not supported, otherwise try - * the full blown plan/optimize/physical planning process needed to - * produce distributed query plans. - */ - distributedPlan = CreateRouterPlan(originalQuery, query, - plannerRestrictionContext); + case MERGE_QUERY: + { + distributedPlan = + CreateMergePlan(originalQuery, query, plannerRestrictionContext); + break; + } + + case REPLAN_WITH_BOUND_PARAMETERS: + { + /* + * Unresolved parameters can cause performance regressions in + * INSERT...SELECT when the partition column is a parameter + * because we don't perform any additional pruning in the executor. + */ + return NULL; + } + + case SELECT_QUERY: + { + /* + * For select queries we, if router executor is enabled, first try to + * plan the query as a router query. If not supported, otherwise try + * the full blown plan/optimize/physical planning process needed to + * produce distributed query plans. + */ + distributedPlan = + CreateRouterPlan(originalQuery, query, plannerRestrictionContext); + break; + } } /* the functions above always return a plan, possibly with an error */ @@ -996,6 +1063,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina boundParams); Assert(originalQuery != NULL); + /* Step 2: Generate subplans for CTEs and complex subqueries */ + /* * Plan subqueries and CTEs that cannot be pushed down by recursively * calling the planner and return the resulting plans to subPlanList. @@ -1096,6 +1165,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina query->cteList = NIL; Assert(originalQuery->cteList == NIL); + /* Step 3: Try Logical planner */ + MultiTreeRoot *logicalPlan = MultiLogicalPlanCreate(originalQuery, query, plannerRestrictionContext); MultiLogicalPlanOptimize(logicalPlan); diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c index 4839d5725..46a2484bd 100644 --- a/src/backend/distributed/planner/merge_planner.c +++ b/src/backend/distributed/planner/merge_planner.c @@ -54,6 +54,23 @@ static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid #endif +/* + * CreateMergePlan attempts to create a plan for the given MERGE SQL + * statement. If planning fails ->planningError is set to a description + * of the failure. 
+ */
+DistributedPlan *
+CreateMergePlan(Query *originalQuery, Query *query,
+				PlannerRestrictionContext *plannerRestrictionContext)
+{
+	/*
+	 * For now, this is a placeholder until we isolate the merge
+	 * planning into its own code-path.
+	 */
+	return CreateModifyPlan(originalQuery, query, plannerRestrictionContext);
+}
+
+
 /*
  * MergeQuerySupported does check for a MERGE command in the query, if it finds
  * one, it will verify the below criteria
diff --git a/src/include/distributed/merge_planner.h b/src/include/distributed/merge_planner.h
index 243be14d0..158f26861 100644
--- a/src/include/distributed/merge_planner.h
+++ b/src/include/distributed/merge_planner.h
@@ -17,10 +17,15 @@
 #include "nodes/parsenodes.h"
 #include "distributed/distributed_planner.h"
 #include "distributed/errormessage.h"
+#include "distributed/multi_physical_planner.h"

 extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte);
 extern DeferredErrorMessage * MergeQuerySupported(Query *originalQuery,
 												  bool multiShardQuery,
 												  PlannerRestrictionContext *
 												  plannerRestrictionContext);
+extern DistributedPlan * CreateMergePlan(Query *originalQuery, Query *query,
+										 PlannerRestrictionContext *
+										 plannerRestrictionContext);
+
 #endif /* MERGE_PLANNER_H */

From 372a93b529bd4d99266ed5946697e6ffe0f738f4 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Mon, 27 Mar 2023 12:19:06 +0300
Subject: [PATCH 18/18] Make 8 more tests runnable multiple times via run_test.py (#6791)

Soon I will be doing some changes related to #692 in router planner and
those changes require updating ~5/6 tests related to router planning.
And to make those test files runnable by run_test.py multiple times, we
need to make some other tests (that they're run in parallel / they
badly depend on) ready for run_test.py too.
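The approach is roughly the same in each of these test files: run the test in its
own schema (or defensively drop leftover objects at the top), then tear everything
down at the end so a second run starts from a clean slate; tests whose data is
shared with later schedules truncate the shared tables instead of dropping them.
A minimal sketch of that pattern, with illustrative object names only:

-- top of the test file: start from a dedicated schema and drop survivors
CREATE SCHEMA my_test_schema;
SET search_path TO my_test_schema;
DROP TYPE IF EXISTS my_composite_type CASCADE;

-- ... test body ...

-- end of the test file: drop everything the test created
SET client_min_messages TO WARNING;
DROP SCHEMA my_test_schema CASCADE;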
--- src/test/regress/citus_tests/run_test.py | 4 + .../regress/expected/multi_data_types.out | 8 ++ .../expected/multi_modifying_xacts.out | 71 ++++++++++++----- .../regress/expected/multi_mx_copy_data.out | 5 ++ .../expected/multi_mx_modifying_xacts.out | 3 + .../expected/multi_mx_router_planner.out | 4 + .../regress/expected/multi_router_planner.out | 76 +++++++++---------- .../regress/expected/multi_simple_queries.out | 19 +++-- src/test/regress/expected/multi_upsert.out | 4 + src/test/regress/sql/multi_data_types.sql | 9 +++ .../regress/sql/multi_modifying_xacts.sql | 45 ++++++++++- src/test/regress/sql/multi_mx_copy_data.sql | 7 ++ .../regress/sql/multi_mx_modifying_xacts.sql | 4 + .../regress/sql/multi_mx_router_planner.sql | 5 ++ src/test/regress/sql/multi_router_planner.sql | 32 +++----- src/test/regress/sql/multi_simple_queries.sql | 16 ++-- src/test/regress/sql/multi_upsert.sql | 5 ++ 17 files changed, 218 insertions(+), 99 deletions(-) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index 9c901785c..5964267ec 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -109,6 +109,10 @@ if __name__ == "__main__": "multi_mx_function_table_reference", ], ), + "multi_mx_modifying_xacts": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_router_planner": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_copy_data": TestDeps(None, ["multi_mx_create_table"]), + "multi_simple_queries": TestDeps("base_schedule"), } if not (test_file_name or test_file_path): diff --git a/src/test/regress/expected/multi_data_types.out b/src/test/regress/expected/multi_data_types.out index 4bc7da5c7..a88f9e1de 100644 --- a/src/test/regress/expected/multi_data_types.out +++ b/src/test/regress/expected/multi_data_types.out @@ -3,6 +3,14 @@ -- create, distribute, INSERT, SELECT and UPDATE -- =================================================================== SET citus.next_shard_id TO 530000; +-- Given that other test files depend on the existence of types created in this file, +-- we cannot drop them at the end. Instead, we drop them at the beginning of the test +-- to make this file runnable multiple times via run_test.py. +BEGIN; + SET LOCAL client_min_messages TO WARNING; + DROP TYPE IF EXISTS test_composite_type, other_composite_type, bug_status CASCADE; + DROP OPERATOR FAMILY IF EXISTS cats_op_fam USING hash; +COMMIT; -- create a custom type... 
CREATE TYPE test_composite_type AS ( i integer, diff --git a/src/test/regress/expected/multi_modifying_xacts.out b/src/test/regress/expected/multi_modifying_xacts.out index 607c327ff..0294e1060 100644 --- a/src/test/regress/expected/multi_modifying_xacts.out +++ b/src/test/regress/expected/multi_modifying_xacts.out @@ -1,5 +1,7 @@ SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; +CREATE SCHEMA multi_modifying_xacts; +SET search_path TO multi_modifying_xacts; -- =================================================================== -- test end-to-end modification functionality -- =================================================================== @@ -190,7 +192,7 @@ ALTER TABLE labs ADD COLUMN motto text; INSERT INTO labs VALUES (6, 'Bell Labs'); ABORT; -- but the DDL should correctly roll back -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.labs'::regclass; +SELECT "Column", "Type", "Modifiers" FROM public.table_desc WHERE relid='multi_modifying_xacts.labs'::regclass; Column | Type | Modifiers --------------------------------------------------------------------- id | bigint | not null @@ -339,7 +341,7 @@ CREATE FUNCTION reject_large_id() RETURNS trigger AS $rli$ END; $rli$ LANGUAGE plpgsql; -- register after insert trigger -SELECT * FROM run_command_on_placements('researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_large_id()') +SELECT * FROM run_command_on_placements('multi_modifying_xacts.researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE multi_modifying_xacts.reject_large_id()') ORDER BY nodeport, shardid; nodename | nodeport | shardid | success | result --------------------------------------------------------------------- @@ -498,6 +500,7 @@ AND s.logicalrelid = 'objects'::regclass; -- create trigger on one worker to reject certain values \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -514,6 +517,7 @@ AFTER INSERT ON objects_1200003 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- test partial failure; worker_1 succeeds, 2 fails -- in this case, we expect the transaction to abort \set VERBOSITY terse @@ -551,6 +555,7 @@ DELETE FROM objects; -- there cannot be errors on different shards at different times -- because the first failure will fail the whole transaction \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -567,6 +572,7 @@ AFTER INSERT ON labs_1200002 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; BEGIN; INSERT INTO objects VALUES (1, 'apple'); INSERT INTO objects VALUES (2, 'BAD'); @@ -602,12 +608,14 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if the failures happen at COMMIT time? 
\c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; CREATE CONSTRAINT TRIGGER reject_bad AFTER INSERT ON objects_1200003 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- should be the same story as before, just at COMMIT time -- as we use 2PC, the transaction is rollbacked BEGIN; @@ -644,12 +652,14 @@ WHERE sp.shardid = s.shardid AND s.logicalrelid = 'objects'::regclass; -- what if all nodes have failures at COMMIT time? \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON labs_1200002; CREATE CONSTRAINT TRIGGER reject_bad AFTER INSERT ON labs_1200002 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- reduce the log level for differences between PG14 and PG15 -- in PGconn->errorMessage -- relevant PG commit b15f254466aefbabcbed001929f6e09db59fd158 @@ -688,8 +698,10 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if one shard (objects) succeeds but another (labs) completely fails? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; \c - - - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200004; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -833,6 +845,7 @@ SELECT * FROM reference_modifying_xacts; -- lets fail on of the workers at before the commit time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_reference() RETURNS trigger AS $rb$ BEGIN @@ -849,6 +862,7 @@ AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -860,12 +874,14 @@ ERROR: illegal value COMMIT; -- lets fail one of the workers at COMMIT time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -890,8 +906,10 @@ ORDER BY s.logicalrelid, sp.shardstate; -- for the time-being drop the constraint \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- now create a hash distributed table and run tests -- including both the reference table and the hash -- distributed table @@ -923,6 +941,7 @@ INSERT INTO hash_modifying_xacts VALUES (2, 2); ABORT; -- lets fail one of the workers before COMMIT time for the hash table \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_hash() RETURNS trigger AS $rb$ BEGIN @@ -939,6 +958,7 @@ AFTER INSERT ON hash_modifying_xacts_1200007 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE 
PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail BEGIN; @@ -955,6 +975,7 @@ SELECT * FROM reference_modifying_xacts WHERE key = 55; -- now lets fail on of the workers for the hash distributed table table -- when there is a reference table involved \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_hash ON hash_modifying_xacts_1200007; -- the trigger is on execution time CREATE CONSTRAINT TRIGGER reject_bad_hash @@ -962,6 +983,7 @@ AFTER INSERT ON hash_modifying_xacts_1200007 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail BEGIN; @@ -994,11 +1016,13 @@ ORDER BY s.logicalrelid, sp.shardstate; -- and ensure that hash distributed table's -- change is rollbacked as well \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse BEGIN; -- to expand participant to include all worker nodes @@ -1127,8 +1151,10 @@ SELECT count(*) FROM pg_dist_transaction; -- in which we'll make the remote host unavailable -- first create the new user on all nodes CREATE USER test_user; +GRANT ALL ON SCHEMA multi_modifying_xacts TO test_user; -- now connect back to the master with the new user \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200015; CREATE TABLE reference_failure_test (key int, value int); SELECT create_reference_table('reference_failure_test'); @@ -1148,21 +1174,24 @@ SELECT create_distributed_table('numbers_hash_failure_test', 'key'); -- ensure that the shard is created for this user \c - test_user - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.override_table_visibility TO false; \dt reference_failure_test_1200015 - List of relations - Schema | Name | Type | Owner + List of relations + Schema | Name | Type | Owner --------------------------------------------------------------------- - public | reference_failure_test_1200015 | table | test_user + multi_modifying_xacts | reference_failure_test_1200015 | table | test_user (1 row) -- now connect with the default user, -- and rename the existing user \c - :default_user - :worker_1_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; NOTICE: not propagating ALTER ROLE ... RENAME TO commands to worker nodes -- connect back to master and query the reference table \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- should fail since the worker doesn't have test_user anymore INSERT INTO reference_failure_test VALUES (1, '1'); ERROR: connection to the remote node localhost:xxxxx failed with the following error: FATAL: role "test_user" does not exist @@ -1277,14 +1306,17 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin -- break the other node as well \c - :default_user - :worker_2_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; NOTICE: not propagating ALTER ROLE ... 
RENAME TO commands to worker nodes \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- fails on all shard placements INSERT INTO numbers_hash_failure_test VALUES (2,2); ERROR: connection to the remote node localhost:xxxxx failed with the following error: FATAL: role "test_user" does not exist -- connect back to the master with the proper user to continue the tests \c - :default_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200020; SET citus.next_placement_id TO 1200033; -- unbreak both nodes by renaming the user back to the original name @@ -1297,6 +1329,7 @@ SELECT * FROM run_command_on_workers('ALTER USER test_user_new RENAME TO test_us DROP TABLE reference_modifying_xacts, hash_modifying_xacts, hash_modifying_xacts_second, reference_failure_test, numbers_hash_failure_test; +REVOKE ALL ON SCHEMA multi_modifying_xacts FROM test_user; DROP USER test_user; -- set up foreign keys to test transactions with co-located and reference tables BEGIN; @@ -1322,7 +1355,9 @@ SELECT create_reference_table('itemgroups'); (1 row) +SET client_min_messages TO WARNING; DROP TABLE IF EXISTS users ; +RESET client_min_messages; CREATE TABLE users ( id int PRIMARY KEY, name text, @@ -1354,18 +1389,18 @@ JOIN USING (shardid) ORDER BY id; - id | shard_name | nodename | nodeport + id | shard_name | nodename | nodeport --------------------------------------------------------------------- - 1 | users_1200022 | localhost | 57637 - 2 | users_1200025 | localhost | 57638 - 3 | users_1200023 | localhost | 57638 - 4 | users_1200023 | localhost | 57638 - 5 | users_1200022 | localhost | 57637 - 6 | users_1200024 | localhost | 57637 - 7 | users_1200023 | localhost | 57638 - 8 | users_1200022 | localhost | 57637 - 9 | users_1200025 | localhost | 57638 - 10 | users_1200022 | localhost | 57637 + 1 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 2 | multi_modifying_xacts.users_1200025 | localhost | 57638 + 3 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 4 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 5 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 6 | multi_modifying_xacts.users_1200024 | localhost | 57637 + 7 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 8 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 9 | multi_modifying_xacts.users_1200025 | localhost | 57638 + 10 | multi_modifying_xacts.users_1200022 | localhost | 57637 (10 rows) END; @@ -1546,5 +1581,5 @@ SELECT name FROM labs WHERE id = 1001; (1 row) RESET citus.function_opens_transaction_block; -DROP FUNCTION insert_abort(); -DROP TABLE items, users, itemgroups, usergroups, researchers, labs; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_modifying_xacts CASCADE; diff --git a/src/test/regress/expected/multi_mx_copy_data.out b/src/test/regress/expected/multi_mx_copy_data.out index c1d3d7180..0db64c16e 100644 --- a/src/test/regress/expected/multi_mx_copy_data.out +++ b/src/test/regress/expected/multi_mx_copy_data.out @@ -1,6 +1,10 @@ -- -- MULTI_MX_COPY_DATA -- +-- We truncate them to make this test runnable multiple times. +-- Note that we cannot do that at the end of the test because +-- we need to keep the data for the other tests. 
+TRUNCATE lineitem_mx, orders_mx; \set nation_data_file :abs_srcdir '/data/nation.data' \set client_side_copy_command '\\copy nation_hash FROM ' :'nation_data_file' ' with delimiter '''|''';' :client_side_copy_command @@ -161,3 +165,4 @@ SET search_path TO public; :client_side_copy_command \set client_side_copy_command '\\copy supplier_mx FROM ' :'supplier_data_file' ' with delimiter '''|''';' :client_side_copy_command +DROP TABLE citus_mx_test_schema.nation_hash_replicated; diff --git a/src/test/regress/expected/multi_mx_modifying_xacts.out b/src/test/regress/expected/multi_mx_modifying_xacts.out index e486b8b1b..dfbdc7603 100644 --- a/src/test/regress/expected/multi_mx_modifying_xacts.out +++ b/src/test/regress/expected/multi_mx_modifying_xacts.out @@ -406,3 +406,6 @@ SELECT * FROM labs_mx WHERE id = 8; --------------------------------------------------------------------- (0 rows) +TRUNCATE objects_mx, labs_mx, researchers_mx; +DROP TRIGGER reject_bad_mx ON labs_mx_1220102; +DROP FUNCTION reject_bad_mx; diff --git a/src/test/regress/expected/multi_mx_router_planner.out b/src/test/regress/expected/multi_mx_router_planner.out index d006b4bb8..bf007be9d 100644 --- a/src/test/regress/expected/multi_mx_router_planner.out +++ b/src/test/regress/expected/multi_mx_router_planner.out @@ -1460,3 +1460,7 @@ DEBUG: query has a single distribution column value: 1 51 (6 rows) +SET client_min_messages to WARNING; +TRUNCATE articles_hash_mx, company_employees_mx, articles_single_shard_hash_mx; +DROP MATERIALIZED VIEW mv_articles_hash_mx_error; +DROP TABLE authors_hash_mx; diff --git a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index 56ff44b3b..e0e5bc541 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ -6,6 +6,8 @@ SET citus.next_shard_id TO 840000; -- router planner, so we're disabling it in this file. We've bunch of -- other tests that triggers fast-path-router planner SET citus.enable_fast_path_router_planner TO false; +CREATE SCHEMA multi_router_planner; +SET search_path TO multi_router_planner; CREATE TABLE articles_hash ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -290,10 +292,10 @@ WITH first_author AS MATERIALIZED ( UPDATE articles_hash SET title = first_author.name FROM first_author WHERE articles_hash.author_id = 2 AND articles_hash.id = first_author.id; DEBUG: Router planner doesn't support SELECT FOR UPDATE in common table expressions involving reference tables. 
-DEBUG: generating subplan XXX_1 for CTE first_author: SELECT articles_hash.id, auref.name FROM public.articles_hash, public.authors_reference auref WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (auref.id OPERATOR(pg_catalog.=) articles_hash.author_id)) FOR UPDATE OF articles_hash FOR UPDATE OF auref +DEBUG: generating subplan XXX_1 for CTE first_author: SELECT articles_hash.id, auref.name FROM multi_router_planner.articles_hash, multi_router_planner.authors_reference auref WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (auref.id OPERATOR(pg_catalog.=) articles_hash.author_id)) FOR UPDATE OF articles_hash FOR UPDATE OF auref DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 -DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE public.articles_hash SET title = first_author.name FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name character varying(20))) first_author WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (articles_hash.id OPERATOR(pg_catalog.=) first_author.id)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE multi_router_planner.articles_hash SET title = first_author.name FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name character varying(20))) first_author WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (articles_hash.id OPERATOR(pg_catalog.=) first_author.id)) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 WITH first_author AS MATERIALIZED ( @@ -356,10 +358,10 @@ WITH id_author AS MATERIALIZED ( SELECT id, author_id FROM articles_hash WHERE a id_title AS MATERIALIZED (SELECT id, title from articles_hash WHERE author_id = 2) SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; DEBUG: cannot run command which targets multiple shards -DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM public.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) +DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM multi_router_planner.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 -DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM public.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) +DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM multi_router_planner.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id_author.id, id_author.author_id, id_title.id, id_title.title FROM (SELECT intermediate_result.id, intermediate_result.author_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint)) id_author, (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title character varying(20))) id_title WHERE (id_author.id OPERATOR(pg_catalog.=) id_title.id) @@ -456,7 +458,7 @@ WITH new_article AS MATERIALIZED( ) SELECT * FROM new_article; DEBUG: only SELECT, UPDATE, or DELETE common 
table expressions may be router planned -DEBUG: generating subplan XXX_1 for CTE new_article: INSERT INTO public.articles_hash (id, author_id, title, word_count) VALUES (1, 1, 'arsenous'::character varying, 9) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE new_article: INSERT INTO multi_router_planner.articles_hash (id, author_id, title, word_count) VALUES (1, 1, 'arsenous'::character varying, 9) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) new_article @@ -471,7 +473,7 @@ WITH update_article AS MATERIALIZED( ) SELECT * FROM update_article; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.articles_hash SET word_count = 10 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 9)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.articles_hash SET word_count = 10 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 9)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) update_article DEBUG: Creating router plan @@ -485,7 +487,7 @@ WITH update_article AS MATERIALIZED ( ) SELECT coalesce(1,random()); DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.articles_hash SET word_count = 11 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.articles_hash SET word_count = 11 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT COALESCE((1)::double precision, random()) AS "coalesce" DEBUG: Creating router plan @@ -510,7 +512,7 @@ WITH update_article AS MATERIALIZED ( ) SELECT coalesce(1,random()); DEBUG: cannot router plan modification of a non-distributed table -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.authors_reference SET name = ''::character varying WHERE (id OPERATOR(pg_catalog.=) 0) RETURNING name, id +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.authors_reference SET name = ''::character varying WHERE (id OPERATOR(pg_catalog.=) 0) RETURNING name, id DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT COALESCE((1)::double precision, random()) AS "coalesce" DEBUG: Creating 
router plan @@ -524,7 +526,7 @@ WITH delete_article AS MATERIALIZED ( ) SELECT * FROM delete_article; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE delete_article: DELETE FROM public.articles_hash WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE delete_article: DELETE FROM multi_router_planner.articles_hash WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) delete_article DEBUG: Creating router plan @@ -653,8 +655,8 @@ FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE tes ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM public.articles_hash -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id OPERATOR(pg_catalog.=) articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 +DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM multi_router_planner.articles_hash +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM multi_router_planner.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id OPERATOR(pg_catalog.=) articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: push down of limit count: 5 id | word_count @@ -672,8 +674,8 @@ WHERE test.id = articles_hash.id and articles_hash.author_id = 1 ORDER BY articles_hash.id; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM public.articles_hash -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id OPERATOR(pg_catalog.=) articles_hash.id) AND (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) ORDER BY articles_hash.id +DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM multi_router_planner.articles_hash +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, 
test.word_count FROM multi_router_planner.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id OPERATOR(pg_catalog.=) articles_hash.id) AND (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) ORDER BY articles_hash.id DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 id | word_count @@ -788,9 +790,9 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id = 2 and a.author_id = b.author_id LIMIT 3; DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM public.articles_single_shard_hash +DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM multi_router_planner.articles_single_shard_hash DEBUG: Creating router plan -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM multi_router_planner.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 first_author | second_word_count @@ -1575,10 +1577,10 @@ SELECT 1 FROM authors_reference r JOIN ( ) num_db ON (r.id = num_db.datid) LIMIT 1; DEBUG: found no worker with all shard placements DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM public.number1() s(datid) +DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM multi_router_planner.number1() s(datid) DEBUG: Creating router plan DEBUG: generating subplan XXX_2 for subquery SELECT s.datid FROM ((SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) s LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) 
num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 DEBUG: Creating router plan ?column? --------------------------------------------------------------------- @@ -1590,10 +1592,10 @@ SELECT s.datid FROM number1() s LEFT JOIN pg_database d ON s.datid = d.oid; SELECT 1 FROM authors_reference r JOIN num_db ON (r.id = num_db.datid) LIMIT 1; DEBUG: found no worker with all shard placements DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM public.number1() s(datid) +DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM multi_router_planner.number1() s(datid) DEBUG: Creating router plan DEBUG: generating subplan XXX_2 for subquery SELECT s.datid FROM ((SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) s LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 DEBUG: Creating router plan ?column? --------------------------------------------------------------------- @@ -1603,8 +1605,8 @@ DEBUG: Creating router plan WITH cte AS MATERIALIZED (SELECT * FROM num_db) SELECT 1 FROM authors_reference r JOIN cte ON (r.id = cte.datid) LIMIT 1; DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE cte: SELECT datid FROM (SELECT s.datid FROM (public.number1() s(datid) LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid)))) num_db -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) cte ON ((r.id OPERATOR(pg_catalog.=) cte.datid))) LIMIT 1 +DEBUG: generating subplan XXX_1 for CTE cte: SELECT datid FROM (SELECT s.datid FROM (multi_router_planner.number1() s(datid) LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid)))) num_db +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) cte ON ((r.id OPERATOR(pg_catalog.=) cte.datid))) LIMIT 1 DEBUG: Creating router plan ?column? 
--------------------------------------------------------------------- @@ -1769,7 +1771,7 @@ SET citus.log_remote_commands TO on; -- single shard select queries are router plannable SELECT * FROM articles_range where author_id = 1; DEBUG: Creating router plan -NOTICE: issuing SELECT id, author_id, title, word_count FROM public.articles_range_840012 articles_range WHERE (author_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT id, author_id, title, word_count FROM multi_router_planner.articles_range_840012 articles_range WHERE (author_id OPERATOR(pg_catalog.=) 1) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count --------------------------------------------------------------------- @@ -1777,7 +1779,7 @@ DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx SELECT * FROM articles_range where author_id = 1 or author_id = 5; DEBUG: Creating router plan -NOTICE: issuing SELECT id, author_id, title, word_count FROM public.articles_range_840012 articles_range WHERE ((author_id OPERATOR(pg_catalog.=) 1) OR (author_id OPERATOR(pg_catalog.=) 5)) +NOTICE: issuing SELECT id, author_id, title, word_count FROM multi_router_planner.articles_range_840012 articles_range WHERE ((author_id OPERATOR(pg_catalog.=) 1) OR (author_id OPERATOR(pg_catalog.=) 5)) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count --------------------------------------------------------------------- @@ -1795,7 +1797,7 @@ NOTICE: executing the command locally: SELECT id, author_id, title, word_count SELECT * FROM articles_range ar join authors_range au on (ar.author_id = au.id) WHERE ar.author_id = 1; DEBUG: Creating router plan -NOTICE: issuing SELECT ar.id, ar.author_id, ar.title, ar.word_count, au.name, au.id FROM (public.articles_range_840012 ar JOIN public.authors_range_840008 au ON ((ar.author_id OPERATOR(pg_catalog.=) au.id))) WHERE (ar.author_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT ar.id, ar.author_id, ar.title, ar.word_count, au.name, au.id FROM (multi_router_planner.articles_range_840012 ar JOIN multi_router_planner.authors_range_840008 au ON ((ar.author_id OPERATOR(pg_catalog.=) au.id))) WHERE (ar.author_id OPERATOR(pg_catalog.=) 1) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count | name | id --------------------------------------------------------------------- @@ -2433,12 +2435,15 @@ SELECT create_distributed_table('failure_test', 'a', 'hash'); SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - router_user - :master_port +SET search_path TO multi_router_planner; -- we will fail to connect to worker 2, since the user does not exist -- still, we never mark placements inactive. 
Instead, fail the transaction BEGIN; @@ -2452,7 +2457,7 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'failure_test'::regclass ) - ORDER BY placementid; + ORDER BY shardid, nodeport; shardid | shardstate | nodename | nodeport --------------------------------------------------------------------- 840017 | 1 | localhost | 57637 @@ -2471,18 +2476,5 @@ DROP USER router_user; \c - - - :master_port DROP OWNED BY router_user; DROP USER router_user; -DROP TABLE failure_test; -DROP FUNCTION author_articles_max_id(); -DROP FUNCTION author_articles_id_word_count(); -DROP MATERIALIZED VIEW mv_articles_hash_empty; -DROP MATERIALIZED VIEW mv_articles_hash_data; -DROP VIEW num_db; -DROP FUNCTION number1(); -DROP TABLE articles_hash; -DROP TABLE articles_single_shard_hash; -DROP TABLE authors_hash; -DROP TABLE authors_range; -DROP TABLE authors_reference; -DROP TABLE company_employees; -DROP TABLE articles_range; -DROP TABLE articles_append; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_router_planner CASCADE; diff --git a/src/test/regress/expected/multi_simple_queries.out b/src/test/regress/expected/multi_simple_queries.out index 6bd8bad4a..4578d69a8 100644 --- a/src/test/regress/expected/multi_simple_queries.out +++ b/src/test/regress/expected/multi_simple_queries.out @@ -7,6 +7,8 @@ SET citus.coordinator_aggregation_strategy TO 'disabled'; -- =================================================================== -- test end-to-end query functionality -- =================================================================== +CREATE SCHEMA simple_queries_test; +SET search_path TO simple_queries_test; CREATE TABLE articles ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -382,7 +384,7 @@ SELECT author_id FROM articles 8 (3 rows) -SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders +SELECT o_orderstatus, count(*), avg(o_totalprice) FROM public.orders GROUP BY o_orderstatus HAVING count(*) > 1450 OR avg(o_totalprice) > 150000 ORDER BY o_orderstatus; @@ -392,7 +394,7 @@ SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders P | 75 | 164847.914533333333 (2 rows) -SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders +SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM public.lineitem, public.orders WHERE l_orderkey = o_orderkey AND l_orderkey > 9030 GROUP BY o_orderstatus HAVING sum(l_linenumber) > 1000 @@ -541,7 +543,7 @@ DEBUG: query has a single distribution column value: 2 -- error out on unsupported aggregate SET client_min_messages to 'NOTICE'; -CREATE AGGREGATE public.invalid(int) ( +CREATE AGGREGATE invalid(int) ( sfunc = int4pl, stype = int ); @@ -812,10 +814,11 @@ SELECT * FROM (SELECT nextval('query_seq') FROM articles LIMIT 3) vals; (3 rows) -- but not elsewhere -SELECT sum(nextval('query_seq')) FROM articles; -ERROR: relation "public.query_seq" does not exist +SELECT sum(nextval('simple_queries_test.query_seq')) FROM articles; +ERROR: relation "simple_queries_test.query_seq" does not exist CONTEXT: while executing command on localhost:xxxxx -SELECT n FROM (SELECT nextval('query_seq') n, random() FROM articles) vals; -ERROR: relation "public.query_seq" does not exist +SELECT n FROM (SELECT nextval('simple_queries_test.query_seq') n, random() FROM articles) vals; +ERROR: relation "simple_queries_test.query_seq" does not exist CONTEXT: while executing command on localhost:xxxxx -DROP SEQUENCE query_seq; +SET client_min_messages TO WARNING; +DROP SCHEMA 
simple_queries_test CASCADE; diff --git a/src/test/regress/expected/multi_upsert.out b/src/test/regress/expected/multi_upsert.out index 08308aba0..e41b2a3d5 100644 --- a/src/test/regress/expected/multi_upsert.out +++ b/src/test/regress/expected/multi_upsert.out @@ -1,5 +1,7 @@ -- this test file aims to test UPSERT feature on Citus SET citus.next_shard_id TO 980000; +CREATE SCHEMA upsert_test; +SET search_path TO upsert_test; CREATE TABLE upsert_test ( part_key int UNIQUE, @@ -244,3 +246,5 @@ ERROR: functions used in the WHERE clause of the ON CONFLICT clause of INSERTs INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_key) DO UPDATE SET part_key = 15; ERROR: modifying the partition value of rows is not allowed +SET client_min_messages TO WARNING; +DROP SCHEMA upsert_test CASCADE; diff --git a/src/test/regress/sql/multi_data_types.sql b/src/test/regress/sql/multi_data_types.sql index 7601bb319..d307c4c6f 100644 --- a/src/test/regress/sql/multi_data_types.sql +++ b/src/test/regress/sql/multi_data_types.sql @@ -6,6 +6,15 @@ SET citus.next_shard_id TO 530000; +-- Given that other test files depend on the existence of types created in this file, +-- we cannot drop them at the end. Instead, we drop them at the beginning of the test +-- to make this file runnable multiple times via run_test.py. +BEGIN; + SET LOCAL client_min_messages TO WARNING; + DROP TYPE IF EXISTS test_composite_type, other_composite_type, bug_status CASCADE; + DROP OPERATOR FAMILY IF EXISTS cats_op_fam USING hash; +COMMIT; + -- create a custom type... CREATE TYPE test_composite_type AS ( i integer, diff --git a/src/test/regress/sql/multi_modifying_xacts.sql b/src/test/regress/sql/multi_modifying_xacts.sql index 2be3a0911..506480093 100644 --- a/src/test/regress/sql/multi_modifying_xacts.sql +++ b/src/test/regress/sql/multi_modifying_xacts.sql @@ -1,6 +1,9 @@ SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; +CREATE SCHEMA multi_modifying_xacts; +SET search_path TO multi_modifying_xacts; + -- =================================================================== -- test end-to-end modification functionality -- =================================================================== @@ -169,7 +172,7 @@ INSERT INTO labs VALUES (6, 'Bell Labs'); ABORT; -- but the DDL should correctly roll back -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.labs'::regclass; +SELECT "Column", "Type", "Modifiers" FROM public.table_desc WHERE relid='multi_modifying_xacts.labs'::regclass; SELECT * FROM labs WHERE id = 6; -- COPY can happen after single row INSERT @@ -294,7 +297,7 @@ CREATE FUNCTION reject_large_id() RETURNS trigger AS $rli$ $rli$ LANGUAGE plpgsql; -- register after insert trigger -SELECT * FROM run_command_on_placements('researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_large_id()') +SELECT * FROM run_command_on_placements('multi_modifying_xacts.researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE multi_modifying_xacts.reject_large_id()') ORDER BY nodeport, shardid; -- hide postgresql version dependend messages for next test only @@ -418,6 +421,7 @@ AND s.logicalrelid = 'objects'::regclass; -- create trigger on one worker to reject certain values \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION 
reject_bad() RETURNS trigger AS $rb$ @@ -437,6 +441,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- test partial failure; worker_1 succeeds, 2 fails -- in this case, we expect the transaction to abort @@ -465,6 +470,7 @@ DELETE FROM objects; -- there cannot be errors on different shards at different times -- because the first failure will fail the whole transaction \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -483,6 +489,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -506,6 +513,7 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if the failures happen at COMMIT time? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; @@ -515,6 +523,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- should be the same story as before, just at COMMIT time -- as we use 2PC, the transaction is rollbacked @@ -547,6 +556,7 @@ AND s.logicalrelid = 'objects'::regclass; -- what if all nodes have failures at COMMIT time? \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON labs_1200002; @@ -556,6 +566,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- reduce the log level for differences between PG14 and PG15 -- in PGconn->errorMessage @@ -586,10 +597,12 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if one shard (objects) succeeds but another (labs) completely fails? 
\c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; \c - - - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200004; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -682,6 +695,7 @@ SELECT * FROM reference_modifying_xacts; -- lets fail on of the workers at before the commit time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_reference() RETURNS trigger AS $rb$ BEGIN @@ -700,6 +714,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -711,6 +726,7 @@ COMMIT; -- lets fail one of the workers at COMMIT time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; CREATE CONSTRAINT TRIGGER reject_bad_reference @@ -719,6 +735,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction @@ -740,10 +757,12 @@ ORDER BY s.logicalrelid, sp.shardstate; -- for the time-being drop the constraint \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- now create a hash distributed table and run tests -- including both the reference table and the hash @@ -777,6 +796,7 @@ ABORT; -- lets fail one of the workers before COMMIT time for the hash table \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_hash() RETURNS trigger AS $rb$ BEGIN @@ -795,6 +815,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail @@ -809,6 +830,7 @@ SELECT * FROM reference_modifying_xacts WHERE key = 55; -- now lets fail on of the workers for the hash distributed table table -- when there is a reference table involved \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_hash ON hash_modifying_xacts_1200007; -- the trigger is on execution time @@ -818,6 +840,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail @@ -844,6 +867,7 @@ ORDER BY s.logicalrelid, sp.shardstate; -- change is rollbacked as well \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 @@ -851,6 +875,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse BEGIN; @@ -920,9 +945,11 @@ SELECT count(*) FROM pg_dist_transaction; -- first create the new user on all nodes CREATE USER test_user; +GRANT ALL ON SCHEMA multi_modifying_xacts TO test_user; -- now connect back to the master with the new user \c - test_user - 
:master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200015; CREATE TABLE reference_failure_test (key int, value int); SELECT create_reference_table('reference_failure_test'); @@ -934,16 +961,19 @@ SELECT create_distributed_table('numbers_hash_failure_test', 'key'); -- ensure that the shard is created for this user \c - test_user - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.override_table_visibility TO false; \dt reference_failure_test_1200015 -- now connect with the default user, -- and rename the existing user \c - :default_user - :worker_1_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; -- connect back to master and query the reference table \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- should fail since the worker doesn't have test_user anymore INSERT INTO reference_failure_test VALUES (1, '1'); @@ -1007,15 +1037,18 @@ SELECT count(*) FROM numbers_hash_failure_test; -- break the other node as well \c - :default_user - :worker_2_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- fails on all shard placements INSERT INTO numbers_hash_failure_test VALUES (2,2); -- connect back to the master with the proper user to continue the tests \c - :default_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200020; SET citus.next_placement_id TO 1200033; -- unbreak both nodes by renaming the user back to the original name @@ -1024,6 +1057,7 @@ SELECT * FROM run_command_on_workers('ALTER USER test_user_new RENAME TO test_us DROP TABLE reference_modifying_xacts, hash_modifying_xacts, hash_modifying_xacts_second, reference_failure_test, numbers_hash_failure_test; +REVOKE ALL ON SCHEMA multi_modifying_xacts FROM test_user; DROP USER test_user; -- set up foreign keys to test transactions with co-located and reference tables @@ -1043,7 +1077,10 @@ CREATE TABLE itemgroups ( ); SELECT create_reference_table('itemgroups'); +SET client_min_messages TO WARNING; DROP TABLE IF EXISTS users ; +RESET client_min_messages; + CREATE TABLE users ( id int PRIMARY KEY, name text, @@ -1199,5 +1236,5 @@ SELECT insert_abort(); SELECT name FROM labs WHERE id = 1001; RESET citus.function_opens_transaction_block; -DROP FUNCTION insert_abort(); -DROP TABLE items, users, itemgroups, usergroups, researchers, labs; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_modifying_xacts CASCADE; diff --git a/src/test/regress/sql/multi_mx_copy_data.sql b/src/test/regress/sql/multi_mx_copy_data.sql index 26d4d3c42..b4598ae61 100644 --- a/src/test/regress/sql/multi_mx_copy_data.sql +++ b/src/test/regress/sql/multi_mx_copy_data.sql @@ -2,6 +2,11 @@ -- MULTI_MX_COPY_DATA -- +-- We truncate them to make this test runnable multiple times. +-- Note that we cannot do that at the end of the test because +-- we need to keep the data for the other tests. 
+TRUNCATE lineitem_mx, orders_mx;
+
 \set nation_data_file :abs_srcdir '/data/nation.data'
 \set client_side_copy_command '\\copy nation_hash FROM ' :'nation_data_file' ' with delimiter '''|''';'
 :client_side_copy_command
@@ -96,3 +101,5 @@ SET search_path TO public;
 :client_side_copy_command
 \set client_side_copy_command '\\copy supplier_mx FROM ' :'supplier_data_file' ' with delimiter '''|''';'
 :client_side_copy_command
+
+DROP TABLE citus_mx_test_schema.nation_hash_replicated;
diff --git a/src/test/regress/sql/multi_mx_modifying_xacts.sql b/src/test/regress/sql/multi_mx_modifying_xacts.sql
index cf60f023d..15335f579 100644
--- a/src/test/regress/sql/multi_mx_modifying_xacts.sql
+++ b/src/test/regress/sql/multi_mx_modifying_xacts.sql
@@ -331,3 +331,7 @@ COMMIT;
 -- no data should persists
 SELECT * FROM objects_mx WHERE id = 1;
 SELECT * FROM labs_mx WHERE id = 8;
+
+TRUNCATE objects_mx, labs_mx, researchers_mx;
+DROP TRIGGER reject_bad_mx ON labs_mx_1220102;
+DROP FUNCTION reject_bad_mx;
diff --git a/src/test/regress/sql/multi_mx_router_planner.sql b/src/test/regress/sql/multi_mx_router_planner.sql
index fdfd81b07..6a1271720 100644
--- a/src/test/regress/sql/multi_mx_router_planner.sql
+++ b/src/test/regress/sql/multi_mx_router_planner.sql
@@ -657,3 +657,8 @@ INSERT INTO articles_hash_mx VALUES (51, 1, 'amateus', 1814);
 
 SELECT id FROM articles_hash_mx
 	WHERE author_id = 1;
+
+SET client_min_messages to WARNING;
+TRUNCATE articles_hash_mx, company_employees_mx, articles_single_shard_hash_mx;
+DROP MATERIALIZED VIEW mv_articles_hash_mx_error;
+DROP TABLE authors_hash_mx;
diff --git a/src/test/regress/sql/multi_router_planner.sql b/src/test/regress/sql/multi_router_planner.sql
index 87104599c..142568d5d 100644
--- a/src/test/regress/sql/multi_router_planner.sql
+++ b/src/test/regress/sql/multi_router_planner.sql
@@ -10,6 +10,9 @@ SET citus.next_shard_id TO 840000;
 -- other tests that triggers fast-path-router planner
 SET citus.enable_fast_path_router_planner TO false;
 
+CREATE SCHEMA multi_router_planner;
+SET search_path TO multi_router_planner;
+
 CREATE TABLE articles_hash (
 	id bigint NOT NULL,
 	author_id bigint NOT NULL,
@@ -1182,12 +1185,15 @@ SELECT create_distributed_table('failure_test', 'a', 'hash');
 
 SET citus.enable_ddl_propagation TO off;
 CREATE USER router_user;
-GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user;
+GRANT USAGE ON SCHEMA multi_router_planner TO router_user;
+GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user;
 \c - - - :worker_1_port
 SET citus.enable_ddl_propagation TO off;
 CREATE USER router_user;
-GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user;
+GRANT USAGE ON SCHEMA multi_router_planner TO router_user;
+GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user;
 \c - router_user - :master_port
+SET search_path TO multi_router_planner;
 -- we will fail to connect to worker 2, since the user does not exist
 -- still, we never mark placements inactive. Instead, fail the transaction
 BEGIN;
@@ -1199,29 +1205,13 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement
 		SELECT shardid FROM pg_dist_shard
 		WHERE logicalrelid = 'failure_test'::regclass
 	)
-	ORDER BY placementid;
+	ORDER BY shardid, nodeport;
 
 \c - postgres - :worker_1_port
 DROP OWNED BY router_user;
 DROP USER router_user;
 \c - - - :master_port
 DROP OWNED BY router_user;
 DROP USER router_user;
-DROP TABLE failure_test;
-DROP FUNCTION author_articles_max_id();
-DROP FUNCTION author_articles_id_word_count();
-
-DROP MATERIALIZED VIEW mv_articles_hash_empty;
-DROP MATERIALIZED VIEW mv_articles_hash_data;
-
-DROP VIEW num_db;
-DROP FUNCTION number1();
-
-DROP TABLE articles_hash;
-DROP TABLE articles_single_shard_hash;
-DROP TABLE authors_hash;
-DROP TABLE authors_range;
-DROP TABLE authors_reference;
-DROP TABLE company_employees;
-DROP TABLE articles_range;
-DROP TABLE articles_append;
+SET client_min_messages TO WARNING;
+DROP SCHEMA multi_router_planner CASCADE;
diff --git a/src/test/regress/sql/multi_simple_queries.sql b/src/test/regress/sql/multi_simple_queries.sql
index 8d7e45255..7fcf45b1c 100644
--- a/src/test/regress/sql/multi_simple_queries.sql
+++ b/src/test/regress/sql/multi_simple_queries.sql
@@ -11,6 +11,9 @@ SET citus.coordinator_aggregation_strategy TO 'disabled';
 -- test end-to-end query functionality
 -- ===================================================================
 
+CREATE SCHEMA simple_queries_test;
+SET search_path TO simple_queries_test;
+
 CREATE TABLE articles (
 	id bigint NOT NULL,
 	author_id bigint NOT NULL,
@@ -203,12 +206,12 @@ SELECT author_id FROM articles
 	HAVING author_id <= 2 OR author_id = 8
 	ORDER BY author_id;
 
-SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders
+SELECT o_orderstatus, count(*), avg(o_totalprice) FROM public.orders
 	GROUP BY o_orderstatus
 	HAVING count(*) > 1450 OR avg(o_totalprice) > 150000
 	ORDER BY o_orderstatus;
 
-SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders
+SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM public.lineitem, public.orders
 	WHERE l_orderkey = o_orderkey AND l_orderkey > 9030
 	GROUP BY o_orderstatus
 	HAVING sum(l_linenumber) > 1000
@@ -277,7 +280,7 @@ SELECT avg(word_count)
 -- error out on unsupported aggregate
 SET client_min_messages to 'NOTICE';
 
-CREATE AGGREGATE public.invalid(int) (
+CREATE AGGREGATE invalid(int) (
     sfunc = int4pl,
     stype = int
 );
@@ -355,7 +358,8 @@ SELECT nextval('query_seq')*2 FROM articles LIMIT 3;
 SELECT * FROM (SELECT nextval('query_seq') FROM articles LIMIT 3) vals;
 
 -- but not elsewhere
-SELECT sum(nextval('query_seq')) FROM articles;
-SELECT n FROM (SELECT nextval('query_seq') n, random() FROM articles) vals;
+SELECT sum(nextval('simple_queries_test.query_seq')) FROM articles;
+SELECT n FROM (SELECT nextval('simple_queries_test.query_seq') n, random() FROM articles) vals;
 
-DROP SEQUENCE query_seq;
+SET client_min_messages TO WARNING;
+DROP SCHEMA simple_queries_test CASCADE;
diff --git a/src/test/regress/sql/multi_upsert.sql b/src/test/regress/sql/multi_upsert.sql
index 24503b7a4..6ef72d576 100644
--- a/src/test/regress/sql/multi_upsert.sql
+++ b/src/test/regress/sql/multi_upsert.sql
@@ -3,6 +3,8 @@
 
 SET citus.next_shard_id TO 980000;
 
+CREATE SCHEMA upsert_test;
+SET search_path TO upsert_test;
 
 CREATE TABLE upsert_test
 (
@@ -207,3 +209,6 @@ INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_ke
 -- error out on attempt to update the partition key
 INSERT INTO upsert_test (part_key, other_col)
 	VALUES (1, 1) ON CONFLICT (part_key) DO UPDATE SET part_key = 15;
+
+SET client_min_messages TO WARNING;
+DROP SCHEMA upsert_test CASCADE;