diff --git a/.circleci/config.yml b/.circleci/config.yml index 1d65ae59c..d0db414ce 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-vb84a6c5' + default: '-v4b2ae97' pg13_version: type: string default: '13.10' diff --git a/src/backend/columnar/.gitignore b/src/backend/columnar/.gitignore new file mode 100644 index 000000000..b70410d1d --- /dev/null +++ b/src/backend/columnar/.gitignore @@ -0,0 +1,3 @@ +# The directory used to store columnar sql files after pre-processing them +# with 'cpp' at build time, see src/backend/columnar/Makefile. +/build/ diff --git a/src/backend/columnar/Makefile b/src/backend/columnar/Makefile index f9fa09b7c..ded52a98d 100644 --- a/src/backend/columnar/Makefile +++ b/src/backend/columnar/Makefile @@ -10,14 +10,51 @@ OBJS += \ MODULE_big = citus_columnar EXTENSION = citus_columnar -columnar_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql)) -columnar_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql)) -DATA = $(columnar_sql_files) \ - $(columnar_downgrade_sql_files) +template_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql)) +template_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/sql/downgrades/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql)) +generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_files)) +generated_downgrade_sql_files += $(patsubst %,$(citus_abs_srcdir)/build/sql/%,$(template_downgrade_sql_files)) + +DATA_built = $(generated_sql_files) PG_CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include include $(citus_top_builddir)/Makefile.global -.PHONY: install-all +SQL_DEPDIR=.deps/sql +SQL_BUILDDIR=build/sql + +$(generated_sql_files): $(citus_abs_srcdir)/build/%: % + @mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR) + @# -MF is used to store dependency files (.Po) in another directory for separation + @# -MT is used to change the target of the rule emitted by dependency generation. + @# -P is used to inhibit generation of linemarkers in the output from the preprocessor. + @# -undef is used to not predefine any system-specific or GCC-specific macros. + @# `man cpp` for further information + cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@ + +$(generated_downgrade_sql_files): $(citus_abs_srcdir)/build/sql/%: sql/downgrades/% + @mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR) + @# -MF is used to store dependency files (.Po) in another directory for separation + @# -MT is used to change the target of the rule emitted by dependency generation. + @# -P is used to inhibit generation of linemarkers in the output from the preprocessor. + @# -undef is used to not predefine any system-specific or GCC-specific macros.
+ @# `man cpp` for further information + cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@ + +.PHONY: install install-downgrades install-all + +cleanup-before-install: + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar.control + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/columnar--* + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar--* + +install: cleanup-before-install + +# install and install-downgrades should be run sequentially install-all: install + $(MAKE) install-downgrades + +install-downgrades: $(generated_downgrade_sql_files) + $(INSTALL_DATA) $(generated_downgrade_sql_files) '$(DESTDIR)$(datadir)/$(datamoduledir)/' + diff --git a/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql b/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql index 60a0401d5..89ccd9e74 100644 --- a/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql +++ b/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql @@ -1 +1,18 @@ -- citus_columnar--11.1-1--11.2-1 + +#include "udfs/columnar_ensure_am_depends_catalog/11.2-1.sql" + +DELETE FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid + AND objid IN (select oid from pg_am where amname = 'columnar') + AND objsubid = 0 + AND refclassid = 'pg_class'::regclass::oid + AND refobjid IN ( + 'columnar_internal.stripe_first_row_number_idx'::regclass::oid, + 'columnar_internal.chunk_group_pkey'::regclass::oid, + 'columnar_internal.chunk_pkey'::regclass::oid, + 'columnar_internal.options_pkey'::regclass::oid, + 'columnar_internal.stripe_pkey'::regclass::oid + ) + AND refobjsubid = 0 + AND deptype = 'n'; diff --git a/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql b/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql index 9acf68da3..c987bfa67 100644 --- a/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql +++ b/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql @@ -1 +1,4 @@ -- citus_columnar--11.2-1--11.1-1 + +-- Note that we intentionally do not re-insert the pg_depend records that we +-- deleted via citus_columnar--11.1-1--11.2-1.sql. diff --git a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql new file mode 100644 index 000000000..101db17fb --- /dev/null +++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql @@ -0,0 +1,43 @@ +CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog +AS $func$ +BEGIN + INSERT INTO pg_depend + WITH columnar_schema_members(relid) AS ( + SELECT pg_class.oid AS relid FROM pg_class + WHERE relnamespace = + COALESCE( + (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar_internal'), + (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar') + ) + AND relname IN ('chunk', + 'chunk_group', + 'options', + 'storageid_seq', + 'stripe') + ) + SELECT -- Define a dependency edge from "columnar table access method" .. + 'pg_am'::regclass::oid as classid, + (select oid from pg_am where amname = 'columnar') as objid, + 0 as objsubid, + -- ... to some objects registered as regclass and that live in + -- "columnar" schema. That contains catalog tables and the sequences + -- created in "columnar" schema.
+ -- + -- Given the possibility that users might have created their own objects + -- in the columnar schema, we explicitly specify the list of objects that we + -- are interested in. + 'pg_class'::regclass::oid as refclassid, + columnar_schema_members.relid as refobjid, + 0 as refobjsubid, + 'n' as deptype + FROM columnar_schema_members + -- Avoid inserting duplicate entries into pg_depend. + EXCEPT TABLE pg_depend; +END; +$func$; +COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() + IS 'internal function responsible for creating dependencies from columnar ' + 'table access method to the rel objects in columnar schema'; diff --git a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql index ade15390a..101db17fb 100644 --- a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql +++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog() +CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() RETURNS void LANGUAGE plpgsql SET search_path = pg_catalog @@ -14,22 +14,17 @@ BEGIN ) AND relname IN ('chunk', 'chunk_group', - 'chunk_group_pkey', - 'chunk_pkey', 'options', - 'options_pkey', 'storageid_seq', - 'stripe', - 'stripe_first_row_number_idx', - 'stripe_pkey') + 'stripe') ) SELECT -- Define a dependency edge from "columnar table access method" .. 'pg_am'::regclass::oid as classid, (select oid from pg_am where amname = 'columnar') as objid, 0 as objsubid, - -- ... to each object that is registered to pg_class and that lives - -- in "columnar" schema. That contains catalog tables, indexes - -- created on them and the sequences + -- ... to some objects registered as regclass and that live in + -- "columnar" schema. That contains catalog tables and the sequences + -- created in "columnar" schema.
-- -- Given the possibility that users might have created their own objects -- in the columnar schema, we explicitly specify the list of objects that we @@ -43,6 +38,6 @@ BEGIN EXCEPT TABLE pg_depend; END; $func$; -COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog() +COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() IS 'internal function responsible for creating dependencies from columnar ' 'table access method to the rel objects in columnar schema'; diff --git a/src/backend/distributed/commands/alter_table.c b/src/backend/distributed/commands/alter_table.c index 0592cb762..f51b62535 100644 --- a/src/backend/distributed/commands/alter_table.c +++ b/src/backend/distributed/commands/alter_table.c @@ -1348,8 +1348,7 @@ CreateCitusTableLike(TableConversionState *con) } else if (IsCitusTableType(con->relationId, REFERENCE_TABLE)) { - CreateDistributedTable(con->newRelationId, NULL, DISTRIBUTE_BY_NONE, 0, false, - NULL); + CreateReferenceTable(con->newRelationId); } else if (IsCitusTableType(con->relationId, CITUS_LOCAL_TABLE)) { diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 86133322d..e38395296 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -94,6 +94,28 @@ #include "utils/syscache.h" #include "utils/inval.h" + +/* common params that apply to all Citus table types */ +typedef struct +{ + char distributionMethod; + char replicationModel; +} CitusTableParams; + + +/* + * Params that only apply to distributed tables, i.e., the ones that are + * known as DISTRIBUTED_TABLE by Citus metadata. + */ +typedef struct +{ + int shardCount; + bool shardCountIsStrict; + char *colocateWithTableName; + char *distributionColumnName; +} DistributedTableParams; + + /* * once every LOG_PER_TUPLE_AMOUNT, the copy will be logged.
*/ @@ -106,17 +128,22 @@ static void CreateDistributedTableConcurrently(Oid relationId, char *colocateWithTableName, int shardCount, bool shardCountIsStrict); -static char DecideReplicationModel(char distributionMethod, char *colocateWithTableName); +static char DecideDistTableReplicationModel(char distributionMethod, + char *colocateWithTableName); static List * HashSplitPointsForShardList(List *shardList); static List * HashSplitPointsForShardCount(int shardCount); static List * WorkerNodesForShardList(List *shardList); static List * RoundRobinWorkerNodeList(List *workerNodeList, int listLength); +static CitusTableParams DecideCitusTableParams(CitusTableType tableType, + DistributedTableParams * + distributedTableParams); +static void CreateCitusTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams); static void CreateHashDistributedTableShards(Oid relationId, int shardCount, Oid colocatedTableId, bool localTableEmpty); -static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn, - char distributionMethod, char replicationModel, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName); +static uint32 ColocationIdForNewTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams, + Var *distributionColumn); static void EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn, char distributionMethod, uint32 colocationId, char replicationModel); @@ -377,8 +404,8 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName, EnsureForeignKeysForDistributedTableConcurrently(relationId); - char replicationModel = DecideReplicationModel(distributionMethod, - colocateWithTableName); + char replicationModel = DecideDistTableReplicationModel(distributionMethod, + colocateWithTableName); /* * we fail transaction before local table conversion if the table could not be colocated with @@ -622,8 +649,8 @@ static void EnsureColocateWithTableIsValid(Oid relationId, char distributionMethod, char *distributionColumnName, char *colocateWithTableName) { - char replicationModel = DecideReplicationModel(distributionMethod, - colocateWithTableName); + char replicationModel = DecideDistTableReplicationModel(distributionMethod, + colocateWithTableName); /* * we fail transaction before local table conversion if the table could not be colocated with @@ -860,9 +887,6 @@ create_reference_table(PG_FUNCTION_ARGS) CheckCitusVersion(ERROR); Oid relationId = PG_GETARG_OID(0); - char *colocateWithTableName = NULL; - char *distributionColumnName = NULL; - EnsureCitusTableCanBeCreated(relationId); /* enable create_reference_table on an empty node */ @@ -895,8 +919,7 @@ create_reference_table(PG_FUNCTION_ARGS) errdetail("There are no active worker nodes."))); } - CreateDistributedTable(relationId, distributionColumnName, DISTRIBUTE_BY_NONE, - ShardCount, false, colocateWithTableName); + CreateReferenceTable(relationId); PG_RETURN_VOID(); } @@ -951,18 +974,90 @@ EnsureRelationExists(Oid relationId) /* - * CreateDistributedTable creates distributed table in the given configuration. + * CreateDistributedTable is a wrapper around CreateCitusTable that creates a + * distributed table.
+ */ +void +CreateDistributedTable(Oid relationId, char *distributionColumnName, + char distributionMethod, + int shardCount, bool shardCountIsStrict, + char *colocateWithTableName) +{ + CitusTableType tableType; + switch (distributionMethod) + { + case DISTRIBUTE_BY_HASH: + { + tableType = HASH_DISTRIBUTED; + break; + } + + case DISTRIBUTE_BY_APPEND: + { + tableType = APPEND_DISTRIBUTED; + break; + } + + case DISTRIBUTE_BY_RANGE: + { + tableType = RANGE_DISTRIBUTED; + break; + } + + default: + { + ereport(ERROR, (errmsg("unexpected distribution method when " + "deciding Citus table type"))); + break; + } + } + + DistributedTableParams distributedTableParams = { + .colocateWithTableName = colocateWithTableName, + .shardCount = shardCount, + .shardCountIsStrict = shardCountIsStrict, + .distributionColumnName = distributionColumnName + }; + CreateCitusTable(relationId, tableType, &distributedTableParams); +} + + +/* + * CreateReferenceTable is a wrapper around CreateCitusTable that creates a + * reference table. + */ +void +CreateReferenceTable(Oid relationId) +{ + CreateCitusTable(relationId, REFERENCE_TABLE, NULL); +} + + +/* + * CreateCitusTable is the internal method that creates a Citus table in + * the given configuration. + * + * DistributedTableParams should be non-null only if we're creating a distributed + * table. + * * This function contains all necessary logic to create distributed tables. It * performs necessary checks to ensure distributing the table is safe. If it is * safe to distribute the table, this function creates distributed table metadata, * creates shards and copies local data to shards. This function also handles * partitioned tables by distributing their partitions as well. */ -void -CreateDistributedTable(Oid relationId, char *distributionColumnName, - char distributionMethod, int shardCount, - bool shardCountIsStrict, char *colocateWithTableName) +static void +CreateCitusTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams) { + if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED || + tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL)) + { + ereport(ERROR, (errmsg("distributed table params must be provided " + "when creating a distributed table and must " + "not be provided otherwise"))); + } + /* * EnsureTableNotDistributed errors out when relation is a citus table but * we don't want to ask user to first undistribute their citus local tables @@ -988,11 +1083,8 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, * that ALTER TABLE hook does the necessary job, which means converting * local tables to citus local tables to properly support such foreign * keys. - * - * This function does not expect to create Citus local table, so we blindly - * create reference table when the method is DISTRIBUTE_BY_NONE.
*/ - else if (distributionMethod == DISTRIBUTE_BY_NONE && + else if (tableType == REFERENCE_TABLE && ShouldEnableLocalReferenceForeignKeys() && HasForeignKeyWithLocalTable(relationId)) { @@ -1022,24 +1114,29 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, PropagatePrerequisiteObjectsForDistributedTable(relationId); - char replicationModel = DecideReplicationModel(distributionMethod, - colocateWithTableName); + Var *distributionColumn = NULL; + if (distributedTableParams) + { + distributionColumn = BuildDistributionKeyFromColumnName(relationId, + distributedTableParams-> + distributionColumnName, + NoLock); + } - Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId, - distributionColumnName, - NoLock); + CitusTableParams citusTableParams = DecideCitusTableParams(tableType, + distributedTableParams); /* * ColocationIdForNewTable assumes caller acquires lock on relationId. In our case, * our caller already acquired lock on relationId. */ - uint32 colocationId = ColocationIdForNewTable(relationId, distributionColumn, - distributionMethod, replicationModel, - shardCount, shardCountIsStrict, - colocateWithTableName); + uint32 colocationId = ColocationIdForNewTable(relationId, tableType, + distributedTableParams, + distributionColumn); - EnsureRelationCanBeDistributed(relationId, distributionColumn, distributionMethod, - colocationId, replicationModel); + EnsureRelationCanBeDistributed(relationId, distributionColumn, + citusTableParams.distributionMethod, + colocationId, citusTableParams.replicationModel); /* * Make sure that existing reference tables have been replicated to all the nodes @@ -1068,8 +1165,10 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, bool autoConverted = false; /* create an entry for distributed table in pg_dist_partition */ - InsertIntoPgDistPartition(relationId, distributionMethod, distributionColumn, - colocationId, replicationModel, autoConverted); + InsertIntoPgDistPartition(relationId, citusTableParams.distributionMethod, + distributionColumn, + colocationId, citusTableParams.replicationModel, + autoConverted); /* foreign tables do not support TRUNCATE trigger */ if (RegularTable(relationId)) @@ -1078,17 +1177,14 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, } /* create shards for hash distributed and reference tables */ - if (distributionMethod == DISTRIBUTE_BY_HASH) + if (tableType == HASH_DISTRIBUTED) { - CreateHashDistributedTableShards(relationId, shardCount, colocatedTableId, + CreateHashDistributedTableShards(relationId, distributedTableParams->shardCount, + colocatedTableId, localTableEmpty); } - else if (distributionMethod == DISTRIBUTE_BY_NONE) + else if (tableType == REFERENCE_TABLE) { - /* - * This function does not expect to create Citus local table, so we blindly - * create reference table when the method is DISTRIBUTE_BY_NONE. 
- */ CreateReferenceTableShard(relationId); } @@ -1130,9 +1226,14 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, { MemoryContextReset(citusPartitionContext); - CreateDistributedTable(partitionRelationId, distributionColumnName, - distributionMethod, shardCount, false, - parentRelationName); + DistributedTableParams childDistributedTableParams = { + .colocateWithTableName = parentRelationName, + .shardCount = distributedTableParams->shardCount, + .shardCountIsStrict = false, + .distributionColumnName = distributedTableParams->distributionColumnName, + }; + CreateCitusTable(partitionRelationId, tableType, + &childDistributedTableParams); } MemoryContextSwitchTo(oldContext); @@ -1140,8 +1241,7 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, } /* copy over data for hash distributed and reference tables */ - if (distributionMethod == DISTRIBUTE_BY_HASH || - distributionMethod == DISTRIBUTE_BY_NONE) + if (tableType == HASH_DISTRIBUTED || tableType == REFERENCE_TABLE) { if (RegularTable(relationId)) { @@ -1160,6 +1260,70 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, } +/* + * DecideCitusTableParams decides CitusTableParams based on the given CitusTableType, + * and on DistributedTableParams if it's a distributed table. + * + * DistributedTableParams should be non-null only if CitusTableType corresponds + * to a distributed table. + */ +static +CitusTableParams +DecideCitusTableParams(CitusTableType tableType, + DistributedTableParams *distributedTableParams) +{ + CitusTableParams citusTableParams = { 0 }; + switch (tableType) + { + case HASH_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_HASH; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(DISTRIBUTE_BY_HASH, + distributedTableParams-> + colocateWithTableName); + break; + } + + case APPEND_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_APPEND; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(DISTRIBUTE_BY_APPEND, + distributedTableParams-> + colocateWithTableName); + break; + } + + case RANGE_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_RANGE; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(DISTRIBUTE_BY_RANGE, + distributedTableParams-> + colocateWithTableName); + break; + } + + case REFERENCE_TABLE: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE; + citusTableParams.replicationModel = REPLICATION_MODEL_2PC; + break; + } + + default: + { + ereport(ERROR, (errmsg("unexpected table type when deciding Citus " + "table params"))); + break; + } + } + + return citusTableParams; +} + + /* * PropagatePrerequisiteObjectsForDistributedTable ensures we can create shards * on all nodes by ensuring all dependent objects exist on all nodes. */ @@ -1420,18 +1584,16 @@ DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag) /* - * DecideReplicationModel function decides which replication model should be - * used depending on given distribution configuration. + * DecideDistTableReplicationModel function decides which replication model should be + * used for a distributed table depending on the given distribution configuration.
*/ static char -DecideReplicationModel(char distributionMethod, char *colocateWithTableName) +DecideDistTableReplicationModel(char distributionMethod, char *colocateWithTableName) { - if (distributionMethod == DISTRIBUTE_BY_NONE) - { - return REPLICATION_MODEL_2PC; - } - else if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 && - !IsColocateWithNone(colocateWithTableName)) + Assert(distributionMethod != DISTRIBUTE_BY_NONE); + + if (!IsColocateWithDefault(colocateWithTableName) && + !IsColocateWithNone(colocateWithTableName)) { text *colocateWithTableNameText = cstring_to_text(colocateWithTableName); Oid colocatedRelationId = ResolveRelationId(colocateWithTableNameText, false); @@ -1506,28 +1668,34 @@ CreateHashDistributedTableShards(Oid relationId, int shardCount, /* - * ColocationIdForNewTable returns a colocation id for hash-distributed table + * ColocationIdForNewTable returns a colocation id for the given table * according to the given configuration. If there is no such configuration, it * creates one and returns the colocation id of the newly created colocation group. + * Note that DistributedTableParams and the distribution column Var should be + * non-null only if CitusTableType corresponds to a distributed table. + * * For append and range distributed tables, this function errors out if * colocateWithTableName parameter is not NULL, otherwise directly returns * INVALID_COLOCATION_ID. * + * For reference tables, returns the common reference table colocation id. + * * This function assumes its caller takes the necessary lock on relationId to * prevent possible changes on it. */ static uint32 -ColocationIdForNewTable(Oid relationId, Var *distributionColumn, - char distributionMethod, char replicationModel, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName) +ColocationIdForNewTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams, + Var *distributionColumn) { + CitusTableParams citusTableParams = DecideCitusTableParams(tableType, + distributedTableParams); + uint32 colocationId = INVALID_COLOCATION_ID; - if (distributionMethod == DISTRIBUTE_BY_APPEND || - distributionMethod == DISTRIBUTE_BY_RANGE) + if (tableType == APPEND_DISTRIBUTED || tableType == RANGE_DISTRIBUTED) { - if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0) + if (!IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot distribute relation"), @@ -1537,7 +1705,7 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, return colocationId; } - else if (distributionMethod == DISTRIBUTE_BY_NONE) + else if (tableType == REFERENCE_TABLE) { return CreateReferenceTableColocationId(); } @@ -1548,27 +1716,29 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, * can be sure that there will be no modifications on the colocation table * until this transaction is committed.
*/ - Assert(distributionMethod == DISTRIBUTE_BY_HASH); + Assert(citusTableParams.distributionMethod == DISTRIBUTE_BY_HASH); Oid distributionColumnType = distributionColumn->vartype; Oid distributionColumnCollation = get_typcollation(distributionColumnType); /* get an advisory lock to serialize concurrent default group creations */ - if (IsColocateWithDefault(colocateWithTableName)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { AcquireColocationDefaultLock(); } colocationId = FindColocateWithColocationId(relationId, - replicationModel, + citusTableParams.replicationModel, distributionColumnType, distributionColumnCollation, - shardCount, + distributedTableParams->shardCount, + distributedTableParams-> shardCountIsStrict, + distributedTableParams-> colocateWithTableName); - if (IsColocateWithDefault(colocateWithTableName) && (colocationId != - INVALID_COLOCATION_ID)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName) && + (colocationId != INVALID_COLOCATION_ID)) { /* * we can release advisory lock if there is already a default entry for given params; @@ -1580,23 +1750,25 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, if (colocationId == INVALID_COLOCATION_ID) { - if (IsColocateWithDefault(colocateWithTableName)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { /* * Generate a new colocation ID and insert a pg_dist_colocation * record. */ - colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor, + colocationId = CreateColocationGroup(distributedTableParams->shardCount, + ShardReplicationFactor, distributionColumnType, distributionColumnCollation); } - else if (IsColocateWithNone(colocateWithTableName)) + else if (IsColocateWithNone(distributedTableParams->colocateWithTableName)) { /* * Generate a new colocation ID and insert a pg_dist_colocation * record. */ - colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor, + colocationId = CreateColocationGroup(distributedTableParams->shardCount, + ShardReplicationFactor, distributionColumnType, distributionColumnCollation); } diff --git a/src/backend/distributed/commands/foreign_constraint.c b/src/backend/distributed/commands/foreign_constraint.c index cf1e43fd4..6f12db13f 100644 --- a/src/backend/distributed/commands/foreign_constraint.c +++ b/src/backend/distributed/commands/foreign_constraint.c @@ -221,7 +221,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis if (!referencedIsCitus && !selfReferencingTable) { if (IsCitusLocalTableByDistParams(referencingDistMethod, - referencingReplicationModel)) + referencingReplicationModel, + referencingColocationId)) { ErrorOutForFKeyBetweenPostgresAndCitusLocalTable(referencedTableId); } @@ -245,8 +246,7 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis if (!selfReferencingTable) { referencedDistMethod = PartitionMethod(referencedTableId); - referencedDistKey = IsCitusTableType(referencedTableId, - CITUS_TABLE_WITH_NO_DIST_KEY) ? + referencedDistKey = !HasDistributionKey(referencedTableId) ? 
NULL : DistPartitionKey(referencedTableId); referencedColocationId = TableColocationId(referencedTableId); @@ -278,9 +278,17 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis } bool referencingIsCitusLocalOrRefTable = - (referencingDistMethod == DISTRIBUTE_BY_NONE); + IsCitusLocalTableByDistParams(referencingDistMethod, + referencingReplicationModel, + referencingColocationId) || + IsReferenceTableByDistParams(referencingDistMethod, + referencingReplicationModel); bool referencedIsCitusLocalOrRefTable = - (referencedDistMethod == DISTRIBUTE_BY_NONE); + IsCitusLocalTableByDistParams(referencedDistMethod, + referencedReplicationModel, + referencedColocationId) || + IsReferenceTableByDistParams(referencedDistMethod, + referencedReplicationModel); if (referencingIsCitusLocalOrRefTable && referencedIsCitusLocalOrRefTable) { EnsureSupportedFKeyBetweenCitusLocalAndRefTable(constraintForm, @@ -313,7 +321,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis * reference table is referenced. */ bool referencedIsReferenceTable = - (referencedReplicationModel == REPLICATION_MODEL_2PC); + IsReferenceTableByDistParams(referencedDistMethod, + referencedReplicationModel); if (!referencedIsReferenceTable && ( referencingColocationId == INVALID_COLOCATION_ID || referencingColocationId != referencedColocationId)) diff --git a/src/backend/distributed/commands/index.c b/src/backend/distributed/commands/index.c index 5f1598510..aa0715372 100644 --- a/src/backend/distributed/commands/index.c +++ b/src/backend/distributed/commands/index.c @@ -1190,7 +1190,7 @@ ErrorIfUnsupportedIndexStmt(IndexStmt *createIndexStatement) * Non-distributed tables do not have partition key, and unique constraints * are allowed for them. Thus, we added a short-circuit for non-distributed tables. 
*/ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { return; } diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index b5ac6a519..1203aeff4 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -393,7 +393,7 @@ CitusCopyFrom(CopyStmt *copyStatement, QueryCompletion *completionTag) if (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) || IsCitusTableTypeCacheEntry(cacheEntry, RANGE_DISTRIBUTED) || IsCitusTableTypeCacheEntry(cacheEntry, APPEND_DISTRIBUTED) || - IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + !HasDistributionKeyCacheEntry(cacheEntry)) { CopyToExistingShards(copyStatement, completionTag); } diff --git a/src/backend/distributed/commands/table.c b/src/backend/distributed/commands/table.c index 39a652f10..c0d8fa92b 100644 --- a/src/backend/distributed/commands/table.c +++ b/src/backend/distributed/commands/table.c @@ -75,7 +75,7 @@ static void DistributePartitionUsingParent(Oid parentRelationId, static void ErrorIfMultiLevelPartitioning(Oid parentRelationId, Oid partitionRelationId); static void ErrorIfAttachCitusTableToPgLocalTable(Oid parentRelationId, Oid partitionRelationId); -static bool AlterTableDefinesFKeyBetweenPostgresAndNonDistTable( +static bool ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef( AlterTableStmt *alterTableStatement); static bool ShouldMarkConnectedRelationsNotAutoConverted(Oid leftRelationId, Oid rightRelationId); @@ -1119,7 +1119,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, if (ShouldEnableLocalReferenceForeignKeys() && processUtilityContext != PROCESS_UTILITY_SUBCOMMAND && - AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(alterTableStatement)) + ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(alterTableStatement)) { /* * We don't process subcommands generated by postgres. @@ -1584,12 +1584,12 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, /* - * AlterTableDefinesFKeyBetweenPostgresAndNonDistTable returns true if given + * ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef returns true if given * alter table command defines foreign key between a postgres table and a * reference or citus local table. */ static bool -AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableStatement) +ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(AlterTableStmt *alterTableStatement) { List *foreignKeyConstraintList = GetAlterTableAddFKeyConstraintList(alterTableStatement); @@ -1607,9 +1607,12 @@ AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableSt if (!IsCitusTable(leftRelationId)) { return RelationIdListContainsCitusTableType(rightRelationIdList, - CITUS_TABLE_WITH_NO_DIST_KEY); + CITUS_LOCAL_TABLE) || + RelationIdListContainsCitusTableType(rightRelationIdList, + REFERENCE_TABLE); } - else if (IsCitusTableType(leftRelationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (IsCitusTableType(leftRelationId, CITUS_LOCAL_TABLE) || + IsCitusTableType(leftRelationId, REFERENCE_TABLE)) { return RelationIdListContainsPostgresTable(rightRelationIdList); } @@ -3666,7 +3669,7 @@ SetupExecutionModeForAlterTable(Oid relationId, AlterTableCmd *command) * sequential mode. 
*/ if (executeSequentially && - !IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) && + HasDistributionKey(relationId) && ParallelQueryExecutedInTransaction()) { char *relationName = get_rel_name(relationId); diff --git a/src/backend/distributed/commands/truncate.c b/src/backend/distributed/commands/truncate.c index 0993c287f..52f769a11 100644 --- a/src/backend/distributed/commands/truncate.c +++ b/src/backend/distributed/commands/truncate.c @@ -324,7 +324,7 @@ ExecuteTruncateStmtSequentialIfNecessary(TruncateStmt *command) { Oid relationId = RangeVarGetRelid(rangeVar, NoLock, failOK); - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) && + if (IsCitusTable(relationId) && !HasDistributionKey(relationId) && TableReferenced(relationId)) { char *relationName = get_rel_name(relationId); diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index 8fd4c5de6..b7753108c 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -311,7 +311,7 @@ static void InvalidateDistTableCache(void); static void InvalidateDistObjectCache(void); static bool InitializeTableCacheEntry(int64 shardId, bool missingOk); static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel, - CitusTableType tableType); + uint32 colocationId, CitusTableType tableType); static bool RefreshTableCacheEntryIfInvalid(ShardIdCacheEntry *shardEntry, bool missingOk); @@ -450,7 +450,36 @@ bool IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tableType) { return IsCitusTableTypeInternal(tableEntry->partitionMethod, - tableEntry->replicationModel, tableType); + tableEntry->replicationModel, + tableEntry->colocationId, tableType); +} + + +/* + * HasDistributionKey returns true if the given Citus table has a + * distribution key. + */ +bool +HasDistributionKey(Oid relationId) +{ + CitusTableCacheEntry *tableEntry = LookupCitusTableCacheEntry(relationId); + if (tableEntry == NULL) + { + ereport(ERROR, (errmsg("relation with oid %u is not a Citus table", relationId))); + } + + return HasDistributionKeyCacheEntry(tableEntry); +} + + +/* + * HasDistributionKeyCacheEntry returns true if the given cache entry identifies + * a Citus table that has a distribution key.
+ */ +bool +HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry) +{ + return tableEntry->partitionMethod != DISTRIBUTE_BY_NONE; } @@ -460,7 +489,7 @@ IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tabl */ static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel, - CitusTableType tableType) + uint32 colocationId, CitusTableType tableType) { switch (tableType) { @@ -501,12 +530,8 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel, case CITUS_LOCAL_TABLE: { return partitionMethod == DISTRIBUTE_BY_NONE && - replicationModel != REPLICATION_MODEL_2PC; - } - - case CITUS_TABLE_WITH_NO_DIST_KEY: - { - return partitionMethod == DISTRIBUTE_BY_NONE; + replicationModel != REPLICATION_MODEL_2PC && + colocationId == INVALID_COLOCATION_ID; } case ANY_CITUS_TABLE_TYPE: @@ -529,33 +554,21 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel, char * GetTableTypeName(Oid tableId) { - bool regularTable = false; - char partitionMethod = ' '; - char replicationModel = ' '; - if (IsCitusTable(tableId)) - { - CitusTableCacheEntry *referencingCacheEntry = GetCitusTableCacheEntry(tableId); - partitionMethod = referencingCacheEntry->partitionMethod; - replicationModel = referencingCacheEntry->replicationModel; - } - else - { - regularTable = true; - } - - if (regularTable) + if (!IsCitusTable(tableId)) { return "regular table"; } - else if (partitionMethod == 'h') + + CitusTableCacheEntry *tableCacheEntry = GetCitusTableCacheEntry(tableId); + if (IsCitusTableTypeCacheEntry(tableCacheEntry, HASH_DISTRIBUTED)) { return "distributed table"; } - else if (partitionMethod == 'n' && replicationModel == 't') + else if (IsCitusTableTypeCacheEntry(tableCacheEntry, REFERENCE_TABLE)) { return "reference table"; } - else if (partitionMethod == 'n' && replicationModel != 't') + else if (IsCitusTableTypeCacheEntry(tableCacheEntry, CITUS_LOCAL_TABLE)) { return "citus local table"; } @@ -765,14 +778,28 @@ PgDistPartitionTupleViaCatalog(Oid relationId) /* - * IsCitusLocalTableByDistParams returns true if given partitionMethod and - * replicationModel would identify a citus local table. + * IsReferenceTableByDistParams returns true if given partitionMethod and + * replicationModel would identify a reference table. */ bool -IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel) +IsReferenceTableByDistParams(char partitionMethod, char replicationModel) { return partitionMethod == DISTRIBUTE_BY_NONE && - replicationModel != REPLICATION_MODEL_2PC; + replicationModel == REPLICATION_MODEL_2PC; +} + + +/* + * IsCitusLocalTableByDistParams returns true if given partitionMethod, + * replicationModel and colocationId would identify a citus local table. 
+ */ +bool +IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel, + uint32 colocationId) +{ + return partitionMethod == DISTRIBUTE_BY_NONE && + replicationModel != REPLICATION_MODEL_2PC && + colocationId == INVALID_COLOCATION_ID; } @@ -4837,11 +4864,14 @@ CitusTableTypeIdList(CitusTableType citusTableType) Datum partMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1]; Datum replicationModelDatum = datumArray[Anum_pg_dist_partition_repmodel - 1]; + Datum colocationIdDatum = datumArray[Anum_pg_dist_partition_colocationid - 1]; Oid partitionMethod = DatumGetChar(partMethodDatum); Oid replicationModel = DatumGetChar(replicationModelDatum); + uint32 colocationId = DatumGetUInt32(colocationIdDatum); - if (IsCitusTableTypeInternal(partitionMethod, replicationModel, citusTableType)) + if (IsCitusTableTypeInternal(partitionMethod, replicationModel, colocationId, + citusTableType)) { Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1]; diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c index 6a5840f78..df9104efd 100644 --- a/src/backend/distributed/metadata/metadata_sync.c +++ b/src/backend/distributed/metadata/metadata_sync.c @@ -535,7 +535,7 @@ ShouldSyncTableMetadata(Oid relationId) bool hashDistributed = IsCitusTableTypeCacheEntry(tableEntry, HASH_DISTRIBUTED); bool citusTableWithNoDistKey = - IsCitusTableTypeCacheEntry(tableEntry, CITUS_TABLE_WITH_NO_DIST_KEY); + !HasDistributionKeyCacheEntry(tableEntry); return ShouldSyncTableMetadataInternal(hashDistributed, citusTableWithNoDistKey); } @@ -1158,7 +1158,7 @@ DistributionCreateCommand(CitusTableCacheEntry *cacheEntry) char replicationModel = cacheEntry->replicationModel; StringInfo tablePartitionKeyNameString = makeStringInfo(); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { appendStringInfo(tablePartitionKeyNameString, "NULL"); } diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index f6639f8d2..91ffca4fe 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -1536,7 +1536,7 @@ get_shard_id_for_distribution_column(PG_FUNCTION_ARGS) errmsg("relation is not distributed"))); } - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { List *shardIntervalList = LoadShardIntervalList(relationId); if (shardIntervalList == NIL) @@ -1918,6 +1918,10 @@ ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode) { int32 groupId = workerNode->groupId; List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId); + + /* sort the list to prevent regression tests getting flaky */ + shardPlacements = SortList(shardPlacements, CompareGroupShardPlacements); + GroupShardPlacement *placement = NULL; foreach_ptr(placement, shardPlacements) { diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index baed8b0d5..c5282202e 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -1818,10 +1818,10 @@ static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) { char transferMode = LookupShardTransferMode(shardReplicationModeOid); - EnsureReferenceTablesExistOnAllNodesExtended(transferMode); if 
(list_length(options->relationIdList) == 0) { + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); return; } @@ -1836,6 +1855,25 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) List *placementUpdateList = GetRebalanceSteps(options); + if (transferMode == TRANSFER_MODE_AUTOMATIC) + { + /* + * If the shard transfer mode is set to auto, we should check beforehand + * if we are able to use logical replication to transfer shards or not. + * We throw an error if any of the tables do not have a replica identity, which + * is required for logical replication to replicate UPDATE and DELETE commands. + */ + PlacementUpdateEvent *placementUpdate = NULL; + foreach_ptr(placementUpdate, placementUpdateList) + { + Oid relationId = RelationIdForShard(placementUpdate->shardId); + List *colocatedTableList = ColocatedTableList(relationId); + VerifyTablesHaveReplicaIdentity(colocatedTableList); + } + } + + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); + if (list_length(placementUpdateList) == 0) { return; @@ -1916,12 +1935,6 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo EnsureTableOwner(colocatedTableId); } - if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) - { - /* make sure that all tables included in the rebalance have a replica identity*/ - VerifyTablesHaveReplicaIdentity(colocatedTableList); - } - List *placementUpdateList = GetRebalanceSteps(options); if (list_length(placementUpdateList) == 0) { @@ -1930,6 +1943,23 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo return 0; } + if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) + { + /* + * If the shard transfer mode is set to auto, we should check beforehand + * if we are able to use logical replication to transfer shards or not. + * We throw an error if any of the tables do not have a replica identity, which + * is required for logical replication to replicate UPDATE and DELETE commands. + */ + PlacementUpdateEvent *placementUpdate = NULL; + foreach_ptr(placementUpdate, placementUpdateList) + { + relationId = RelationIdForShard(placementUpdate->shardId); + List *colocatedTables = ColocatedTableList(relationId); + VerifyTablesHaveReplicaIdentity(colocatedTables); + } + } + + DropOrphanedResourcesInSeparateTransaction(); /* find the name of the shard transfer mode to interpolate in the scheduled command */ diff --git a/src/backend/distributed/operations/worker_copy_table_to_node_udf.c b/src/backend/distributed/operations/worker_copy_table_to_node_udf.c index 7af80ef55..f0f83744d 100644 --- a/src/backend/distributed/operations/worker_copy_table_to_node_udf.c +++ b/src/backend/distributed/operations/worker_copy_table_to_node_udf.c @@ -53,8 +53,14 @@ worker_copy_table_to_node(PG_FUNCTION_ARGS) targetNodeId); StringInfo selectShardQueryForCopy = makeStringInfo(); + + /* + * Even though we do COPY(SELECT ...) all the columns, we can't just do SELECT * because we must not COPY generated columns.
+ */ + const char *columnList = CopyableColumnNamesFromRelationName(relationSchemaName, + relationName); appendStringInfo(selectShardQueryForCopy, - "SELECT * FROM %s;", relationQualifiedName); + "SELECT %s FROM %s;", columnList, relationQualifiedName); ParamListInfo params = NULL; ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params, diff --git a/src/backend/distributed/operations/worker_shard_copy.c b/src/backend/distributed/operations/worker_shard_copy.c index 9239caffb..e9c2af512 100644 --- a/src/backend/distributed/operations/worker_shard_copy.c +++ b/src/backend/distributed/operations/worker_shard_copy.c @@ -73,7 +73,7 @@ static void ShardCopyDestReceiverDestroy(DestReceiver *destReceiver); static bool CanUseLocalCopy(uint32_t destinationNodeId); static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName, bool - useBinaryFormat); + useBinaryFormat, TupleDesc tupleDesc); static void WriteLocalTuple(TupleTableSlot *slot, ShardCopyDestReceiver *copyDest); static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead); static void LocalCopyToShard(ShardCopyDestReceiver *copyDest, CopyOutState @@ -105,7 +105,8 @@ ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest) StringInfo copyStatement = ConstructShardCopyStatement( copyDest->destinationShardFullyQualifiedName, - copyDest->copyOutState->binary); + copyDest->copyOutState->binary, + copyDest->tupleDescriptor); if (!SendRemoteCommand(copyDest->connection, copyStatement->data)) { @@ -344,21 +345,80 @@ ShardCopyDestReceiverDestroy(DestReceiver *dest) } +/* + * CopyableColumnNamesFromTupleDesc function creates and returns a comma-separated column names string to be used in COPY + * and SELECT statements when copying a table. The COPY and SELECT statements should filter out the GENERATED columns since the COPY + * statement fails to handle them. While iterating over the attributes of the table, we also need to skip the dropped columns. + */ +const char * +CopyableColumnNamesFromTupleDesc(TupleDesc tupDesc) +{ + StringInfo columnList = makeStringInfo(); + bool firstInList = true; + + for (int i = 0; i < tupDesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupDesc, i); + if (att->attgenerated || att->attisdropped) + { + continue; + } + if (!firstInList) + { + appendStringInfo(columnList, ","); + } + + firstInList = false; + + appendStringInfo(columnList, "%s", quote_identifier(NameStr(att->attname))); + } + + return columnList->data; +} + + +/* + * CopyableColumnNamesFromRelationName function is a wrapper for CopyableColumnNamesFromTupleDesc.
+ */ +const char * +CopyableColumnNamesFromRelationName(const char *schemaName, const char *relationName) +{ + Oid namespaceOid = get_namespace_oid(schemaName, true); + + Oid relationId = get_relname_relid(relationName, namespaceOid); + + Relation relation = relation_open(relationId, AccessShareLock); + + TupleDesc tupleDesc = RelationGetDescr(relation); + + const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc); + + relation_close(relation, NoLock); + + return columnList; +} + + /* * ConstructShardCopyStatement constructs the text of a COPY statement * for copying into a result table */ static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName, bool - useBinaryFormat) + useBinaryFormat, + TupleDesc tupleDesc) { char *destinationShardSchemaName = linitial(destinationShardFullyQualifiedName); char *destinationShardRelationName = lsecond(destinationShardFullyQualifiedName); + StringInfo command = makeStringInfo(); - appendStringInfo(command, "COPY %s.%s FROM STDIN", + + const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc); + + appendStringInfo(command, "COPY %s.%s (%s) FROM STDIN", quote_identifier(destinationShardSchemaName), quote_identifier( - destinationShardRelationName)); + destinationShardRelationName), columnList); if (useBinaryFormat) { diff --git a/src/backend/distributed/operations/worker_split_copy_udf.c b/src/backend/distributed/operations/worker_split_copy_udf.c index b96475992..c154ac040 100644 --- a/src/backend/distributed/operations/worker_split_copy_udf.c +++ b/src/backend/distributed/operations/worker_split_copy_udf.c @@ -110,8 +110,13 @@ worker_split_copy(PG_FUNCTION_ARGS) splitCopyInfoList)))); StringInfo selectShardQueryForCopy = makeStringInfo(); + const char *columnList = CopyableColumnNamesFromRelationName( + sourceShardToCopySchemaName, + sourceShardToCopyName); + appendStringInfo(selectShardQueryForCopy, - "SELECT * FROM %s;", sourceShardToCopyQualifiedName); + "SELECT %s FROM %s;", columnList, + sourceShardToCopyQualifiedName); ParamListInfo params = NULL; ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params, diff --git a/src/backend/distributed/planner/deparse_shard_query.c b/src/backend/distributed/planner/deparse_shard_query.c index afb4597b8..8a3f092ce 100644 --- a/src/backend/distributed/planner/deparse_shard_query.c +++ b/src/backend/distributed/planner/deparse_shard_query.c @@ -404,7 +404,8 @@ SetTaskQueryIfShouldLazyDeparse(Task *task, Query *query) return; } - SetTaskQueryString(task, DeparseTaskQuery(task, query)); + SetTaskQueryString(task, AnnotateQuery(DeparseTaskQuery(task, query), + task->partitionColumn, task->colocationId)); } diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 5f9c56212..05b0a2c7f 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -34,6 +34,7 @@ #include "distributed/intermediate_results.h" #include "distributed/listutils.h" #include "distributed/coordinator_protocol.h" +#include "distributed/merge_planner.h" #include "distributed/metadata_cache.h" #include "distributed/multi_executor.h" #include "distributed/distributed_planner.h" @@ -69,6 +70,17 @@ #include "utils/syscache.h" +/* RouterPlanType is used to determine the router plan to invoke */ +typedef enum RouterPlanType +{ + INSERT_SELECT_INTO_CITUS_TABLE, + INSERT_SELECT_INTO_LOCAL_TABLE, + DML_QUERY, + SELECT_QUERY, + MERGE_QUERY, + 
REPLAN_WITH_BOUND_PARAMETERS +} RouterPlanType; + static List *plannerRestrictionContextList = NIL; int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */ static uint64 NextPlanId = 1; @@ -76,12 +88,9 @@ static uint64 NextPlanId = 1; /* keep track of planner call stack levels */ int PlannerLevel = 0; -static void ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, - List *rangeTableList); -static bool ContainsMergeCommandWalker(Node *node); static bool ListContainsDistributedTableRTE(List *rangeTableList, bool *maybeHasForeignDistributedTable); -static bool IsUpdateOrDelete(Query *query); +static bool IsUpdateOrDeleteOrMerge(Query *query); static PlannedStmt * CreateDistributedPlannedStmt( DistributedPlanningContext *planContext); static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, @@ -133,7 +142,10 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext static RTEListProperties * GetRTEListProperties(List *rangeTableList); static List * TranslatedVars(PlannerInfo *root, int relationIndex); static void WarnIfListHasForeignDistributedTable(List *rangeTableList); -static void ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList); +static RouterPlanType GetRouterPlanType(Query *query, + Query *originalQuery, + bool hasUnresolvedParams); + /* Distributed planner hook */ PlannedStmt * @@ -162,7 +174,7 @@ distributed_planner(Query *parse, * We cannot have merge command for this path as well because * there cannot be recursively planned merge command. */ - Assert(!ContainsMergeCommandWalker((Node *) parse)); + Assert(!IsMergeQuery(parse)); needsDistributedPlanning = true; } @@ -206,12 +218,6 @@ distributed_planner(Query *parse, if (!fastPathRouterQuery) { - /* - * Fast path queries cannot have merge command, and we - * prevent the remaining here. - */ - ErrorIfQueryHasUnsupportedMergeCommand(parse, rangeTableList); - /* * When there are partitioned tables (not applicable to fast path), * pretend that they are regular tables to avoid unnecessary work @@ -310,72 +316,6 @@ distributed_planner(Query *parse, } -/* - * ErrorIfQueryHasUnsupportedMergeCommand walks over the query tree and bails out - * if there is no Merge command (e.g., CMD_MERGE) in the query tree. For merge, - * looks for all supported combinations, throws an exception if any violations - * are seen. - */ -static void -ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, List *rangeTableList) -{ - /* - * Postgres currently doesn't support Merge queries inside subqueries and - * ctes, but lets be defensive and do query tree walk anyway. - * - * We do not call this path for fast-path queries to avoid this additional - * overhead. - */ - if (!ContainsMergeCommandWalker((Node *) queryTree)) - { - /* No MERGE found */ - return; - } - - - /* - * In Citus we have limited support for MERGE, it's allowed - * only if all the tables(target, source or any CTE) tables - * are are local i.e. a combination of Citus local and Non-Citus - * tables (regular Postgres tables). - */ - ErrorIfMergeHasUnsupportedTables(queryTree, rangeTableList); -} - - -/* - * ContainsMergeCommandWalker walks over the node and finds if there are any - * Merge command (e.g., CMD_MERGE) in the node. 
- */ -static bool -ContainsMergeCommandWalker(Node *node) -{ - #if PG_VERSION_NUM < PG_VERSION_15 - return false; - #endif - - if (node == NULL) - { - return false; - } - - if (IsA(node, Query)) - { - Query *query = (Query *) node; - if (IsMergeQuery(query)) - { - return true; - } - - return query_tree_walker((Query *) node, ContainsMergeCommandWalker, NULL, 0); - } - - return expression_tree_walker(node, ContainsMergeCommandWalker, NULL); - - return false; -} - - /* * ExtractRangeTableEntryList is a wrapper around ExtractRangeTableEntryWalker. * The function traverses the input query and returns all the range table @@ -679,10 +619,12 @@ IsMultiTaskPlan(DistributedPlan *distributedPlan) - * IsUpdateOrDelete returns true if the query performs an update or delete. + * IsUpdateOrDeleteOrMerge returns true if the query performs an update, + * delete or merge. */ bool -IsUpdateOrDelete(Query *query) +IsUpdateOrDeleteOrMerge(Query *query) { return query->commandType == CMD_UPDATE || - query->commandType == CMD_DELETE; + query->commandType == CMD_DELETE || + query->commandType == CMD_MERGE; } @@ -856,7 +797,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) * if it is planned as a multi shard modify query. */ if ((distributedPlan->planningError || - (IsUpdateOrDelete(planContext->originalQuery) && IsMultiTaskPlan( + (IsUpdateOrDeleteOrMerge(planContext->originalQuery) && IsMultiTaskPlan( distributedPlan))) && hasUnresolvedParams) { @@ -961,6 +902,51 @@ TryCreateDistributedPlannedStmt(PlannedStmt *localPlan, } +/* + * GetRouterPlanType checks the parse tree to return the appropriate plan type. + */ +static RouterPlanType +GetRouterPlanType(Query *query, Query *originalQuery, bool hasUnresolvedParams) +{ + if (!IsModifyCommand(originalQuery)) + { + return SELECT_QUERY; + } + + Oid targetRelationId = ModifyQueryResultRelationId(query); + + EnsureModificationsCanRunOnRelation(targetRelationId); + EnsurePartitionTableNotReplicated(targetRelationId); + + /* Check the type of modification being done */ + + if (InsertSelectIntoCitusTable(originalQuery)) + { + if (hasUnresolvedParams) + { + return REPLAN_WITH_BOUND_PARAMETERS; + } + return INSERT_SELECT_INTO_CITUS_TABLE; + } + else if (InsertSelectIntoLocalTable(originalQuery)) + { + if (hasUnresolvedParams) + { + return REPLAN_WITH_BOUND_PARAMETERS; + } + return INSERT_SELECT_INTO_LOCAL_TABLE; + } + else if (IsMergeQuery(originalQuery)) + { + return MERGE_QUERY; + } + else + { + return DML_QUERY; + } +} + + /* * CreateDistributedPlan generates a distributed plan for a query. * It goes through 3 steps: @@ -978,88 +964,83 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina DistributedPlan *distributedPlan = NULL; bool hasCtes = originalQuery->cteList != NIL; - if (IsModifyCommand(originalQuery)) + /* Step 1: Try router planner */ + + RouterPlanType routerPlan = GetRouterPlanType(query, originalQuery, + hasUnresolvedParams); + + switch (routerPlan) { - Oid targetRelationId = ModifyQueryResultRelationId(query); - - EnsureModificationsCanRunOnRelation(targetRelationId); - - EnsurePartitionTableNotReplicated(targetRelationId); - - if (InsertSelectIntoCitusTable(originalQuery)) + case INSERT_SELECT_INTO_CITUS_TABLE: { - if (hasUnresolvedParams) - { - /* - * Unresolved parameters can cause performance regressions in - * INSERT...SELECT when the partition column is a parameter - * because we don't perform any additional pruning in the executor.
- */ - return NULL; - } - distributedPlan = - CreateInsertSelectPlan(planId, originalQuery, plannerRestrictionContext, + CreateInsertSelectPlan(planId, + originalQuery, + plannerRestrictionContext, boundParams); + break; } - else if (InsertSelectIntoLocalTable(originalQuery)) + + case INSERT_SELECT_INTO_LOCAL_TABLE: { - if (hasUnresolvedParams) - { - /* - * Unresolved parameters can cause performance regressions in - * INSERT...SELECT when the partition column is a parameter - * because we don't perform any additional pruning in the executor. - */ - return NULL; - } distributedPlan = - CreateInsertSelectIntoLocalTablePlan(planId, originalQuery, boundParams, + CreateInsertSelectIntoLocalTablePlan(planId, + originalQuery, + boundParams, hasUnresolvedParams, plannerRestrictionContext); + break; } - else + + case DML_QUERY: { /* modifications are always routed through the same planner/executor */ distributedPlan = CreateModifyPlan(originalQuery, query, plannerRestrictionContext); + break; } - /* the functions above always return a plan, possibly with an error */ - Assert(distributedPlan); + case MERGE_QUERY: + { + distributedPlan = + CreateMergePlan(originalQuery, query, plannerRestrictionContext); + break; + } - if (distributedPlan->planningError == NULL) + case REPLAN_WITH_BOUND_PARAMETERS: { - return distributedPlan; + /* + * Unresolved parameters can cause performance regressions in + * INSERT...SELECT when the partition column is a parameter + * because we don't perform any additional pruning in the executor. + */ + return NULL; } - else + + case SELECT_QUERY: { - RaiseDeferredError(distributedPlan->planningError, DEBUG2); + /* + * For SELECT queries, if the router executor is enabled, we first try + * to plan the query as a router query. If that is not supported, we + * fall back to the full-blown plan/optimize/physical planning process + * needed to produce distributed query plans. + */ + distributedPlan = + CreateRouterPlan(originalQuery, query, plannerRestrictionContext); + break; } } + + /* the functions above always return a plan, possibly with an error */ + Assert(distributedPlan); + + if (distributedPlan->planningError == NULL) + { + return distributedPlan; + } else { - /* - * For select queries we, if router executor is enabled, first try to - * plan the query as a router query. If not supported, otherwise try - * the full blown plan/optimize/physical planning process needed to - * produce distributed query plans. - */ - - distributedPlan = CreateRouterPlan(originalQuery, query, - plannerRestrictionContext); - if (distributedPlan->planningError == NULL) - { - return distributedPlan; - } - else - { - /* - * For debugging it's useful to display why query was not - * router plannable. - */ - RaiseDeferredError(distributedPlan->planningError, DEBUG2); - } + RaiseDeferredError(distributedPlan->planningError, DEBUG2); } if (hasUnresolvedParams) @@ -1088,6 +1069,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina boundParams); Assert(originalQuery != NULL); + /* Step 2: Generate subplans for CTEs and complex subqueries */ + /* * Plan subqueries and CTEs that cannot be pushed down by recursively * calling the planner and return the resulting plans to subPlanList.
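As a hedged aside on Step 2 above: a CTE that cannot be pushed down is planned recursively and its result is fed back into the outer query as an intermediate result. A minimal sketch, assuming a hypothetical hash-distributed table named orders (not part of this patch):

    -- the LIMIT inside the CTE prevents pushdown, so the CTE is expected
    -- to be planned recursively and materialized as an intermediate result
    WITH top_customers AS (
        SELECT customer_id
        FROM orders
        GROUP BY customer_id
        ORDER BY count(*) DESC
        LIMIT 10
    )
    SELECT count(*)
    FROM orders
    WHERE customer_id IN (SELECT customer_id FROM top_customers);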
@@ -1188,6 +1171,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina query->cteList = NIL; Assert(originalQuery->cteList == NIL); + /* Step 3: Try Logical planner */ + MultiTreeRoot *logicalPlan = MultiLogicalPlanCreate(originalQuery, query, plannerRestrictionContext); MultiLogicalPlanOptimize(logicalPlan); @@ -2617,148 +2602,3 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList) } } } - - -/* - * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is - * permitted on special relations, such as materialized view, returns true only if - * it's a "source" relation. - */ -bool -IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) -{ - if (!IsMergeQuery(parse)) - { - return false; - } - - RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); - - /* Is it a target relation? */ - if (targetRte->relid == rte->relid) - { - return false; - } - - return true; -} - - -/* - * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE - * present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus - * tables (regular Postgres tables), raises an exception for all other combinations. - */ -static void -ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList) -{ - ListCell *tableCell = NULL; - - foreach(tableCell, rangeTableList) - { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(tableCell); - Oid relationId = rangeTableEntry->relid; - - switch (rangeTableEntry->rtekind) - { - case RTE_RELATION: - { - /* Check the relation type */ - break; - } - - case RTE_SUBQUERY: - case RTE_FUNCTION: - case RTE_TABLEFUNC: - case RTE_VALUES: - case RTE_JOIN: - case RTE_CTE: - { - /* Skip them as base table(s) will be checked */ - continue; - } - - /* - * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, - * such as, trigger data; until we find a genuine use case, raise an - * exception. - * RTE_RESULT is a node added by the planner and we shouldn't - * encounter it in the parse tree. 
- */ - case RTE_NAMEDTUPLESTORE: - case RTE_RESULT: - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not supported with " - "Tuplestores and results"))); - break; - } - - default: - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command: Unrecognized range table entry."))); - } - } - - /* RTE Relation can be of various types, check them now */ - - /* skip the regular views as they are replaced with subqueries */ - if (rangeTableEntry->relkind == RELKIND_VIEW) - { - continue; - } - - if (rangeTableEntry->relkind == RELKIND_MATVIEW || - rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) - { - /* Materialized view or Foreign table as target is not allowed */ - if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) - { - /* Non target relation is ok */ - continue; - } - else - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not allowed " - "on materialized view"))); - } - } - - if (rangeTableEntry->relkind != RELKIND_RELATION && - rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Unexpected relation type(relkind:%c) in MERGE command", - rangeTableEntry->relkind))); - } - - Assert(rangeTableEntry->relid != 0); - - /* Distributed tables and Reference tables are not supported yet */ - if (IsCitusTableType(relationId, REFERENCE_TABLE) || - IsCitusTableType(relationId, DISTRIBUTED_TABLE)) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not supported on " - "distributed/reference tables yet"))); - } - - /* Regular Postgres tables and Citus local tables are allowed */ - if (!IsCitusTable(relationId) || - IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) - { - continue; - } - - - /* Any other Citus table type missing ? */ - } - - /* All the tables are local, supported */ -} diff --git a/src/backend/distributed/planner/fast_path_router_planner.c b/src/backend/distributed/planner/fast_path_router_planner.c index aa029f3c0..ecb62478a 100644 --- a/src/backend/distributed/planner/fast_path_router_planner.c +++ b/src/backend/distributed/planner/fast_path_router_planner.c @@ -54,10 +54,11 @@ bool EnableFastPathRouterPlanner = true; static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey); -static bool ConjunctionContainsColumnFilter(Node *node, Var *column, - Node **distributionKeyValue); static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn, Node **distributionKeyValue); +static bool ConjunctionContainsColumnFilter(Node *node, + Var *column, + Node **distributionKeyValue); /* diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c new file mode 100644 index 000000000..46a2484bd --- /dev/null +++ b/src/backend/distributed/planner/merge_planner.c @@ -0,0 +1,718 @@ +/*------------------------------------------------------------------------- + * + * merge_planner.c + * + * This file contains functions to help plan MERGE queries. + * + * Copyright (c) Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#include + +#include "postgres.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" + +#include "distributed/citus_clauses.h" +#include "distributed/listutils.h" +#include "distributed/merge_planner.h" +#include "distributed/multi_logical_optimizer.h" +#include "distributed/multi_router_planner.h" +#include "distributed/pg_version_constants.h" +#include "distributed/query_pushdown_planning.h" + +#if PG_VERSION_NUM >= PG_VERSION_15 + +static DeferredErrorMessage * CheckIfRTETypeIsUnsupported(Query *parse, + RangeTblEntry *rangeTableEntry); +static DeferredErrorMessage * ErrorIfDistTablesNotColocated(Query *parse, + List * + distTablesList, + PlannerRestrictionContext + * + plannerRestrictionContext); +static DeferredErrorMessage * ErrorIfMergeHasUnsupportedTables(Query *parse, + List *rangeTableList, + PlannerRestrictionContext * + restrictionContext); +static bool IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool + skipOuterVars); +static DeferredErrorMessage * InsertDistributionColumnMatchesSource(Query *query, + RangeTblEntry * + resultRte); + +static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid + resultRelationId, + FromExpr *joinTree, + Node *quals, + List *targetList, + CmdType commandType); +#endif + + +/* + * CreateMergePlan attempts to create a plan for the given MERGE SQL + * statement. If planning fails, ->planningError is set to a description + * of the failure. + */ +DistributedPlan * +CreateMergePlan(Query *originalQuery, Query *query, + PlannerRestrictionContext *plannerRestrictionContext) +{ + /* + * For now, this is a placeholder until we isolate the merge + * planning into its own code path. + */ + return CreateModifyPlan(originalQuery, query, plannerRestrictionContext); +} + + +/* + * MergeQuerySupported checks for a MERGE command in the query; if it finds + * one, it verifies the criteria below + * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables + * - Distributed tables requirements in ErrorIfDistTablesNotColocated + * - Checks target-lists and functions-in-quals in TargetlistAndFunctionsSupported + */ +DeferredErrorMessage * +MergeQuerySupported(Query *originalQuery, bool multiShardQuery, + PlannerRestrictionContext *plannerRestrictionContext) +{ + /* function is void for pre-15 versions of Postgres */ + #if PG_VERSION_NUM < PG_VERSION_15 + + return NULL; + + #else + + /* For non-MERGE commands it's a no-op */ + if (!IsMergeQuery(originalQuery)) + { + return NULL; + } + + /* + * TODO: For now, we are adding an exception where any volatile or stable + * functions are not allowed in the MERGE query, but this will become too + * restrictive as it will prevent many useful and simple cases, such as + * INSERT VALUES(ts::timestamp), bigserial column inserts etc. Without + * this restriction, however, we run the risk of some of the function(s) + * getting executed at the worker, which will result in incorrect behavior.
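To make the TODO above concrete: contain_mutable_functions() flags STABLE as well as VOLATILE functions, so under this restriction a MERGE action that calls now() would be rejected. A hedged sketch with hypothetical tables target_t and source_t (the column names are illustrative, not from the patch):

    MERGE INTO target_t t
    USING source_t s ON (t.tid = s.sid)
    WHEN MATCHED THEN
        UPDATE SET updated_at = now();  -- now() is STABLE, expected to error out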
+ */ + if (contain_mutable_functions((Node *) originalQuery)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not yet supported " + "in MERGE sql with distributed tables ", + NULL, NULL); + } + + List *rangeTableList = ExtractRangeTableEntryList(originalQuery); + RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery); + + /* + * Fast-path queries cannot be MERGE commands, so we run the remaining checks + * here. In Citus we have limited support for MERGE: it is allowed only if all + * the tables (target, source or any CTE) are local, i.e. a + * combination of Citus local and non-Citus tables (regular Postgres tables), + * or distributed tables with some restrictions; please see the header of routine + * ErrorIfDistTablesNotColocated for details. + */ + DeferredErrorMessage *deferredError = + ErrorIfMergeHasUnsupportedTables(originalQuery, + rangeTableList, + plannerRestrictionContext); + if (deferredError) + { + /* MERGE's unsupported combination, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + + Oid resultRelationId = resultRte->relid; + deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId, + originalQuery->jointree, + originalQuery->jointree-> + quals, + originalQuery->targetList, + originalQuery->commandType); + if (deferredError) + { + return deferredError; + } + + /* + * MERGE is a special case in that it carries multiple modify statements + * within itself. Check each INSERT/UPDATE/DELETE individually. + */ + MergeAction *action = NULL; + foreach_ptr(action, originalQuery->mergeActionList) + { + Assert(originalQuery->returningList == NULL); + deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId, + originalQuery->jointree, + action->qual, + action->targetList, + action->commandType); + if (deferredError) + { + /* MERGE's unsupported scenario, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + } + + deferredError = + InsertDistributionColumnMatchesSource(originalQuery, resultRte); + if (deferredError) + { + /* MERGE's unsupported scenario, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + + if (multiShardQuery) + { + deferredError = + DeferErrorIfUnsupportedSubqueryPushdown(originalQuery, + plannerRestrictionContext); + if (deferredError) + { + return deferredError; + } + } + + if (HasDangerousJoinUsing(originalQuery->rtable, (Node *) originalQuery->jointree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "a join with USING causes an internal naming " + "conflict, use ON instead", NULL, NULL); + } + + return NULL; + + #endif +} + + +/* + * IsMergeAllowedOnRelation takes a relation entry and checks if the MERGE command + * is permitted on special relations, such as materialized views; it returns true + * only if the entry is a "source" relation. + */ +bool +IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) +{ + if (!IsMergeQuery(parse)) + { + return false; + } + + /* Fetch the MERGE target relation */ + RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); + + /* Is it a target relation? */ + if (targetRte->relid == rte->relid) + { + return false; + } + + return true; +} + + +#if PG_VERSION_NUM >= PG_VERSION_15 + +/* + * ErrorIfDistTablesNotColocated checks to see if + * + * - There are a minimum of two distributed tables (a source and a target). + * - All the distributed tables are indeed colocated. + * + * If any of the conditions are not met, it returns an error.
+ */ +static DeferredErrorMessage * +ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList, + PlannerRestrictionContext * + plannerRestrictionContext) +{ + /* All MERGE tables must be distributed */ + if (list_length(distTablesList) < 2) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, both the source and target " + "must be distributed", NULL, NULL); + } + + /* All distributed tables must be colocated */ + if (!AllDistributedRelationsInRTEListColocated(distTablesList)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, all the distributed tables " + "must be colocated", NULL, NULL); + } + + return NULL; +} + + +/* + * CheckIfRTETypeIsUnsupported checks for types of tables that are not supported, such + * as reference tables, append-distributed tables and materialized views as the target relation. + * The routine returns NULL for the supported types and an error message for everything else. + */ +static DeferredErrorMessage * +CheckIfRTETypeIsUnsupported(Query *parse, RangeTblEntry *rangeTableEntry) +{ + if (rangeTableEntry->relkind == RELKIND_MATVIEW || + rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) + { + /* Materialized view or Foreign table as target is not allowed */ + if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) + { + /* Non target relation is ok */ + return NULL; + } + else + { + /* Usually we don't reach this exception as the Postgres parser catches it */ + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "MERGE command is not allowed on " + "relation type(relkind:%c)", + rangeTableEntry->relkind); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + } + + if (rangeTableEntry->relkind != RELKIND_RELATION && + rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) + { + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) " + "in MERGE command", rangeTableEntry->relkind); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + + Assert(rangeTableEntry->relid != 0); + + /* Reference tables are not supported yet */ + if (IsCitusTableType(rangeTableEntry->relid, REFERENCE_TABLE)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported on reference " + "tables yet", NULL, NULL); + } + + /* Append/Range tables are not supported */ + if (IsCitusTableType(rangeTableEntry->relid, APPEND_DISTRIBUTED) || + IsCitusTableType(rangeTableEntry->relid, RANGE_DISTRIBUTED)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, all the distributed tables " + "must be colocated, for append/range distribution, " + "colocation is not supported", NULL, + "Consider using hash distribution instead"); + } + + return NULL; +} + + +/* + * ErrorIfMergeHasUnsupportedTables checks if all the tables (target, source or any CTE + * present) in the MERGE command are local, i.e. a combination of Citus local and non-Citus + * tables (regular Postgres tables), or distributed tables with some restrictions; please + * see the header of routine ErrorIfDistTablesNotColocated for details. It returns an error + * for all other combinations.
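Reading the two checks above together, the distributed combination this patch is expected to accept is a MERGE between hash-distributed, colocated tables. A minimal sketch with hypothetical table names; create_distributed_table and its colocate_with option are the existing Citus API:

    CREATE TABLE target_t (tid bigint, val int);
    CREATE TABLE source_t (sid bigint, val int);
    SELECT create_distributed_table('target_t', 'tid');
    SELECT create_distributed_table('source_t', 'sid', colocate_with => 'target_t');

    MERGE INTO target_t t
    USING source_t s ON (t.tid = s.sid)
    WHEN MATCHED THEN UPDATE SET val = s.val
    WHEN NOT MATCHED THEN INSERT (tid, val) VALUES (s.sid, s.val);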
+ */ +static DeferredErrorMessage * +ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList, + PlannerRestrictionContext *restrictionContext) +{ + List *distTablesList = NIL; + bool foundLocalTables = false; + + RangeTblEntry *rangeTableEntry = NULL; + foreach_ptr(rangeTableEntry, rangeTableList) + { + Oid relationId = rangeTableEntry->relid; + + switch (rangeTableEntry->rtekind) + { + case RTE_RELATION: + { + /* Check the relation type */ + break; + } + + case RTE_SUBQUERY: + case RTE_FUNCTION: + case RTE_TABLEFUNC: + case RTE_VALUES: + case RTE_JOIN: + case RTE_CTE: + { + /* Skip them as base table(s) will be checked */ + continue; + } + + /* + * RTE_NAMEDTUPLESTORE is typically used in ephemeral named relations, + * such as trigger data; until we find a genuine use case, raise an + * exception. + * RTE_RESULT is a node added by the planner and we shouldn't + * encounter it in the parse tree. + */ + case RTE_NAMEDTUPLESTORE: + case RTE_RESULT: + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported with " + "Tuplestores and results", + NULL, NULL); + } + + default: + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command: Unrecognized range table entry.", + NULL, NULL); + } + } + + /* RTE Relation can be of various types, check them now */ + + /* skip the regular views as they are replaced with subqueries */ + if (rangeTableEntry->relkind == RELKIND_VIEW) + { + continue; + } + + DeferredErrorMessage *errorMessage = + CheckIfRTETypeIsUnsupported(parse, rangeTableEntry); + if (errorMessage) + { + return errorMessage; + } + + /* + * For now, save all distributed tables, later (below) we will + * check for supported combination(s). + */ + if (IsCitusTableType(relationId, DISTRIBUTED_TABLE)) + { + distTablesList = lappend(distTablesList, rangeTableEntry); + continue; + } + + /* Regular Postgres tables and Citus local tables are allowed */ + if (!IsCitusTable(relationId) || + IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) + { + foundLocalTables = true; + continue; + } + + /* Any other Citus table type missing? */ + } + + /* Ensure all tables are indeed local */ + if (foundLocalTables && list_length(distTablesList) == 0) + { + /* All the tables are local, supported */ + return NULL; + } + else if (foundLocalTables && list_length(distTablesList) > 0) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported with " + "combination of distributed/local tables yet", + NULL, NULL); + } + + /* Ensure all distributed tables are indeed co-located */ + return ErrorIfDistTablesNotColocated(parse, + distTablesList, + restrictionContext); +} + + +/* + * IsDistributionColumnInMergeSource returns true if the given column is a partition column. + * The function uses FindReferencedTableColumn to find the original relation + * id and column that the column expression refers to. It then checks whether + * that column is a partition column of the relation. + * + * Also, the function always returns false for reference tables given that + * reference tables do not have a partition column. + * + * If skipOuterVars is true, then it doesn't process the outervars.
+ */ +bool +IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool + skipOuterVars) +{ + bool isDistributionColumn = false; + Var *column = NULL; + RangeTblEntry *relationRTE = NULL; + + /* ParentQueryList is same as the original query for MERGE */ + FindReferencedTableColumn(columnExpression, list_make1(query), query, &column, + &relationRTE, + skipOuterVars); + Oid relationId = relationRTE ? relationRTE->relid : InvalidOid; + if (relationId != InvalidOid && column != NULL) + { + Var *distributionColumn = DistPartitionKey(relationId); + + /* not all distributed tables have partition column */ + if (distributionColumn != NULL && column->varattno == + distributionColumn->varattno) + { + isDistributionColumn = true; + } + } + + return isDistributionColumn; +} + + +/* + * InsertDistributionColumnMatchesSource checks to see if MERGE is inserting a + * value into the target which is not from the source table; if so, it + * raises an exception. + * Note: Inserting random values other than the joined column values will + * result in unexpected behaviour of rows ending up in incorrect shards; to + * prevent such mishaps, we disallow such inserts here. + */ +static DeferredErrorMessage * +InsertDistributionColumnMatchesSource(Query *query, RangeTblEntry *resultRte) +{ + Assert(IsMergeQuery(query)); + + if (!IsCitusTableType(resultRte->relid, DISTRIBUTED_TABLE)) + { + return NULL; + } + + bool foundDistributionColumn = false; + MergeAction *action = NULL; + foreach_ptr(action, query->mergeActionList) + { + /* Skip MATCHED clause as INSERTs are not allowed in it */ + if (action->matched) + { + continue; + } + + /* NOT MATCHED can have either INSERT or DO NOTHING */ + if (action->commandType == CMD_NOTHING) + { + return NULL; + } + + if (action->targetList == NIL) + { + /* INSERT DEFAULT VALUES is not allowed */ + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot perform MERGE INSERT with DEFAULTS", + NULL, NULL); + } + + Assert(action->commandType == CMD_INSERT); + Var *targetKey = PartitionColumn(resultRte->relid, 1); + + TargetEntry *targetEntry = NULL; + foreach_ptr(targetEntry, action->targetList) + { + AttrNumber originalAttrNo = targetEntry->resno; + + /* skip processing of target table non-partition columns */ + if (originalAttrNo != targetKey->varattno) + { + continue; + } + + foundDistributionColumn = true; + + if (IsA(targetEntry->expr, Var)) + { + if (IsDistributionColumnInMergeSource(targetEntry->expr, query, true)) + { + return NULL; + } + else + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must use the source table " + "distribution column value", + NULL, NULL); + } + } + else + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must refer a source column " + "for distribution column ", + NULL, NULL); + } + } + + if (!foundDistributionColumn) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must have distribution column as value", + NULL, NULL); + } + } + + return NULL; +} + + +/* + * MergeQualAndTargetListFunctionsSupported checks WHEN/ON clause actions to see what functions + * are allowed and whether we are updating the distribution column, etc.
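The rule InsertDistributionColumnMatchesSource enforces above is easiest to see in NOT MATCHED clauses: the target's distribution column must be filled from the source's distribution column, not from an arbitrary expression. Hedged clause fragments, reusing the hypothetical MERGE from earlier:

    -- expected to be accepted: tid comes from the source's distribution column
    WHEN NOT MATCHED THEN INSERT (tid, val) VALUES (s.sid, s.val)

    -- expected to be rejected: a constant could send the row to the wrong shard
    WHEN NOT MATCHED THEN INSERT (tid, val) VALUES (42, s.val)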
+ */ +static DeferredErrorMessage * +MergeQualAndTargetListFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, + Node *quals, + List *targetList, CmdType commandType) +{ + uint32 rangeTableId = 1; + Var *distributionColumn = NULL; + if (IsCitusTable(resultRelationId) && HasDistributionKey(resultRelationId)) + { + distributionColumn = PartitionColumn(resultRelationId, rangeTableId); + } + + ListCell *targetEntryCell = NULL; + bool hasVarArgument = false; /* A STABLE function is passed a Var argument */ + bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */ + foreach(targetEntryCell, targetList) + { + TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); + + /* skip resjunk entries: UPDATE adds some for ctid, etc. */ + if (targetEntry->resjunk) + { + continue; + } + + bool targetEntryDistributionColumn = false; + AttrNumber targetColumnAttrNumber = InvalidAttrNumber; + + if (distributionColumn) + { + if (commandType == CMD_UPDATE) + { + /* + * Note that it is not possible to give an alias to + * UPDATE table SET ... + */ + if (targetEntry->resname) + { + targetColumnAttrNumber = get_attnum(resultRelationId, + targetEntry->resname); + if (targetColumnAttrNumber == distributionColumn->varattno) + { + targetEntryDistributionColumn = true; + } + } + } + } + + if (targetEntryDistributionColumn && + TargetEntryChangesValue(targetEntry, distributionColumn, joinTree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "updating the distribution column is not " + "allowed in MERGE actions", + NULL, NULL); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + CitusIsVolatileFunction)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "functions used in MERGE actions on distributed " + "tables must not be VOLATILE", + NULL, NULL); + } + + if (MasterIrreducibleExpression((Node *) targetEntry->expr, + &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + NodeIsFieldStore)) + { + /* DELETE cannot do field indirection already */ + Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "inserting or modifying composite type fields is not " + "supported", NULL, + "Use the column name to insert or update the composite " + "type as a single value"); + } + } + + + /* + * Check the condition, convert list of expressions into expression tree for further processing + */ + if (quals) + { + if (IsA(quals, List)) + { + quals = (Node *) make_ands_explicit((List *) quals); + } + + if (FindNodeMatchingCheckFunction((Node *) quals, CitusIsVolatileFunction)) + { + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "functions used in the %s clause of MERGE " + "queries on distributed tables must not be VOLATILE", + (commandType == CMD_MERGE) ? 
"ON" : "WHEN"); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + else if (MasterIrreducibleExpression(quals, &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + } + + if (hasVarArgument) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "STABLE functions used in MERGE queries " + "cannot be called with column references", + NULL, NULL); + } + + if (hasBadCoalesce) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not allowed in CASE or " + "COALESCE statements", + NULL, NULL); + } + + if (quals != NULL && nodeTag(quals) == T_CurrentOfExpr) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot run MERGE actions with cursors", + NULL, NULL); + } + + return NULL; +} + + +#endif diff --git a/src/backend/distributed/planner/multi_join_order.c b/src/backend/distributed/planner/multi_join_order.c index 9b2342b20..b1195c664 100644 --- a/src/backend/distributed/planner/multi_join_order.c +++ b/src/backend/distributed/planner/multi_join_order.c @@ -1383,7 +1383,7 @@ DistPartitionKey(Oid relationId) CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId); /* non-distributed tables do not have partition column */ - if (IsCitusTableTypeCacheEntry(partitionEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(partitionEntry)) { return NULL; } diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 7e665b567..d9322bf5e 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -228,7 +228,7 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList) * If the expression belongs to a non-distributed table continue searching for * other partition keys. 
*/ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { continue; } diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 901e9de17..be6caf0e2 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -2199,7 +2199,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId, Oid relationId = relationRestriction->relationId; CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { continue; } @@ -2377,7 +2377,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) nonReferenceRelations = lappend_oid(nonReferenceRelations, relationId); } - else if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { /* do not need to handle non-distributed tables */ continue; @@ -2482,7 +2482,7 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex, ShardInterval *shardInterval = NULL; CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* non-distributed tables have only one shard */ shardInterval = cacheEntry->sortedShardIntervalArray[0]; @@ -3697,7 +3697,7 @@ PartitionedOnColumn(Var *column, List *rangeTableList, List *dependentJobList) Var *partitionColumn = PartitionColumn(relationId, rangeTableId); /* non-distributed tables do not have partition columns */ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { return false; } @@ -5343,8 +5343,7 @@ ActiveShardPlacementLists(List *taskList) /* - * CompareShardPlacements compares two shard placements by their tuple oid; this - * oid reflects the tuple's insertion order into pg_dist_placement. + * CompareShardPlacements compares two shard placements by placement id. */ int CompareShardPlacements(const void *leftElement, const void *rightElement) @@ -5370,6 +5369,35 @@ CompareShardPlacements(const void *leftElement, const void *rightElement) } +/* + * CompareGroupShardPlacements compares two group shard placements by placement id. + */ +int +CompareGroupShardPlacements(const void *leftElement, const void *rightElement) +{ + const GroupShardPlacement *leftPlacement = + *((const GroupShardPlacement **) leftElement); + const GroupShardPlacement *rightPlacement = + *((const GroupShardPlacement **) rightElement); + + uint64 leftPlacementId = leftPlacement->placementId; + uint64 rightPlacementId = rightPlacement->placementId; + + if (leftPlacementId < rightPlacementId) + { + return -1; + } + else if (leftPlacementId > rightPlacementId) + { + return 1; + } + else + { + return 0; + } +} + + /* * LeftRotateList returns a copy of the given list that has been cyclically * shifted to the left by the given rotation count. 
For this, the function diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index e2776cc9a..9603ccc2d 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -33,6 +33,7 @@ #include "distributed/intermediate_result_pruning.h" #include "distributed/metadata_utility.h" #include "distributed/coordinator_protocol.h" +#include "distributed/merge_planner.h" #include "distributed/metadata_cache.h" #include "distributed/multi_executor.h" #include "distributed/multi_join_order.h" @@ -125,21 +126,15 @@ static bool IsTidColumn(Node *node); static DeferredErrorMessage * ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery, Oid *distributedTableId); -static bool NodeIsFieldStore(Node *node); -static DeferredErrorMessage * MultiShardUpdateDeleteMergeSupported(Query *originalQuery, - PlannerRestrictionContext - * - plannerRestrictionContext); +static DeferredErrorMessage * MultiShardUpdateDeleteSupported(Query *originalQuery, + PlannerRestrictionContext + * + plannerRestrictionContext); static DeferredErrorMessage * SingleShardUpdateDeleteSupported(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext); -static bool HasDangerousJoinUsing(List *rtableList, Node *jtnode); -static bool MasterIrreducibleExpression(Node *expression, bool *varArgument, - bool *badCoalesce); static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state); static bool MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context); -static bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, - FromExpr *joinTree); static Job * RouterInsertJob(Query *originalQuery); static void ErrorIfNoShardsExist(CitusTableCacheEntry *cacheEntry); static DeferredErrorMessage * DeferErrorIfModifyView(Query *queryTree); @@ -180,12 +175,7 @@ static void ReorderTaskPlacementsByTaskAssignmentPolicy(Job *job, static bool ModifiesLocalTableWithRemoteCitusLocalTable(List *rangeTableList); static DeferredErrorMessage * DeferErrorIfUnsupportedLocalTableJoin(List *rangeTableList); static bool IsLocallyAccessibleCitusLocalTable(Oid relationId); -static DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, - FromExpr *joinTree, - Node *quals, - List *targetList, - CmdType commandType, - List *returningList); + /* * CreateRouterPlan attempts to create a router executor plan for the given @@ -523,7 +513,7 @@ IsTidColumn(Node *node) * updating distribution column, etc. * Note: This subset of checks are repeated for each MERGE modify action. */ -static DeferredErrorMessage * +DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, Node *quals, List *targetList, CmdType commandType, List *returningList) @@ -899,7 +889,7 @@ IsLocallyAccessibleCitusLocalTable(Oid relationId) /* * NodeIsFieldStore returns true if given Node is a FieldStore object. 
*/ -static bool +bool NodeIsFieldStore(Node *node) { return node && IsA(node, FieldStore); @@ -921,8 +911,14 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer PlannerRestrictionContext *plannerRestrictionContext) { Oid distributedTableId = InvalidOid; - DeferredErrorMessage *error = ModifyPartialQuerySupported(queryTree, multiShardQuery, - &distributedTableId); + DeferredErrorMessage *error = MergeQuerySupported(originalQuery, multiShardQuery, + plannerRestrictionContext); + if (error) + { + return error; + } + + error = ModifyPartialQuerySupported(queryTree, multiShardQuery, &distributedTableId); if (error) { return error; @@ -1092,13 +1088,13 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer } } - if (commandType != CMD_INSERT) + if (commandType != CMD_INSERT && commandType != CMD_MERGE) { DeferredErrorMessage *errorMessage = NULL; if (multiShardQuery) { - errorMessage = MultiShardUpdateDeleteMergeSupported( + errorMessage = MultiShardUpdateDeleteSupported( originalQuery, plannerRestrictionContext); } @@ -1279,12 +1275,12 @@ ErrorIfOnConflictNotSupported(Query *queryTree) /* - * MultiShardUpdateDeleteMergeSupported returns the error message if the update/delete is + * MultiShardUpdateDeleteSupported returns the error message if the update/delete is * not pushdownable, otherwise it returns NULL. */ static DeferredErrorMessage * -MultiShardUpdateDeleteMergeSupported(Query *originalQuery, - PlannerRestrictionContext *plannerRestrictionContext) +MultiShardUpdateDeleteSupported(Query *originalQuery, + PlannerRestrictionContext *plannerRestrictionContext) { DeferredErrorMessage *errorMessage = NULL; RangeTblEntry *resultRangeTable = ExtractResultRelationRTE(originalQuery); @@ -1315,8 +1311,9 @@ MultiShardUpdateDeleteMergeSupported(Query *originalQuery, } else { - errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(originalQuery, - plannerRestrictionContext); + errorMessage = DeferErrorIfUnsupportedSubqueryPushdown( + originalQuery, + plannerRestrictionContext); } return errorMessage; @@ -1356,7 +1353,7 @@ SingleShardUpdateDeleteSupported(Query *originalQuery, * HasDangerousJoinUsing search jointree for unnamed JOIN USING. Check the * implementation of has_dangerous_join_using in ruleutils. */ -static bool +bool HasDangerousJoinUsing(List *rtableList, Node *joinTreeNode) { if (IsA(joinTreeNode, RangeTblRef)) @@ -1460,7 +1457,7 @@ IsMergeQuery(Query *query) * which do, but for now we just error out. That makes both the code and user-education * easier. */ -static bool +bool MasterIrreducibleExpression(Node *expression, bool *varArgument, bool *badCoalesce) { WalkerState data; @@ -1608,7 +1605,7 @@ MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context) * expression is a value that is implied by the qualifiers of the join * tree, or the target entry sets a different column. */ -static bool +bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, FromExpr *joinTree) { bool isColumnValueChanged = true; @@ -1879,8 +1876,8 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon if (*planningError) { /* - * For MERGE, we do _not_ plan anything other than Router job, let's - * not continue further down the lane in distributed planning, simply + * For MERGE, we do _not_ plan any other router job than the MERGE job itself, + * let's not continue further down the lane in distributed planning, simply * bail out. 
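One user-visible consequence of the MERGE-only router behavior above, sketched with the hypothetical tables from earlier: mixing a distributed target with a plain Postgres source is expected to fail with the "combination of distributed/local tables" error introduced in merge_planner.c rather than fall back to another planner:

    CREATE TABLE local_t (sid bigint, val int);  -- a regular, non-Citus table

    MERGE INTO target_t t        -- hash-distributed, as sketched earlier
    USING local_t s ON (t.tid = s.sid)
    WHEN MATCHED THEN UPDATE SET val = s.val;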
*/ if (IsMergeQuery(originalQuery)) @@ -2707,7 +2704,7 @@ TargetShardIntervalForFastPathQuery(Query *query, bool *isMultiShardQuery, { Oid relationId = ExtractFirstCitusTableId(query); - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { /* we don't need to do shard pruning for non-distributed tables */ return list_make1(LoadShardIntervalList(relationId)); @@ -3000,7 +2997,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError) Assert(query->commandType == CMD_INSERT); /* reference tables and citus local tables can only have one shard */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { List *shardIntervalList = LoadShardIntervalList(distributedTableId); @@ -3541,7 +3538,7 @@ ExtractInsertPartitionKeyValue(Query *query) uint32 rangeTableId = 1; Const *singlePartitionValueConst = NULL; - if (IsCitusTableType(distributedTableId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(distributedTableId)) { return NULL; } @@ -3861,8 +3858,7 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree) CitusTableCacheEntry *modificationTableCacheEntry = GetCitusTableCacheEntry(distributedTableId); - if (IsCitusTableTypeCacheEntry(modificationTableCacheEntry, - CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(modificationTableCacheEntry)) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, "cannot router plan modification of a non-distributed table", diff --git a/src/backend/distributed/planner/query_colocation_checker.c b/src/backend/distributed/planner/query_colocation_checker.c index b7cc41068..c5de0ef9e 100644 --- a/src/backend/distributed/planner/query_colocation_checker.c +++ b/src/backend/distributed/planner/query_colocation_checker.c @@ -168,7 +168,7 @@ AnchorRte(Query *subquery) { Oid relationId = currentRte->relid; - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { /* * Non-distributed tables should not be the anchor rte since they diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 5cae19497..cbe6a3606 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -591,10 +591,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, } else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) { + StringInfo errorMessage = makeStringInfo(); + bool isMergeCmd = IsMergeQuery(originalQuery); + appendStringInfo(errorMessage, + "%s" + "only supported when all distributed tables are " + "co-located and joined on their distribution columns", + isMergeCmd ? 
"MERGE command is " : "complex joins are "); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "complex joins are only supported when all distributed tables are " - "co-located and joined on their distribution columns", - NULL, NULL); + errorMessage->data, NULL, NULL); } /* we shouldn't allow reference tables in the FROM clause when the query has sublinks */ diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 713f1f4f2..3fa3068dc 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -151,8 +151,10 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass secondClass); static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex, Index *partitionKeyIndex); -static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext * - restrictionContext); +static bool AllDistributedRelationsInRestrictionContextColocated( + RelationRestrictionContext * + restrictionContext); +static bool AllDistributedRelationsInListColocated(List *relationList); static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node); static JoinRestrictionContext * FilterJoinRestrictionContext( JoinRestrictionContext *joinRestrictionContext, Relids @@ -383,7 +385,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery, return false; } - if (!AllRelationsInRestrictionContextColocated(restrictionContext)) + if (!AllDistributedRelationsInRestrictionContextColocated(restrictionContext)) { /* distribution columns are equal, but tables are not co-located */ return false; @@ -703,8 +705,8 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList, int rteIdentity = GetRTEIdentity(relationRestriction->rte); /* we shouldn't check for the equality of non-distributed tables */ - if (IsCitusTableType(relationRestriction->relationId, - CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationRestriction->relationId) && + !HasDistributionKey(relationRestriction->relationId)) { continue; } @@ -1919,22 +1921,66 @@ FindQueryContainingRTEIdentityInternal(Node *node, /* - * AllRelationsInRestrictionContextColocated determines whether all of the relations in the - * given relation restrictions list are co-located. + * AllDistributedRelationsInRestrictionContextColocated determines whether all of the + * distributed relations in the given relation restrictions list are co-located. */ static bool -AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext) +AllDistributedRelationsInRestrictionContextColocated( + RelationRestrictionContext *restrictionContext) { RelationRestriction *relationRestriction = NULL; - int initialColocationId = INVALID_COLOCATION_ID; + List *relationIdList = NIL; /* check whether all relations exists in the main restriction list */ foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList) { - Oid relationId = relationRestriction->relationId; + relationIdList = lappend_oid(relationIdList, relationRestriction->relationId); + } - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + return AllDistributedRelationsInListColocated(relationIdList); +} + + +/* + * AllDistributedRelationsInRTEListColocated determines whether all of the + * distributed relations in the given RangeTableEntry list are co-located. 
+ */ +bool +AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList) +{ + RangeTblEntry *rangeTableEntry = NULL; + List *relationIdList = NIL; + + foreach_ptr(rangeTableEntry, rangeTableEntryList) + { + relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid); + } + + return AllDistributedRelationsInListColocated(relationIdList); +} + + +/* + * AllDistributedRelationsInListColocated determines whether all of the + * distributed relations in the given list are co-located. + */ +static bool +AllDistributedRelationsInListColocated(List *relationList) +{ + int initialColocationId = INVALID_COLOCATION_ID; + Oid relationId = InvalidOid; + + foreach_oid(relationId, relationList) + { + if (!IsCitusTable(relationId)) { + /* not interested in Postgres tables */ + continue; + } + + if (!IsCitusTableType(relationId, DISTRIBUTED_TABLE)) + { + /* not interested in non-distributed tables */ continue; } diff --git a/src/backend/distributed/planner/shard_pruning.c b/src/backend/distributed/planner/shard_pruning.c index 665c9a75b..5375a70fa 100644 --- a/src/backend/distributed/planner/shard_pruning.c +++ b/src/backend/distributed/planner/shard_pruning.c @@ -333,7 +333,7 @@ PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList, } /* short circuit for non-distributed tables such as reference table */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { prunedList = ShardArrayToList(cacheEntry->sortedShardIntervalArray, cacheEntry->shardIntervalArrayLength); diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 6ceae8b52..c390ee1c0 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -2304,7 +2304,7 @@ RegisterCitusConfigVariables(void) NULL, &CitusStatsTenantsLimit, 10, 1, 100, - PGC_USERSET, + PGC_POSTMASTER, GUC_STANDARD, NULL, NULL, NULL); @@ -2313,7 +2313,7 @@ RegisterCitusConfigVariables(void) gettext_noop("monitor period"), NULL, &CitusStatsTenantsPeriod, - 60, 1, 60 * 60, + 60, 1, 1000000000, PGC_USERSET, GUC_STANDARD, NULL, NULL, NULL); diff --git a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql index 73a2bf8a9..30df05d49 100644 --- a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql +++ b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql @@ -2,4 +2,5 @@ -- bump version to 11.3-1 +#include "udfs/citus_stats_tenants_local/11.3-1.sql" #include "udfs/citus_stats_tenants/11.3-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql index 4bf4c75ff..28052b7fa 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql @@ -1,5 +1,7 @@ -- citus--11.3-1--11.2-1 --- this is an empty downgrade path since citus--11.2-1--11.3-1.sql is empty for now + +DROP VIEW pg_catalog.citus_stats_tenants_local; +DROP FUNCTION pg_catalog.citus_stats_tenants_local(boolean); DROP VIEW pg_catalog.citus_stats_tenants; DROP FUNCTION pg_catalog.citus_stats_tenants(boolean); diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql index f476a9c28..d85f90d66 100644 --- a/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql +++ 
b/src/backend/distributed/sql/udfs/citus_stats_tenants/11.3-1.sql @@ -1,27 +1,66 @@ -CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants( +-- cts in the query is an abbreviation for citus_stats_tenants +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants ( return_all_tenants BOOLEAN DEFAULT FALSE, + OUT nodeid INT, OUT colocation_id INT, OUT tenant_attribute TEXT, OUT read_count_in_this_period INT, OUT read_count_in_last_period INT, OUT query_count_in_this_period INT, OUT query_count_in_last_period INT, - OUT score BIGINT) -RETURNS SETOF RECORD -LANGUAGE C -AS 'citus', $$citus_stats_tenants$$; - + OUT score BIGINT +) + RETURNS SETOF record + LANGUAGE plpgsql + AS $function$ +BEGIN + RETURN QUERY + SELECT * + FROM jsonb_to_recordset(( + SELECT + jsonb_agg(all_cst_rows_as_jsonb.cst_row_as_jsonb)::jsonb + FROM ( + SELECT + jsonb_array_elements(run_command_on_all_nodes.result::jsonb)::jsonb || + ('{"nodeid":' || run_command_on_all_nodes.nodeid || '}')::jsonb AS cst_row_as_jsonb + FROM + run_command_on_all_nodes ( + $$ + SELECT + coalesce(to_jsonb (array_agg(cstl.*)), '[]'::jsonb) + FROM citus_stats_tenants_local($$||return_all_tenants||$$) cstl; + $$, + parallel:= TRUE, + give_warning_for_connection_errors:= TRUE) + WHERE + success = 't') + AS all_cst_rows_as_jsonb)) +AS ( + nodeid INT, + colocation_id INT, + tenant_attribute TEXT, + read_count_in_this_period INT, + read_count_in_last_period INT, + query_count_in_this_period INT, + query_count_in_last_period INT, + score BIGINT +) + ORDER BY score DESC + LIMIT CASE WHEN NOT return_all_tenants THEN current_setting('citus.stats_tenants_limit')::BIGINT END; +END; +$function$; CREATE OR REPLACE VIEW citus.citus_stats_tenants AS SELECT + nodeid, colocation_id, tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period -FROM pg_catalog.citus_stats_tenants() -ORDER BY score DESC; +FROM pg_catalog.citus_stats_tenants(FALSE); ALTER VIEW citus.citus_stats_tenants SET SCHEMA pg_catalog; + GRANT SELECT ON pg_catalog.citus_stats_tenants TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql index f476a9c28..d85f90d66 100644 --- a/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants/latest.sql @@ -1,27 +1,66 @@ -CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants( +-- cts in the query is an abbreviation for citus_stats_tenants +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants ( return_all_tenants BOOLEAN DEFAULT FALSE, + OUT nodeid INT, OUT colocation_id INT, OUT tenant_attribute TEXT, OUT read_count_in_this_period INT, OUT read_count_in_last_period INT, OUT query_count_in_this_period INT, OUT query_count_in_last_period INT, - OUT score BIGINT) -RETURNS SETOF RECORD -LANGUAGE C -AS 'citus', $$citus_stats_tenants$$; - + OUT score BIGINT +) + RETURNS SETOF record + LANGUAGE plpgsql + AS $function$ +BEGIN + RETURN QUERY + SELECT * + FROM jsonb_to_recordset(( + SELECT + jsonb_agg(all_cst_rows_as_jsonb.cst_row_as_jsonb)::jsonb + FROM ( + SELECT + jsonb_array_elements(run_command_on_all_nodes.result::jsonb)::jsonb || + ('{"nodeid":' || run_command_on_all_nodes.nodeid || '}')::jsonb AS cst_row_as_jsonb + FROM + run_command_on_all_nodes ( + $$ + SELECT + coalesce(to_jsonb (array_agg(cstl.*)), '[]'::jsonb) + FROM citus_stats_tenants_local($$||return_all_tenants||$$) cstl; + $$, + parallel:= TRUE, + 
give_warning_for_connection_errors:= TRUE) + WHERE + success = 't') + AS all_cst_rows_as_jsonb)) +AS ( + nodeid INT, + colocation_id INT, + tenant_attribute TEXT, + read_count_in_this_period INT, + read_count_in_last_period INT, + query_count_in_this_period INT, + query_count_in_last_period INT, + score BIGINT +) + ORDER BY score DESC + LIMIT CASE WHEN NOT return_all_tenants THEN current_setting('citus.stats_tenants_limit')::BIGINT END; +END; +$function$; CREATE OR REPLACE VIEW citus.citus_stats_tenants AS SELECT + nodeid, colocation_id, tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period -FROM pg_catalog.citus_stats_tenants() -ORDER BY score DESC; +FROM pg_catalog.citus_stats_tenants(FALSE); ALTER VIEW citus.citus_stats_tenants SET SCHEMA pg_catalog; + GRANT SELECT ON pg_catalog.citus_stats_tenants TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants_local/11.3-1.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants_local/11.3-1.sql new file mode 100644 index 000000000..5a47835e7 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants_local/11.3-1.sql @@ -0,0 +1,27 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants_local( + return_all_tenants BOOLEAN DEFAULT FALSE, + OUT colocation_id INT, + OUT tenant_attribute TEXT, + OUT read_count_in_this_period INT, + OUT read_count_in_last_period INT, + OUT query_count_in_this_period INT, + OUT query_count_in_last_period INT, + OUT score BIGINT) +RETURNS SETOF RECORD +LANGUAGE C +AS 'citus', $$citus_stats_tenants_local$$; + + +CREATE OR REPLACE VIEW citus.citus_stats_tenants_local AS +SELECT + colocation_id, + tenant_attribute, + read_count_in_this_period, + read_count_in_last_period, + query_count_in_this_period, + query_count_in_last_period +FROM pg_catalog.citus_stats_tenants_local() +ORDER BY score DESC; + +ALTER VIEW citus.citus_stats_tenants_local SET SCHEMA pg_catalog; +GRANT SELECT ON pg_catalog.citus_stats_tenants_local TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_stats_tenants_local/latest.sql b/src/backend/distributed/sql/udfs/citus_stats_tenants_local/latest.sql new file mode 100644 index 000000000..5a47835e7 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_stats_tenants_local/latest.sql @@ -0,0 +1,27 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_stats_tenants_local( + return_all_tenants BOOLEAN DEFAULT FALSE, + OUT colocation_id INT, + OUT tenant_attribute TEXT, + OUT read_count_in_this_period INT, + OUT read_count_in_last_period INT, + OUT query_count_in_this_period INT, + OUT query_count_in_last_period INT, + OUT score BIGINT) +RETURNS SETOF RECORD +LANGUAGE C +AS 'citus', $$citus_stats_tenants_local$$; + + +CREATE OR REPLACE VIEW citus.citus_stats_tenants_local AS +SELECT + colocation_id, + tenant_attribute, + read_count_in_this_period, + read_count_in_last_period, + query_count_in_this_period, + query_count_in_last_period +FROM pg_catalog.citus_stats_tenants_local() +ORDER BY score DESC; + +ALTER VIEW citus.citus_stats_tenants_local SET SCHEMA pg_catalog; +GRANT SELECT ON pg_catalog.citus_stats_tenants_local TO PUBLIC; diff --git a/src/backend/distributed/transaction/relation_access_tracking.c b/src/backend/distributed/transaction/relation_access_tracking.c index a6a8ba5f6..2ecbba5b7 100644 --- a/src/backend/distributed/transaction/relation_access_tracking.c +++ b/src/backend/distributed/transaction/relation_access_tracking.c @@ -195,7 +195,7 @@ 
RecordRelationAccessIfNonDistTable(Oid relationId, ShardPlacementAccessType acce * recursively calling RecordRelationAccessBase(), so becareful about * removing this check. */ - if (!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && HasDistributionKey(relationId)) { return; } @@ -732,8 +732,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (!(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY) && - cacheEntry->referencingRelationsViaForeignKey != NIL)) + if (HasDistributionKeyCacheEntry(cacheEntry) || + cacheEntry->referencingRelationsViaForeignKey == NIL) { return; } @@ -931,7 +931,7 @@ HoldsConflictingLockWithReferencedRelations(Oid relationId, ShardPlacementAccess * We're only interested in foreign keys to reference tables and citus * local tables. */ - if (!IsCitusTableType(referencedRelation, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(referencedRelation) && HasDistributionKey(referencedRelation)) { continue; } @@ -993,7 +993,7 @@ HoldsConflictingLockWithReferencingRelations(Oid relationId, ShardPlacementAcces CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); bool holdsConflictingLocks = false; - Assert(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)); + Assert(!HasDistributionKeyCacheEntry(cacheEntry)); Oid referencingRelation = InvalidOid; foreach_oid(referencingRelation, cacheEntry->referencingRelationsViaForeignKey) diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index db5a5d0cb..10a6ef864 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -14,17 +14,19 @@ #include "distributed/citus_safe_lib.h" #include "distributed/log_utils.h" #include "distributed/listutils.h" +#include "distributed/metadata_cache.h" #include "distributed/jsonbutils.h" +#include "distributed/colocation_utils.h" #include "distributed/tuplestore.h" #include "distributed/colocation_utils.h" #include "executor/execdesc.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/shmem.h" +#include #include "utils/builtins.h" #include "utils/json.h" #include "distributed/utils/attribute.h" -#include "miscadmin.h" #include @@ -39,7 +41,7 @@ ExecutorEnd_hook_type prev_ExecutorEnd = NULL; #define ONE_QUERY_SCORE 1000000000 /* TODO maybe needs to be a stack */ -char attributeToTenant[100] = ""; +char attributeToTenant[MAX_TENANT_ATTRIBUTE_LENGTH] = ""; CmdType attributeCommandType = CMD_UNKNOWN; int colocationGroupId = -1; clock_t attributeToTenantStart = { 0 }; @@ -52,15 +54,16 @@ char *monitorTrancheName = "Multi Tenant Monitor Tranche"; static shmem_startup_hook_type prev_shmem_startup_hook = NULL; -static void UpdatePeriodsIfNecessary(MultiTenantMonitor *monitor, - TenantStats *tenantStats); -static void ReduceScoreIfNecessary(MultiTenantMonitor *monitor, TenantStats *tenantStats, - time_t updateTime); +static int CompareTenantScore(const void *leftElement, const void *rightElement); +static void UpdatePeriodsIfNecessary(TenantStats *tenantStats, time_t queryTime); +static void ReduceScoreIfNecessary(TenantStats *tenantStats, time_t queryTime); +static void EvictTenantsIfNecessary(time_t queryTime); +static void RecordTenantStats(TenantStats *tenantStats); static void CreateMultiTenantMonitor(void); static MultiTenantMonitor * CreateSharedMemoryForMultiTenantMonitor(void); static 
diff --git a/src/backend/distributed/utils/attribute.c b/src/backend/distributed/utils/attribute.c index db5a5d0cb..10a6ef864 100644 --- a/src/backend/distributed/utils/attribute.c +++ b/src/backend/distributed/utils/attribute.c @@ -14,17 +14,19 @@ #include "distributed/citus_safe_lib.h" #include "distributed/log_utils.h" #include "distributed/listutils.h" +#include "distributed/metadata_cache.h" #include "distributed/jsonbutils.h" +#include "distributed/colocation_utils.h" #include "distributed/tuplestore.h" #include "distributed/colocation_utils.h" #include "executor/execdesc.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/shmem.h" +#include <sys/time.h> #include "utils/builtins.h" #include "utils/json.h" #include "distributed/utils/attribute.h" -#include "miscadmin.h" +#include <time.h> @@ -39,7 +41,7 @@ ExecutorEnd_hook_type prev_ExecutorEnd = NULL; #define ONE_QUERY_SCORE 1000000000 /* TODO maybe needs to be a stack */ -char attributeToTenant[100] = ""; +char attributeToTenant[MAX_TENANT_ATTRIBUTE_LENGTH] = ""; CmdType attributeCommandType = CMD_UNKNOWN; int colocationGroupId = -1; clock_t attributeToTenantStart = { 0 }; @@ -52,15 +54,16 @@ char *monitorTrancheName = "Multi Tenant Monitor Tranche"; static shmem_startup_hook_type prev_shmem_startup_hook = NULL; -static void UpdatePeriodsIfNecessary(MultiTenantMonitor *monitor, - TenantStats *tenantStats); -static void ReduceScoreIfNecessary(MultiTenantMonitor *monitor, TenantStats *tenantStats, - time_t updateTime); +static int CompareTenantScore(const void *leftElement, const void *rightElement); +static void UpdatePeriodsIfNecessary(TenantStats *tenantStats, time_t queryTime); +static void ReduceScoreIfNecessary(TenantStats *tenantStats, time_t queryTime); +static void EvictTenantsIfNecessary(time_t queryTime); +static void RecordTenantStats(TenantStats *tenantStats); static void CreateMultiTenantMonitor(void); static MultiTenantMonitor * CreateSharedMemoryForMultiTenantMonitor(void); static MultiTenantMonitor * GetMultiTenantMonitor(void); static void MultiTenantMonitorSMInit(void); -static int CreateTenantStats(MultiTenantMonitor *monitor); +static int CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime); static int FindTenantStats(MultiTenantMonitor *monitor); static size_t MultiTenantMonitorshmemSize(void); static char * ExtractTopComment(const char *inputString); @@ -72,17 +75,18 @@ int CitusStatsTenantsPeriod = (time_t) 60; int CitusStatsTenantsLimit = 10; -PG_FUNCTION_INFO_V1(citus_stats_tenants); +PG_FUNCTION_INFO_V1(citus_stats_tenants_local); PG_FUNCTION_INFO_V1(clean_citus_stats_tenants); +PG_FUNCTION_INFO_V1(sleep_until_next_period); /* - * citus_stats_tenants finds, updates and returns the statistics for tenants. + * citus_stats_tenants_local finds, updates and returns the statistics for tenants. */ Datum -citus_stats_tenants(PG_FUNCTION_ARGS) +citus_stats_tenants_local(PG_FUNCTION_ARGS) { - /*CheckCitusVersion(ERROR); */ + CheckCitusVersion(ERROR); /* * We keep more than CitusStatsTenantsLimit tenants in our monitor. @@ -107,11 +111,6 @@ citus_stats_tenants(PG_FUNCTION_ARGS) LWLockAcquire(&monitor->lock, LW_EXCLUSIVE); - monitor->periodStart = monitor->periodStart + - ((monitoringTime - monitor->periodStart) / - CitusStatsTenantsPeriod) * - CitusStatsTenantsPeriod; - int numberOfRowsToReturn = 0; if (returnAllTenants) { @@ -122,6 +121,14 @@ citus_stats_tenants(PG_FUNCTION_ARGS) numberOfRowsToReturn = Min(monitor->tenantCount, CitusStatsTenantsLimit); } + for (int tenantIndex = 0; tenantIndex < monitor->tenantCount; tenantIndex++) + { + UpdatePeriodsIfNecessary(&monitor->tenants[tenantIndex], monitoringTime); + ReduceScoreIfNecessary(&monitor->tenants[tenantIndex], monitoringTime); + } + SafeQsort(monitor->tenants, monitor->tenantCount, sizeof(TenantStats), + CompareTenantScore); + for (int i = 0; i < numberOfRowsToReturn; i++) { memset(values, 0, sizeof(values)); @@ -129,9 +136,6 @@ citus_stats_tenants(PG_FUNCTION_ARGS) TenantStats *tenantStats = &monitor->tenants[i]; - UpdatePeriodsIfNecessary(monitor, tenantStats); - ReduceScoreIfNecessary(monitor, tenantStats, monitoringTime); - values[0] = Int32GetDatum(tenantStats->colocationGroupId); values[1] = PointerGetDatum(cstring_to_text(tenantStats->tenantAttribute)); values[2] = Int32GetDatum(tenantStats->readsInThisPeriod); @@ -159,20 +163,43 @@ clean_citus_stats_tenants(PG_FUNCTION_ARGS) { MultiTenantMonitor *monitor = GetMultiTenantMonitor(); monitor->tenantCount = 0; - monitor->periodStart = time(0); PG_RETURN_VOID(); } /* - * AttributeQueryIfAnnotated assigns the attributes of tenant if the query is annotated. + * sleep_until_next_period sleeps until the next monitoring period starts. + */ +Datum +sleep_until_next_period(PG_FUNCTION_ARGS) +{ + struct timeval currentTime; + gettimeofday(&currentTime, NULL); + + long int nextPeriodStart = currentTime.tv_sec - + (currentTime.tv_sec % CitusStatsTenantsPeriod) + + CitusStatsTenantsPeriod; + + long int sleepTime = (nextPeriodStart - currentTime.tv_sec) * 1000000 - + currentTime.tv_usec + 100000; + pg_usleep(sleepTime); + + PG_RETURN_VOID(); +} + +
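For illustration, a standalone sketch of the period arithmetic sleep_until_next_period() relies on: periods are aligned to wall-clock multiples of CitusStatsTenantsPeriod, so the next boundary can be derived from tv_sec alone. The 60-second period matches the default above; the timestamp and microsecond values are made-up samples.

#include <stdio.h>

int
main(void)
{
	long period = 60;           /* CitusStatsTenantsPeriod default */
	long tv_sec = 1678886425;   /* hypothetical gettimeofday() seconds */
	long tv_usec = 250000;      /* hypothetical microseconds */

	/* align to the enclosing period, then step to the next boundary */
	long nextPeriodStart = tv_sec - (tv_sec % period) + period; /* 1678886460 */

	/* remaining wait in microseconds, padded by 100ms as in the UDF */
	long sleepTime = (nextPeriodStart - tv_sec) * 1000000 - tv_usec + 100000;

	printf("next period at %ld, sleeping %ld us\n", nextPeriodStart, sleepTime);
	return 0;
}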
+/* + * AttributeQueryIfAnnotated checks the query annotation and, if the query is + * annotated for tenant statistics monitoring, records the tenant attributes. */ void AttributeQueryIfAnnotated(const char *query_string, CmdType commandType) { strcpy_s(attributeToTenant, sizeof(attributeToTenant), ""); + attributeCommandType = commandType; + if (query_string == NULL) { return; } @@ -254,6 +281,10 @@ AnnotateQuery(char *queryString, char *partitionColumn, int colocationId) } +/* + * CitusAttributeToEnd keeps the statistics for the tenant and calls the previously installed end hook + * or the standard executor end function. + */ void CitusAttributeToEnd(QueryDesc *queryDesc) { @@ -275,6 +306,28 @@ CitusAttributeToEnd(QueryDesc *queryDesc) } +/* + * CompareTenantScore is used to sort the tenant statistics by score + * in descending order. + */ +static int +CompareTenantScore(const void *leftElement, const void *rightElement) +{ + const TenantStats *leftTenant = (const TenantStats *) leftElement; + const TenantStats *rightTenant = (const TenantStats *) rightElement; + + if (leftTenant->score > rightTenant->score) + { + return -1; + } + else if (leftTenant->score < rightTenant->score) + { + return 1; + } + return 0; +} + + /* * AttributeMetricsIfApplicable updates the metrics for the current tenant's statistics */ @@ -299,93 +352,48 @@ AttributeMetricsIfApplicable() LWLockAcquire(&monitor->lock, LW_SHARED); - monitor->periodStart = monitor->periodStart + - ((queryTime - monitor->periodStart) / - CitusStatsTenantsPeriod) * - CitusStatsTenantsPeriod; + int currentTenantIndex = FindTenantStats(monitor); - int tenantIndex = FindTenantStats(monitor); - - if (tenantIndex == -1) + if (currentTenantIndex != -1) { - tenantIndex = CreateTenantStats(monitor); + TenantStats *tenantStats = &monitor->tenants[currentTenantIndex]; + LWLockAcquire(&tenantStats->lock, LW_EXCLUSIVE); + + UpdatePeriodsIfNecessary(tenantStats, queryTime); + ReduceScoreIfNecessary(tenantStats, queryTime); + RecordTenantStats(tenantStats); + + LWLockRelease(&tenantStats->lock); } - TenantStats *tenantStats = &monitor->tenants[tenantIndex]; - - LWLockAcquire(&tenantStats->lock, LW_EXCLUSIVE); - - UpdatePeriodsIfNecessary(monitor, tenantStats); - tenantStats->lastQueryTime = queryTime; - - ReduceScoreIfNecessary(monitor, tenantStats, queryTime); - - /* - * We do this after the reducing the scores so the scores in this period are not affected by the reduction.
- */ - tenantStats->score += ONE_QUERY_SCORE; - - - /* - * After updating the score we might need to change the rank of the tenant in the monitor - */ - while (tenantIndex != 0 && - monitor->tenants[tenantIndex - 1].score < - monitor->tenants[tenantIndex].score) + else { - LWLockAcquire(&monitor->tenants[tenantIndex - 1].lock, LW_EXCLUSIVE); - - ReduceScoreIfNecessary(monitor, &monitor->tenants[tenantIndex - 1], - queryTime); - - TenantStats tempTenant = monitor->tenants[tenantIndex]; - monitor->tenants[tenantIndex] = monitor->tenants[tenantIndex - 1]; - monitor->tenants[tenantIndex - 1] = tempTenant; - - LWLockRelease(&monitor->tenants[tenantIndex].lock); - - tenantIndex--; - } - tenantStats = &monitor->tenants[tenantIndex]; - - if (attributeCommandType == CMD_SELECT) - { - tenantStats->readCount++; - tenantStats->readsInThisPeriod++; - tenantStats->totalReadTime += cpu_time_used; - } - else if (attributeCommandType == CMD_UPDATE || - attributeCommandType == CMD_INSERT || - attributeCommandType == CMD_DELETE) - { - tenantStats->writeCount++; - tenantStats->writesInThisPeriod++; - tenantStats->totalWriteTime += cpu_time_used; - } - - LWLockRelease(&monitor->lock); - LWLockRelease(&tenantStats->lock); - - /* - * We keep up to CitusStatsTenantsLimit * 3 tenants instead of CitusStatsTenantsLimit, - * so we don't lose data immediately after a tenant is out of top CitusStatsTenantsLimit - * - * Every time tenant count hits CitusStatsTenantsLimit * 3, we reduce it back to CitusStatsTenantsLimit * 2. - */ - if (monitor->tenantCount >= CitusStatsTenantsLimit * 3) - { - LWLockAcquire(&monitor->lock, LW_EXCLUSIVE); - monitor->tenantCount = CitusStatsTenantsLimit * 2; LWLockRelease(&monitor->lock); - } - if (MultiTenantMonitoringLogLevel != CITUS_LOG_LEVEL_OFF) - { - ereport(NOTICE, (errmsg("total read count = %d, total read CPU time = %f " - "to tenant: %s", - tenantStats->readCount, - tenantStats->totalReadTime, - tenantStats->tenantAttribute))); + LWLockAcquire(&monitor->lock, LW_EXCLUSIVE); + currentTenantIndex = FindTenantStats(monitor); + + if (currentTenantIndex == -1) + { + currentTenantIndex = CreateTenantStats(monitor, queryTime); + } + + LWLockRelease(&monitor->lock); + + LWLockAcquire(&monitor->lock, LW_SHARED); + currentTenantIndex = FindTenantStats(monitor); + if (currentTenantIndex != -1) + { + TenantStats *tenantStats = &monitor->tenants[currentTenantIndex]; + LWLockAcquire(&tenantStats->lock, LW_EXCLUSIVE); + + UpdatePeriodsIfNecessary(tenantStats, queryTime); + ReduceScoreIfNecessary(tenantStats, queryTime); + RecordTenantStats(tenantStats); + + LWLockRelease(&tenantStats->lock); + } } + LWLockRelease(&monitor->lock); } strcpy_s(attributeToTenant, sizeof(attributeToTenant), "");
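The restructured flow above takes the monitor lock in shared mode for the common case and escalates to exclusive only when the tenant entry is missing, re-running the lookup after each lock transition because another backend may have created the entry, or eviction may have moved it, in the meantime. A self-contained analogue of that find-or-create pattern follows, with a pthread rwlock standing in for the LWLock; all names are illustrative, not Citus source.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define MAX_TENANTS 8

typedef struct { char name[32]; long long score; } Entry;

static Entry entries[MAX_TENANTS];
static int entryCount = 0;
static pthread_rwlock_t monitorLock = PTHREAD_RWLOCK_INITIALIZER;

static int
FindEntry(const char *name)
{
	for (int i = 0; i < entryCount; i++)
	{
		if (strcmp(entries[i].name, name) == 0)
		{
			return i;
		}
	}
	return -1;
}

static int
FindOrCreateEntry(const char *name)
{
	/* fast path: look up under the shared (read) lock */
	pthread_rwlock_rdlock(&monitorLock);
	int idx = FindEntry(name);
	pthread_rwlock_unlock(&monitorLock);

	if (idx != -1)
	{
		return idx;
	}

	/* slow path: re-check under the exclusive (write) lock before creating,
	 * since another thread may have won the race */
	pthread_rwlock_wrlock(&monitorLock);
	idx = FindEntry(name);
	if (idx == -1 && entryCount < MAX_TENANTS)
	{
		idx = entryCount++;
		snprintf(entries[idx].name, sizeof(entries[idx].name), "%s", name);
	}
	pthread_rwlock_unlock(&monitorLock);
	return idx;
}

int
main(void)
{
	printf("tenant index: %d\n", FindOrCreateEntry("tenant-1"));
	return 0;
}

Unlike this simplified sketch, the function above keeps the shared monitor lock while it updates the counters, taking the per-tenant LWLock in exclusive mode for the update itself.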
@@ -402,13 +410,15 @@ AttributeMetricsIfApplicable() * statistics. */ static void -UpdatePeriodsIfNecessary(MultiTenantMonitor *monitor, TenantStats *tenantStats) +UpdatePeriodsIfNecessary(TenantStats *tenantStats, time_t queryTime) { + time_t periodStart = queryTime - (queryTime % CitusStatsTenantsPeriod); + /* * If the last query in this tenant was before the start of the current period * but there are query counts for this period, we move them to the last period. */ - if (tenantStats->lastQueryTime < monitor->periodStart && + if (tenantStats->lastQueryTime < periodStart && (tenantStats->writesInThisPeriod || tenantStats->readsInThisPeriod)) { tenantStats->writesInLastPeriod = tenantStats->writesInThisPeriod; @@ -421,12 +431,14 @@ UpdatePeriodsIfNecessary(MultiTenantMonitor *monitor, TenantStats *tenantStats) /* * If the last query is more than two periods ago, we clean the last period counts too. */ - if (tenantStats->lastQueryTime < monitor->periodStart - CitusStatsTenantsPeriod) + if (tenantStats->lastQueryTime < periodStart - CitusStatsTenantsPeriod) { tenantStats->writesInLastPeriod = 0; tenantStats->readsInLastPeriod = 0; } + + tenantStats->lastQueryTime = queryTime; } @@ -437,9 +449,10 @@ UpdatePeriodsIfNecessary(MultiTenantMonitor *monitor, TenantStats *tenantStats) * periods that passed after the last score reduction and reduces the score accordingly. */ static void -ReduceScoreIfNecessary(MultiTenantMonitor *monitor, TenantStats *tenantStats, - time_t updateTime) +ReduceScoreIfNecessary(TenantStats *tenantStats, time_t queryTime) { + time_t periodStart = queryTime - (queryTime % CitusStatsTenantsPeriod); + /* * With each query we increase the score of tenant by ONE_QUERY_SCORE. * After one period we halve the scores. @@ -448,7 +461,7 @@ ReduceScoreIfNecessary(MultiTenantMonitor *monitor, TenantStats *tenantStats, * If the latest score reduction was in this period this number should be 0, * if it was in the last period this number should be 1 and so on. */ - int periodCountAfterLastScoreReduction = (monitor->periodStart - + int periodCountAfterLastScoreReduction = (periodStart - tenantStats->lastScoreReduction + CitusStatsTenantsPeriod - 1) / CitusStatsTenantsPeriod; @@ -467,7 +480,63 @@ ReduceScoreIfNecessary(MultiTenantMonitor *monitor, TenantStats *tenantStats, if (periodCountAfterLastScoreReduction > 0) { tenantStats->score >>= periodCountAfterLastScoreReduction; - tenantStats->lastScoreReduction = updateTime; + tenantStats->lastScoreReduction = queryTime; + } +}
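A standalone worked example of the reduction above, with assumed sample values: a 60-second period, a score worth three queries, and a last reduction five periods back. The ceiling division mirrors the periodCountAfterLastScoreReduction computation, and the right shift halves the score once per elapsed period.

#include <stdio.h>

int
main(void)
{
	long period = 60;                     /* CitusStatsTenantsPeriod */
	long long score = 3000000000LL;       /* 3 * ONE_QUERY_SCORE */
	long lastScoreReduction = 1678886100; /* hypothetical timestamp */
	long queryTime = 1678886425;          /* hypothetical timestamp */

	long periodStart = queryTime - (queryTime % period); /* 1678886400 */

	/* 0 if the last reduction was in this period, 1 if in the previous one, ... */
	long n = (periodStart - lastScoreReduction + period - 1) / period; /* 5 */

	if (n > 0)
	{
		score >>= n; /* 3000000000 >> 5 == 93750000 */
	}
	printf("reduced score: %lld\n", score);
	return 0;
}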
+ + +/* + * EvictTenantsIfNecessary sorts and evicts the tenants if the tenant count is more than or + * equal to 3 * CitusStatsTenantsLimit. + */ +static void +EvictTenantsIfNecessary(time_t queryTime) +{ + MultiTenantMonitor *monitor = GetMultiTenantMonitor(); + + /* + * We keep up to CitusStatsTenantsLimit * 3 tenants instead of CitusStatsTenantsLimit, + * so we don't lose data immediately after a tenant is out of the top CitusStatsTenantsLimit. + * + * Every time the tenant count hits CitusStatsTenantsLimit * 3, we reduce it back to CitusStatsTenantsLimit * 2. + */ + if (monitor->tenantCount >= CitusStatsTenantsLimit * 3) + { + for (int tenantIndex = 0; tenantIndex < monitor->tenantCount; tenantIndex++) + { + ReduceScoreIfNecessary(&monitor->tenants[tenantIndex], queryTime); + } + SafeQsort(monitor->tenants, monitor->tenantCount, sizeof(TenantStats), + CompareTenantScore); + monitor->tenantCount = CitusStatsTenantsLimit * 2; + } +} + + +/* + * RecordTenantStats records the query statistics for the tenant. + */ +static void +RecordTenantStats(TenantStats *tenantStats) +{ + if (tenantStats->score < LLONG_MAX - ONE_QUERY_SCORE) + { + tenantStats->score += ONE_QUERY_SCORE; + } + else + { + tenantStats->score = LLONG_MAX; + } + + if (attributeCommandType == CMD_SELECT) + { + tenantStats->readsInThisPeriod++; + } + else if (attributeCommandType == CMD_UPDATE || + attributeCommandType == CMD_INSERT || + attributeCommandType == CMD_DELETE) + { + tenantStats->writesInThisPeriod++; } } @@ -480,7 +549,6 @@ CreateMultiTenantMonitor() { MultiTenantMonitor *monitor = CreateSharedMemoryForMultiTenantMonitor(); monitor->tenantCount = 0; - monitor->periodStart = time(0); } @@ -545,9 +613,6 @@ InitializeMultiTenantMonitorSMHandleManagement() /* * MultiTenantMonitorSMInit initializes the shared memory for MultiTenantMonitorSMData. - * - * MultiTenantMonitorSMData only holds the dsm (dynamic shared memory) handle for the actual - * multi tenant monitor. */ static void MultiTenantMonitorSMInit() @@ -565,8 +630,14 @@ MultiTenantMonitorSMInit() * CreateTenantStats creates the data structure for a tenant's statistics. */ static int -CreateTenantStats(MultiTenantMonitor *monitor) +CreateTenantStats(MultiTenantMonitor *monitor, time_t queryTime) { + /* + * If the tenant count has reached 3 * CitusStatsTenantsLimit, we evict the tenants + * with the lowest scores. + */ + EvictTenantsIfNecessary(queryTime); + int tenantIndex = monitor->tenantCount; memset(&monitor->tenants[tenantIndex], 0, sizeof(monitor->tenants[tenantIndex])); @@ -590,7 +661,7 @@ CreateTenantStats(MultiTenantMonitor *monitor) /* - * FindTenantStats finds the dsm (dynamic shared memory) handle for the current tenant's statistics. + * FindTenantStats finds the index for the current tenant's statistics. */ static int FindTenantStats(MultiTenantMonitor *monitor)
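The eviction policy implemented above is a simple hysteresis: CreateTenantStats() lets the array grow to three times CitusStatsTenantsLimit, and each eviction sorts by score and truncates back to twice the limit, so a tenant that just fell out of the top list is not forgotten immediately. A standalone trace of the bounds, assuming the default limit of 10:

#include <stdio.h>

int
main(void)
{
	int limit = 10; /* citus.stats_tenants_limit */
	int tenantCount = 0;

	for (int newTenant = 0; newTenant < 100; newTenant++)
	{
		if (tenantCount >= limit * 3)
		{
			/* EvictTenantsIfNecessary(): sort by score, keep the top 2x */
			tenantCount = limit * 2;
		}
		tenantCount++; /* CreateTenantStats() appends the new tenant */
	}

	/* the count oscillates between 2x and 3x the limit: prints 30 */
	printf("tenantCount = %d\n", tenantCount);
	return 0;
}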
diff --git a/src/backend/distributed/utils/colocation_utils.c b/src/backend/distributed/utils/colocation_utils.c index aabfcdf62..985d4c38e 100644 --- a/src/backend/distributed/utils/colocation_utils.c +++ b/src/backend/distributed/utils/colocation_utils.c @@ -442,8 +442,7 @@ ShardsIntervalsEqual(ShardInterval *leftShardInterval, ShardInterval *rightShard { return HashPartitionedShardIntervalsEqual(leftShardInterval, rightShardInterval); } - else if (IsCitusTableType(leftShardInterval->relationId, - CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (!HasDistributionKey(leftShardInterval->relationId)) { /* * Reference tables have only a single shard and all reference tables diff --git a/src/backend/distributed/utils/shardinterval_utils.c b/src/backend/distributed/utils/shardinterval_utils.c index 2980d11a4..12635f9f4 100644 --- a/src/backend/distributed/utils/shardinterval_utils.c +++ b/src/backend/distributed/utils/shardinterval_utils.c @@ -223,8 +223,7 @@ ShardIndex(ShardInterval *shardInterval) * currently it is not required. */ if (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) && -    !IsCitusTableTypeCacheEntry( -        cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) +    HasDistributionKeyCacheEntry(cacheEntry)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("finding index of a given shard is only supported for " @@ -233,7 +232,7 @@ ShardIndex(ShardInterval *shardInterval) } /* short-circuit for reference tables */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* * Reference tables and citus local tables have only a single shard, @@ -333,7 +332,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry) shardIndex = CalculateUniformHashRangeIndex(hashedValue, shardCount); } } - else if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* non-distributed tables have a single shard, all values mapped to that shard */ Assert(shardCount == 1); diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index 29c3c7154..412859449 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -255,6 +255,4 @@ extern struct DistributedPlan * CreateDistributedPlan(uint64 planId, PlannerRestrictionContext * plannerRestrictionContext); -extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte); - #endif /* DISTRIBUTED_PLANNER_H */ diff --git a/src/include/distributed/merge_planner.h b/src/include/distributed/merge_planner.h new file mode 100644 index 000000000..158f26861 --- /dev/null +++ b/src/include/distributed/merge_planner.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * merge_planner.h + * + * Declarations for public functions and types related to planning MERGE queries. + * + * Copyright (c) Citus Data, Inc.
+ * + *------------------------------------------------------------------------- + */ + +#ifndef MERGE_PLANNER_H +#define MERGE_PLANNER_H + +#include "c.h" + +#include "nodes/parsenodes.h" +#include "distributed/distributed_planner.h" +#include "distributed/errormessage.h" +#include "distributed/multi_physical_planner.h" + +extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte); +extern DeferredErrorMessage * MergeQuerySupported(Query *originalQuery, + bool multiShardQuery, + PlannerRestrictionContext * + plannerRestrictionContext); +extern DistributedPlan * CreateMergePlan(Query *originalQuery, Query *query, + PlannerRestrictionContext * + plannerRestrictionContext); + +#endif /* MERGE_PLANNER_H */ diff --git a/src/include/distributed/metadata_cache.h b/src/include/distributed/metadata_cache.h index 07fa50e64..e7cb2514d 100644 --- a/src/include/distributed/metadata_cache.h +++ b/src/include/distributed/metadata_cache.h @@ -133,9 +133,6 @@ typedef enum REFERENCE_TABLE, CITUS_LOCAL_TABLE, - /* table without a dist key such as reference table */ - CITUS_TABLE_WITH_NO_DIST_KEY, - ANY_CITUS_TABLE_TYPE } CitusTableType; @@ -143,6 +140,8 @@ extern List * AllCitusTableIds(void); extern bool IsCitusTableType(Oid relationId, CitusTableType tableType); extern bool IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tableType); +extern bool HasDistributionKey(Oid relationId); +extern bool HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry); extern char * GetTableTypeName(Oid tableId); extern void SetCreateCitusTransactionLevel(int val); @@ -154,7 +153,9 @@ extern List * LookupDistShardTuples(Oid relationId); extern char PartitionMethodViaCatalog(Oid relationId); extern Var * PartitionColumnViaCatalog(Oid relationId); extern uint32 ColocationIdViaCatalog(Oid relationId); -extern bool IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel); +extern bool IsReferenceTableByDistParams(char partitionMethod, char replicationModel); +extern bool IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel, + uint32 colocationId); extern List * CitusTableList(void); extern ShardInterval * LoadShardInterval(uint64 shardId); extern bool ShardExists(uint64 shardId); diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index ceea51678..acb4ae5da 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -325,6 +325,7 @@ extern void DeleteShardPlacementRow(uint64 placementId); extern void CreateDistributedTable(Oid relationId, char *distributionColumnName, char distributionMethod, int shardCount, bool shardCountIsStrict, char *colocateWithTableName); +extern void CreateReferenceTable(Oid relationId); extern void CreateTruncateTrigger(Oid relationId); extern TableConversionReturn * UndistributeTable(TableConversionParameters *params); diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index 52a42ee26..49fe28f1d 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -556,6 +556,7 @@ extern bool BinaryOpExpression(Expr *clause, Node **leftOperand, Node **rightOpe /* helper functions */ extern Var * MakeInt4Column(void); extern int CompareShardPlacements(const void *leftElement, const void *rightElement); +extern int CompareGroupShardPlacements(const void *leftElement, const void *rightElement); extern bool 
ShardIntervalsOverlap(ShardInterval *firstInterval, ShardInterval *secondInterval); extern bool ShardIntervalsOverlapWithParams(Datum firstMin, Datum firstMax, diff --git a/src/include/distributed/multi_router_planner.h b/src/include/distributed/multi_router_planner.h index 07d160865..698a0fd60 100644 --- a/src/include/distributed/multi_router_planner.h +++ b/src/include/distributed/multi_router_planner.h @@ -100,6 +100,17 @@ extern PlannedStmt * FastPathPlanner(Query *originalQuery, Query *parse, ParamLi extern bool FastPathRouterQuery(Query *query, Node **distributionKeyValue); extern bool JoinConditionIsOnFalse(List *relOptInfo); extern Oid ResultRelationOidForQuery(Query *query); - +extern DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, + FromExpr *joinTree, + Node *quals, + List *targetList, + CmdType commandType, + List *returningList); +extern bool NodeIsFieldStore(Node *node); +extern bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, + FromExpr *joinTree); +extern bool MasterIrreducibleExpression(Node *expression, bool *varArgument, + bool *badCoalesce); +extern bool HasDangerousJoinUsing(List *rtableList, Node *jtnode); #endif /* MULTI_ROUTER_PLANNER_H */ diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index ccd50a6db..07b6348d9 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -54,4 +54,5 @@ extern RelationRestrictionContext * FilterRelationRestrictionContext( RelationRestrictionContext *relationRestrictionContext, Relids queryRteIdentities); +extern bool AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList); #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ diff --git a/src/include/distributed/utils/attribute.h b/src/include/distributed/utils/attribute.h index f2453b818..f56fec4f2 100644 --- a/src/include/distributed/utils/attribute.h +++ b/src/include/distributed/utils/attribute.h @@ -15,38 +15,74 @@ #include "executor/executor.h" #include "storage/lwlock.h" +#define MAX_TENANT_ATTRIBUTE_LENGTH 100 + +/* + * TenantStats is the struct that keeps statistics about one tenant. + */ typedef struct TenantStats { - char tenantAttribute[100]; - + /* + * The attribute value, e.g. the distribution column value, and the colocation group id + * of the tenant. + */ + char tenantAttribute[MAX_TENANT_ATTRIBUTE_LENGTH]; int colocationGroupId; - int readCount; - double totalReadTime; + /* + * Number of SELECT queries this tenant ran in this and last periods. + */ int readsInLastPeriod; int readsInThisPeriod; - int writeCount; - double totalWriteTime; + /* + * Number of INSERT, UPDATE, and DELETE queries this tenant ran in this and last periods. + */ int writesInLastPeriod; int writesInThisPeriod; + /* + * The latest time this tenant ran a query. This value is used to update the score later. + */ time_t lastQueryTime; + /* + * The tenant monitoring score of this tenant. This value is increased by ONE_QUERY_SCORE at every query + * and halved after every period. + */ long long score; + + /* + * The latest time the score of this tenant was halved. This value is used to correctly calculate the reduction later. + */ time_t lastScoreReduction; + /* + * Locks needed to update this tenant's statistics. + */ NamedLWLockTranche namedLockTranche; LWLock lock; } TenantStats;
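Since the tenants array is a flexible array member and, per the comment in MultiTenantMonitor below, is sized for 3 * citus.stats_tenants_limit entries, the shared-memory segment can be estimated as in this sketch. The trimmed structs are stand-ins rather than the real layout, and MultiTenantMonitorshmemSize() in attribute.c remains the authoritative computation.

#include <stddef.h>
#include <stdio.h>
#include <time.h>

/* trimmed, hypothetical stand-ins for TenantStats and MultiTenantMonitor */
typedef struct
{
	char tenantAttribute[100];
	int colocationGroupId;
	long long score;
	time_t lastQueryTime;
} TenantStatsStub;

typedef struct
{
	int tenantCount;
	TenantStatsStub tenants[];
} MonitorStub;

int
main(void)
{
	int limit = 10; /* citus.stats_tenants_limit */

	/* segment holds the header plus 3x the limit worth of tenant slots */
	size_t size = sizeof(MonitorStub) + 3 * limit * sizeof(TenantStatsStub);
	printf("approx. monitor segment: %zu bytes\n", size);
	return 0;
}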
+/* + * MultiTenantMonitor is the struct that keeps the statistics + * of the tenants. + */ typedef struct MultiTenantMonitor { - time_t periodStart; - + /* + * Lock mechanism for the monitor. + * Each tenant-statistics update acquires the lock in shared mode; + * tenant eviction and the monitor view acquire it in exclusive mode. + */ NamedLWLockTranche namedLockTranche; LWLock lock; + /* + * tenantCount is the number of items in the tenants array. + * The total length of the tenants array is set at CreateSharedMemoryForMultiTenantMonitor + * and is 3 * citus.stats_tenants_limit. + */ int tenantCount; TenantStats tenants[FLEXIBLE_ARRAY_MEMBER]; } MultiTenantMonitor; diff --git a/src/include/distributed/worker_shard_copy.h b/src/include/distributed/worker_shard_copy.h index 2ab2775f9..77f57c761 100644 --- a/src/include/distributed/worker_shard_copy.h +++ b/src/include/distributed/worker_shard_copy.h @@ -19,4 +19,9 @@ extern DestReceiver * CreateShardCopyDestReceiver(EState *executorState, List *destinationShardFullyQualifiedName, uint32_t destinationNodeId); +extern const char * CopyableColumnNamesFromRelationName(const char *schemaName, const + char *relationName); + +extern const char * CopyableColumnNamesFromTupleDesc(TupleDesc tupdesc); + #endif /* WORKER_SHARD_COPY_H_ */ diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index 368f8f8c5..d9700df80 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -117,29 +117,31 @@ check-minimal-mx: all -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS) check-custom-schedule: all - $(pg_regress_multi_check) --load-extension=citus \ + $(pg_regress_multi_check) --load-extension=citus --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-failure-custom-schedule: all - $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ + $(pg_regress_multi_check) --load-extension=citus --mitmproxy --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-isolation-custom-schedule: all $(isolation_test_files) - $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + $(pg_regress_multi_check) --load-extension=citus --isolationtester --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-custom-schedule-vg: all $(pg_regress_multi_check) --load-extension=citus \ - --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ + --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --worker-count=$(WORKERCOUNT) \ + --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-failure-custom-schedule-vg: all $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ - --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ + --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --worker-count=$(WORKERCOUNT) \ + --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-isolation-custom-schedule-vg: all 
$(isolation_test_files) - $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + $(pg_regress_multi_check) --load-extension=citus --isolationtester --worker-count=$(WORKERCOUNT) \ --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) diff --git a/src/test/regress/Pipfile b/src/test/regress/Pipfile index bb848c792..16da96f21 100644 --- a/src/test/regress/Pipfile +++ b/src/test/regress/Pipfile @@ -8,6 +8,12 @@ mitmproxy = {editable = true, ref = "main", git = "https://github.com/citusdata/ construct = "==2.9.45" docopt = "==0.6.2" cryptography = ">=39.0.1" +pytest = "*" +psycopg = "*" +filelock = "*" +pytest-asyncio = "*" +pytest-timeout = "*" +pytest-xdist = "*" [dev-packages] black = "*" diff --git a/src/test/regress/Pipfile.lock b/src/test/regress/Pipfile.lock index 0349032b2..709254d77 100644 --- a/src/test/regress/Pipfile.lock +++ b/src/test/regress/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "dfc5545eeb592c0dd5ed002b7665d940288c5ead77d2f31a0aa08391569577fc" + "sha256": "456a43ce06df947ccbf02db7fcbfd654999acaae25911990d4d74fc04b10c77e" }, "pipfile-spec": 6, "requires": { @@ -24,6 +24,14 @@ "markers": "python_version >= '3.6'", "version": "==3.4.1" }, + "attrs": { + "hashes": [ + "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", + "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" + ], + "markers": "python_version >= '3.6'", + "version": "==22.2.0" + }, "blinker": { "hashes": [ "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6" @@ -211,32 +219,32 @@ }, "cryptography": { "hashes": [ - "sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4", - "sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f", - "sha256:4789d1e3e257965e960232345002262ede4d094d1a19f4d3b52e48d4d8f3b885", - "sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502", - "sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41", - "sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965", - "sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e", - "sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc", - "sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad", - "sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505", - "sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388", - "sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6", - "sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2", - "sha256:c5caeb8188c24888c90b5108a441c106f7faa4c4c075a2bcae438c6e8ca73cef", - "sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac", - "sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695", - "sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6", - "sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336", - "sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0", - "sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c", - "sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106", - "sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a", - 
"sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8" + "sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1", + "sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7", + "sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06", + "sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84", + "sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915", + "sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074", + "sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5", + "sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3", + "sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9", + "sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3", + "sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011", + "sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536", + "sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a", + "sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f", + "sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480", + "sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac", + "sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0", + "sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108", + "sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828", + "sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354", + "sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612", + "sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3", + "sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97" ], "index": "pypi", - "version": "==39.0.1" + "version": "==39.0.2" }, "docopt": { "hashes": [ @@ -245,6 +253,30 @@ "index": "pypi", "version": "==0.6.2" }, + "exceptiongroup": { + "hashes": [ + "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e", + "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785" + ], + "markers": "python_version < '3.11'", + "version": "==1.1.1" + }, + "execnet": { + "hashes": [ + "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5", + "sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.9.0" + }, + "filelock": { + "hashes": [ + "sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce", + "sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182" + ], + "index": "pypi", + "version": "==3.10.0" + }, "flask": { "hashes": [ "sha256:59da8a3170004800a2837844bfa84d49b022550616070f7cb1a659682b2e7c9f", @@ -285,6 +317,14 @@ "markers": "python_full_version >= '3.6.1'", "version": "==6.0.1" }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, "itsdangerous": { "hashes": [ "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44", @@ -380,60 +420,79 @@ }, "msgpack": { "hashes": [ - "sha256:002b5c72b6cd9b4bafd790f364b8480e859b4712e91f43014fe01e4f957b8467", - 
"sha256:0a68d3ac0104e2d3510de90a1091720157c319ceeb90d74f7b5295a6bee51bae", - "sha256:0df96d6eaf45ceca04b3f3b4b111b86b33785683d682c655063ef8057d61fd92", - "sha256:0dfe3947db5fb9ce52aaea6ca28112a170db9eae75adf9339a1aec434dc954ef", - "sha256:0e3590f9fb9f7fbc36df366267870e77269c03172d086fa76bb4eba8b2b46624", - "sha256:11184bc7e56fd74c00ead4f9cc9a3091d62ecb96e97653add7a879a14b003227", - "sha256:112b0f93202d7c0fef0b7810d465fde23c746a2d482e1e2de2aafd2ce1492c88", - "sha256:1276e8f34e139aeff1c77a3cefb295598b504ac5314d32c8c3d54d24fadb94c9", - "sha256:1576bd97527a93c44fa856770197dec00d223b0b9f36ef03f65bac60197cedf8", - "sha256:1e91d641d2bfe91ba4c52039adc5bccf27c335356055825c7f88742c8bb900dd", - "sha256:26b8feaca40a90cbe031b03d82b2898bf560027160d3eae1423f4a67654ec5d6", - "sha256:2999623886c5c02deefe156e8f869c3b0aaeba14bfc50aa2486a0415178fce55", - "sha256:2a2df1b55a78eb5f5b7d2a4bb221cd8363913830145fad05374a80bf0877cb1e", - "sha256:2bb8cdf50dd623392fa75525cce44a65a12a00c98e1e37bf0fb08ddce2ff60d2", - "sha256:2cc5ca2712ac0003bcb625c96368fd08a0f86bbc1a5578802512d87bc592fe44", - "sha256:35bc0faa494b0f1d851fd29129b2575b2e26d41d177caacd4206d81502d4c6a6", - "sha256:3c11a48cf5e59026ad7cb0dc29e29a01b5a66a3e333dc11c04f7e991fc5510a9", - "sha256:449e57cc1ff18d3b444eb554e44613cffcccb32805d16726a5494038c3b93dab", - "sha256:462497af5fd4e0edbb1559c352ad84f6c577ffbbb708566a0abaaa84acd9f3ae", - "sha256:4733359808c56d5d7756628736061c432ded018e7a1dff2d35a02439043321aa", - "sha256:48f5d88c99f64c456413d74a975bd605a9b0526293218a3b77220a2c15458ba9", - "sha256:49565b0e3d7896d9ea71d9095df15b7f75a035c49be733051c34762ca95bbf7e", - "sha256:4ab251d229d10498e9a2f3b1e68ef64cb393394ec477e3370c457f9430ce9250", - "sha256:4d5834a2a48965a349da1c5a79760d94a1a0172fbb5ab6b5b33cbf8447e109ce", - "sha256:4dea20515f660aa6b7e964433b1808d098dcfcabbebeaaad240d11f909298075", - "sha256:545e3cf0cf74f3e48b470f68ed19551ae6f9722814ea969305794645da091236", - "sha256:63e29d6e8c9ca22b21846234913c3466b7e4ee6e422f205a2988083de3b08cae", - "sha256:6916c78f33602ecf0509cc40379271ba0f9ab572b066bd4bdafd7434dee4bc6e", - "sha256:6a4192b1ab40f8dca3f2877b70e63799d95c62c068c84dc028b40a6cb03ccd0f", - "sha256:6c9566f2c39ccced0a38d37c26cc3570983b97833c365a6044edef3574a00c08", - "sha256:76ee788122de3a68a02ed6f3a16bbcd97bc7c2e39bd4d94be2f1821e7c4a64e6", - "sha256:7760f85956c415578c17edb39eed99f9181a48375b0d4a94076d84148cf67b2d", - "sha256:77ccd2af37f3db0ea59fb280fa2165bf1b096510ba9fe0cc2bf8fa92a22fdb43", - "sha256:81fc7ba725464651190b196f3cd848e8553d4d510114a954681fd0b9c479d7e1", - "sha256:85f279d88d8e833ec015650fd15ae5eddce0791e1e8a59165318f371158efec6", - "sha256:9667bdfdf523c40d2511f0e98a6c9d3603be6b371ae9a238b7ef2dc4e7a427b0", - "sha256:a75dfb03f8b06f4ab093dafe3ddcc2d633259e6c3f74bb1b01996f5d8aa5868c", - "sha256:ac5bd7901487c4a1dd51a8c58f2632b15d838d07ceedaa5e4c080f7190925bff", - "sha256:aca0f1644d6b5a73eb3e74d4d64d5d8c6c3d577e753a04c9e9c87d07692c58db", - "sha256:b17be2478b622939e39b816e0aa8242611cc8d3583d1cd8ec31b249f04623243", - "sha256:c1683841cd4fa45ac427c18854c3ec3cd9b681694caf5bff04edb9387602d661", - "sha256:c23080fdeec4716aede32b4e0ef7e213c7b1093eede9ee010949f2a418ced6ba", - "sha256:d5b5b962221fa2c5d3a7f8133f9abffc114fe218eb4365e40f17732ade576c8e", - "sha256:d603de2b8d2ea3f3bcb2efe286849aa7a81531abc52d8454da12f46235092bcb", - "sha256:e83f80a7fec1a62cf4e6c9a660e39c7f878f603737a0cdac8c13131d11d97f52", - "sha256:eb514ad14edf07a1dbe63761fd30f89ae79b42625731e1ccf5e1f1092950eaa6", - "sha256:eba96145051ccec0ec86611fe9cf693ce55f2a3ce89c06ed307de0e085730ec1", - 
"sha256:ed6f7b854a823ea44cf94919ba3f727e230da29feb4a99711433f25800cf747f", - "sha256:f0029245c51fd9473dc1aede1160b0a29f4a912e6b1dd353fa6d317085b219da", - "sha256:f5d869c18f030202eb412f08b28d2afeea553d6613aee89e200d7aca7ef01f5f", - "sha256:fb62ea4b62bfcb0b380d5680f9a4b3f9a2d166d9394e9bbd9666c0ee09a3645c", - "sha256:fcb8a47f43acc113e24e910399376f7277cf8508b27e5b88499f053de6b115a8" + "sha256:06f5174b5f8ed0ed919da0e62cbd4ffde676a374aba4020034da05fab67b9164", + "sha256:0c05a4a96585525916b109bb85f8cb6511db1c6f5b9d9cbcbc940dc6b4be944b", + "sha256:137850656634abddfb88236008339fdaba3178f4751b28f270d2ebe77a563b6c", + "sha256:17358523b85973e5f242ad74aa4712b7ee560715562554aa2134d96e7aa4cbbf", + "sha256:18334484eafc2b1aa47a6d42427da7fa8f2ab3d60b674120bce7a895a0a85bdd", + "sha256:1835c84d65f46900920b3708f5ba829fb19b1096c1800ad60bae8418652a951d", + "sha256:1967f6129fc50a43bfe0951c35acbb729be89a55d849fab7686004da85103f1c", + "sha256:1ab2f3331cb1b54165976a9d976cb251a83183631c88076613c6c780f0d6e45a", + "sha256:1c0f7c47f0087ffda62961d425e4407961a7ffd2aa004c81b9c07d9269512f6e", + "sha256:20a97bf595a232c3ee6d57ddaadd5453d174a52594bf9c21d10407e2a2d9b3bd", + "sha256:20c784e66b613c7f16f632e7b5e8a1651aa5702463d61394671ba07b2fc9e025", + "sha256:266fa4202c0eb94d26822d9bfd7af25d1e2c088927fe8de9033d929dd5ba24c5", + "sha256:28592e20bbb1620848256ebc105fc420436af59515793ed27d5c77a217477705", + "sha256:288e32b47e67f7b171f86b030e527e302c91bd3f40fd9033483f2cacc37f327a", + "sha256:3055b0455e45810820db1f29d900bf39466df96ddca11dfa6d074fa47054376d", + "sha256:332360ff25469c346a1c5e47cbe2a725517919892eda5cfaffe6046656f0b7bb", + "sha256:362d9655cd369b08fda06b6657a303eb7172d5279997abe094512e919cf74b11", + "sha256:366c9a7b9057e1547f4ad51d8facad8b406bab69c7d72c0eb6f529cf76d4b85f", + "sha256:36961b0568c36027c76e2ae3ca1132e35123dcec0706c4b7992683cc26c1320c", + "sha256:379026812e49258016dd84ad79ac8446922234d498058ae1d415f04b522d5b2d", + "sha256:382b2c77589331f2cb80b67cc058c00f225e19827dbc818d700f61513ab47bea", + "sha256:476a8fe8fae289fdf273d6d2a6cb6e35b5a58541693e8f9f019bfe990a51e4ba", + "sha256:48296af57cdb1d885843afd73c4656be5c76c0c6328db3440c9601a98f303d87", + "sha256:4867aa2df9e2a5fa5f76d7d5565d25ec76e84c106b55509e78c1ede0f152659a", + "sha256:4c075728a1095efd0634a7dccb06204919a2f67d1893b6aa8e00497258bf926c", + "sha256:4f837b93669ce4336e24d08286c38761132bc7ab29782727f8557e1eb21b2080", + "sha256:4f8d8b3bf1ff2672567d6b5c725a1b347fe838b912772aa8ae2bf70338d5a198", + "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9", + "sha256:5494ea30d517a3576749cad32fa27f7585c65f5f38309c88c6d137877fa28a5a", + "sha256:55b56a24893105dc52c1253649b60f475f36b3aa0fc66115bffafb624d7cb30b", + "sha256:56a62ec00b636583e5cb6ad313bbed36bb7ead5fa3a3e38938503142c72cba4f", + "sha256:57e1f3528bd95cc44684beda696f74d3aaa8a5e58c816214b9046512240ef437", + "sha256:586d0d636f9a628ddc6a17bfd45aa5b5efaf1606d2b60fa5d87b8986326e933f", + "sha256:5cb47c21a8a65b165ce29f2bec852790cbc04936f502966768e4aae9fa763cb7", + "sha256:6c4c68d87497f66f96d50142a2b73b97972130d93677ce930718f68828b382e2", + "sha256:821c7e677cc6acf0fd3f7ac664c98803827ae6de594a9f99563e48c5a2f27eb0", + "sha256:916723458c25dfb77ff07f4c66aed34e47503b2eb3188b3adbec8d8aa6e00f48", + "sha256:9e6ca5d5699bcd89ae605c150aee83b5321f2115695e741b99618f4856c50898", + "sha256:9f5ae84c5c8a857ec44dc180a8b0cc08238e021f57abdf51a8182e915e6299f0", + "sha256:a2b031c2e9b9af485d5e3c4520f4220d74f4d222a5b8dc8c1a3ab9448ca79c57", + "sha256:a61215eac016f391129a013c9e46f3ab308db5f5ec9f25811e811f96962599a8", + 
"sha256:a740fa0e4087a734455f0fc3abf5e746004c9da72fbd541e9b113013c8dc3282", + "sha256:a9985b214f33311df47e274eb788a5893a761d025e2b92c723ba4c63936b69b1", + "sha256:ab31e908d8424d55601ad7075e471b7d0140d4d3dd3272daf39c5c19d936bd82", + "sha256:ac9dd47af78cae935901a9a500104e2dea2e253207c924cc95de149606dc43cc", + "sha256:addab7e2e1fcc04bd08e4eb631c2a90960c340e40dfc4a5e24d2ff0d5a3b3edb", + "sha256:b1d46dfe3832660f53b13b925d4e0fa1432b00f5f7210eb3ad3bb9a13c6204a6", + "sha256:b2de4c1c0538dcb7010902a2b97f4e00fc4ddf2c8cda9749af0e594d3b7fa3d7", + "sha256:b5ef2f015b95f912c2fcab19c36814963b5463f1fb9049846994b007962743e9", + "sha256:b72d0698f86e8d9ddf9442bdedec15b71df3598199ba33322d9711a19f08145c", + "sha256:bae7de2026cbfe3782c8b78b0db9cbfc5455e079f1937cb0ab8d133496ac55e1", + "sha256:bf22a83f973b50f9d38e55c6aade04c41ddda19b00c4ebc558930d78eecc64ed", + "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c", + "sha256:c396e2cc213d12ce017b686e0f53497f94f8ba2b24799c25d913d46c08ec422c", + "sha256:cb5aaa8c17760909ec6cb15e744c3ebc2ca8918e727216e79607b7bbce9c8f77", + "sha256:cdc793c50be3f01106245a61b739328f7dccc2c648b501e237f0699fe1395b81", + "sha256:d25dd59bbbbb996eacf7be6b4ad082ed7eacc4e8f3d2df1ba43822da9bfa122a", + "sha256:e42b9594cc3bf4d838d67d6ed62b9e59e201862a25e9a157019e171fbe672dd3", + "sha256:e57916ef1bd0fee4f21c4600e9d1da352d8816b52a599c46460e93a6e9f17086", + "sha256:ed40e926fa2f297e8a653c954b732f125ef97bdd4c889f243182299de27e2aa9", + "sha256:ef8108f8dedf204bb7b42994abf93882da1159728a2d4c5e82012edd92c9da9f", + "sha256:f933bbda5a3ee63b8834179096923b094b76f0c7a73c1cfe8f07ad608c58844b", + "sha256:fe5c63197c55bce6385d9aee16c4d0641684628f63ace85f73571e65ad1c1e8d" ], - "version": "==1.0.4" + "version": "==1.0.5" + }, + "packaging": { + "hashes": [ + "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", + "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" + ], + "markers": "python_version >= '3.7'", + "version": "==23.0" }, "passlib": { "hashes": [ @@ -442,6 +501,14 @@ ], "version": "==1.7.4" }, + "pluggy": { + "hashes": [ + "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", + "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.0" + }, "protobuf": { "hashes": [ "sha256:0c44e01f74109decea196b5b313b08edb5316df77313995594a6981e95674259", @@ -469,6 +536,14 @@ "markers": "python_version >= '3.5'", "version": "==3.18.3" }, + "psycopg": { + "hashes": [ + "sha256:59b4a71536b146925513c0234dfd1dc42b81e65d56ce5335dff4813434dbc113", + "sha256:b1500c42063abaa01d30b056f0b300826b8dd8d586900586029a294ce74af327" + ], + "index": "pypi", + "version": "==3.1.8" + }, "publicsuffix2": { "hashes": [ "sha256:00f8cc31aa8d0d5592a5ced19cccba7de428ebca985db26ac852d920ddd6fe7b", @@ -523,6 +598,38 @@ ], "version": "==1.8.2" }, + "pytest": { + "hashes": [ + "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e", + "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4" + ], + "index": "pypi", + "version": "==7.2.2" + }, + "pytest-asyncio": { + "hashes": [ + "sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36", + "sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442" + ], + "index": "pypi", + "version": "==0.20.3" + }, + "pytest-timeout": { + "hashes": [ + "sha256:c07ca07404c612f8abbe22294b23c368e2e5104b521c1790195561f37e1ac3d9", + 
"sha256:f6f50101443ce70ad325ceb4473c4255e9d74e3c7cd0ef827309dfa4c0d975c6" + ], + "index": "pypi", + "version": "==2.1.0" + }, + "pytest-xdist": { + "hashes": [ + "sha256:1849bd98d8b242b948e472db7478e090bf3361912a8fed87992ed94085f54727", + "sha256:37290d161638a20b672401deef1cba812d110ac27e35d213f091d15b8beb40c9" + ], + "index": "pypi", + "version": "==3.2.1" + }, "ruamel.yaml": { "hashes": [ "sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33", @@ -531,6 +638,48 @@ "markers": "python_version >= '3'", "version": "==0.17.16" }, + "ruamel.yaml.clib": { + "hashes": [ + "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e", + "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3", + "sha256:184faeaec61dbaa3cace407cffc5819f7b977e75360e8d5ca19461cd851a5fc5", + "sha256:1f08fd5a2bea9c4180db71678e850b995d2a5f4537be0e94557668cf0f5f9497", + "sha256:2aa261c29a5545adfef9296b7e33941f46aa5bbd21164228e833412af4c9c75f", + "sha256:3110a99e0f94a4a3470ff67fc20d3f96c25b13d24c6980ff841e82bafe827cac", + "sha256:3243f48ecd450eddadc2d11b5feb08aca941b5cd98c9b1db14b2fd128be8c697", + "sha256:370445fd795706fd291ab00c9df38a0caed0f17a6fb46b0f607668ecb16ce763", + "sha256:40d030e2329ce5286d6b231b8726959ebbe0404c92f0a578c0e2482182e38282", + "sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94", + "sha256:4a4d8d417868d68b979076a9be6a38c676eca060785abaa6709c7b31593c35d1", + "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072", + "sha256:5bc0667c1eb8f83a3752b71b9c4ba55ef7c7058ae57022dd9b29065186a113d9", + "sha256:721bc4ba4525f53f6a611ec0967bdcee61b31df5a56801281027a3a6d1c2daf5", + "sha256:763d65baa3b952479c4e972669f679fe490eee058d5aa85da483ebae2009d231", + "sha256:7bdb4c06b063f6fd55e472e201317a3bb6cdeeee5d5a38512ea5c01e1acbdd93", + "sha256:8831a2cedcd0f0927f788c5bdf6567d9dc9cc235646a434986a852af1cb54b4b", + "sha256:91a789b4aa0097b78c93e3dc4b40040ba55bef518f84a40d4442f713b4094acb", + "sha256:92460ce908546ab69770b2e576e4f99fbb4ce6ab4b245345a3869a0a0410488f", + "sha256:99e77daab5d13a48a4054803d052ff40780278240a902b880dd37a51ba01a307", + "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8", + "sha256:a7b301ff08055d73223058b5c46c55638917f04d21577c95e00e0c4d79201a6b", + "sha256:be2a7ad8fd8f7442b24323d24ba0b56c51219513cfa45b9ada3b87b76c374d4b", + "sha256:bf9a6bc4a0221538b1a7de3ed7bca4c93c02346853f44e1cd764be0023cd3640", + "sha256:c3ca1fbba4ae962521e5eb66d72998b51f0f4d0f608d3c0347a48e1af262efa7", + "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a", + "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71", + "sha256:d5e51e2901ec2366b79f16c2299a03e74ba4531ddcfacc1416639c557aef0ad8", + "sha256:da538167284de58a52109a9b89b8f6a53ff8437dd6dc26d33b57bf6699153122", + "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7", + "sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80", + "sha256:ecdf1a604009bd35c674b9225a8fa609e0282d9b896c03dd441a91e5f53b534e", + "sha256:efa08d63ef03d079dcae1dfe334f6c8847ba8b645d08df286358b1f5293d24ab", + "sha256:f01da5790e95815eb5a8a138508c01c758e5f5bc0ce4286c4f7028b8dd7ac3d0", + "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646", + "sha256:f6d3d39611ac2e4f62c3128a9eed45f19a6608670c5a2f4f07f24e8de3441d38" + ], + "markers": "platform_python_implementation == 'CPython' and python_version < '3.10'", + "version": "==0.2.7" + }, "sortedcontainers": { "hashes": [ 
"sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", @@ -538,6 +687,14 @@ ], "version": "==2.4.0" }, + "tomli": { + "hashes": [ + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version < '3.11'", + "version": "==2.0.1" + }, "tornado": { "hashes": [ "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", @@ -555,6 +712,14 @@ "markers": "python_version >= '3.7'", "version": "==6.2" }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" + }, "urwid": { "hashes": [ "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae" @@ -563,11 +728,11 @@ }, "werkzeug": { "hashes": [ - "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f", - "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5" + "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe", + "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612" ], "markers": "python_version >= '3.7'", - "version": "==2.2.2" + "version": "==2.2.3" }, "wsproto": { "hashes": [ @@ -690,11 +855,11 @@ }, "flake8-bugbear": { "hashes": [ - "sha256:04a115e5f9c8e87c38bdbbcdf9f58223ffe05469c07c9a7bd8633330bc4d078b", - "sha256:55902ab5a48c5ea53d8689ecd146eda548e72f2724192b9c1d68f6d975d13c06" + "sha256:beb5c7efcd7ccc2039ef66a77bb8db925e7be3531ff1cb4d0b7030d0e2113d72", + "sha256:e3e7f74c8a49ad3794a7183353026dabd68c74030d5f46571f84c1fb0eb79363" ], "index": "pypi", - "version": "==23.1.20" + "version": "==23.3.12" }, "isort": { "hashes": [ @@ -730,19 +895,19 @@ }, "pathspec": { "hashes": [ - "sha256:3a66eb970cbac598f9e5ccb5b2cf58930cd8e3ed86d393d541eaf2d8b1705229", - "sha256:64d338d4e0914e91c1792321e6907b5a593f1ab1851de7fc269557a21b30ebbc" + "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687", + "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293" ], "markers": "python_version >= '3.7'", - "version": "==0.11.0" + "version": "==0.11.1" }, "platformdirs": { "hashes": [ - "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9", - "sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567" + "sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa", + "sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8" ], "markers": "python_version >= '3.7'", - "version": "==3.0.0" + "version": "==3.1.1" }, "pycodestyle": { "hashes": [ @@ -767,6 +932,14 @@ ], "markers": "python_version < '3.11'", "version": "==2.0.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" } } } diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index d6e8a6111..65692e1c9 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -28,6 +28,10 @@ s/\(ref_id\)=\([0-9]+\)/(ref_id)=(X)/g # shard table names for multi_subtransactions s/"t2_[0-9]+"/"t2_xxxxxxx"/g +# shard table names for MERGE tests +s/merge_schema\.([_a-z0-9]+)_40[0-9]+ /merge_schema.\1_xxxxxxx /g 
+s/pgmerge_schema\.([_a-z0-9]+)_40[0-9]+ /pgmerge_schema.\1_xxxxxxx /g + # shard table names for multi_subquery s/ keyval(1|2|ref)_[0-9]+ / keyval\1_xxxxxxx /g diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index a4b303e90..5964267ec 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -15,6 +15,16 @@ import common import config + +# Returns true if given test_schedule_line is of the form: +# "test: upgrade_ ... _after .." +def schedule_line_is_upgrade_after(test_schedule_line: str) -> bool: + return ( + test_schedule_line.startswith("test: upgrade_") + and "_after" in test_schedule_line + ) + + if __name__ == "__main__": args = argparse.ArgumentParser() args.add_argument( @@ -63,10 +73,11 @@ if __name__ == "__main__": schedule: Optional[str] direct_extra_tests: list[str] - def __init__(self, schedule, extra_tests=None, repeatable=True): + def __init__(self, schedule, extra_tests=None, repeatable=True, worker_count=2): self.schedule = schedule self.direct_extra_tests = extra_tests or [] self.repeatable = repeatable + self.worker_count = worker_count def extra_tests(self): all_deps = OrderedDict() @@ -98,6 +109,10 @@ if __name__ == "__main__": "multi_mx_function_table_reference", ], ), + "multi_mx_modifying_xacts": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_router_planner": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_copy_data": TestDeps(None, ["multi_mx_create_table"]), + "multi_simple_queries": TestDeps("base_schedule"), } if not (test_file_name or test_file_path): @@ -170,8 +185,22 @@ if __name__ == "__main__": return "base_schedule" return "minimal_schedule" + # we run the tests with 2 workers by default. + # If we find any dependency which requires more workers, we update the worker count. 
+ def worker_count_for(test_name): + if test_name in deps: + return deps[test_name].worker_count + return 2 + + test_worker_count = max(worker_count_for(test_file_name), 2) + if test_file_name in deps: dependencies = deps[test_file_name] + elif schedule_line_is_upgrade_after(test_schedule_line): + dependencies = TestDeps( + default_base_schedule(test_schedule), + [test_file_name.replace("_after", "_before")], + ) else: dependencies = TestDeps(default_base_schedule(test_schedule)) @@ -189,6 +218,7 @@ if __name__ == "__main__": with open(tmp_schedule_path, "a") as myfile: for dependency in dependencies.extra_tests(): myfile.write(f"test: {dependency}\n") + test_worker_count = max(worker_count_for(dependency), test_worker_count) repetition_cnt = args["repeat"] if repetition_cnt > 1 and not dependencies.repeatable: @@ -209,7 +239,11 @@ if __name__ == "__main__": make_recipe += "-vg" # prepare command to run tests - test_command = f"make -C {regress_dir} {make_recipe} SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" + test_command = ( + f"make -C {regress_dir} {make_recipe} " + f"WORKERCOUNT={test_worker_count} " + f"SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" + ) # run test command n times try: diff --git a/src/test/regress/create_schedule b/src/test/regress/create_schedule index 82dfa2475..e301678b9 100644 --- a/src/test/regress/create_schedule +++ b/src/test/regress/create_schedule @@ -13,3 +13,5 @@ test: arbitrary_configs_truncate_create test: arbitrary_configs_truncate_cascade_create test: arbitrary_configs_truncate_partition_create test: arbitrary_configs_alter_table_add_constraint_without_name_create +test: merge_arbitrary_create +test: arbitrary_configs_router_create diff --git a/src/test/regress/expected/arbitrary_configs_router.out b/src/test/regress/expected/arbitrary_configs_router.out new file mode 100644 index 000000000..a42b955cc --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_router.out @@ -0,0 +1,1561 @@ +SET search_path TO arbitrary_configs_router; +SET client_min_messages TO WARNING; +-- test simple select for a single row +SELECT * FROM articles_hash WHERE author_id = 10 AND id = 50; + id | author_id | title | word_count +--------------------------------------------------------------------- + 50 | 10 | anjanette | 19519 +(1 row) + +-- get all titles by a single author +SELECT title FROM articles_hash WHERE author_id = 10; + title +--------------------------------------------------------------------- + aggrandize + absentness + andelee + attemper + anjanette +(5 rows) + +-- try ordering them by word count +SELECT title, word_count FROM articles_hash + WHERE author_id = 10 + ORDER BY word_count DESC NULLS LAST; + title | word_count +--------------------------------------------------------------------- + anjanette | 19519 + aggrandize | 17277 + attemper | 14976 + andelee | 6363 + absentness | 1820 +(5 rows) + +-- look at last two articles by an author +SELECT title, id FROM articles_hash + WHERE author_id = 5 + ORDER BY id + LIMIT 2; + title | id +--------------------------------------------------------------------- + aruru | 5 + adversa | 15 +(2 rows) + +-- find all articles by two authors in same shard +-- but plan is not fast path router plannable due to +-- two distribution columns in the query +SELECT title, author_id FROM articles_hash + WHERE author_id = 7 OR author_id = 8 + ORDER BY author_id ASC, id; + title | author_id +--------------------------------------------------------------------- + aseptic | 7 + auriga | 7 + arsenous | 7 + archduchies 
| 7 + abeyance | 7 + agatized | 8 + assembly | 8 + aerophyte | 8 + anatine | 8 + alkylic | 8 +(10 rows) + +-- having clause is supported if it goes to a single shard +-- and there is a single dist. key in the query +SELECT author_id, sum(word_count) AS corpus_size FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id + HAVING sum(word_count) > 1000 + ORDER BY sum(word_count) DESC; + author_id | corpus_size +--------------------------------------------------------------------- + 1 | 35894 +(1 row) + +-- fast path planner only supports the = operator +SELECT * FROM articles_hash WHERE author_id <= 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +SELECT * FROM articles_hash WHERE author_id IN (1, 3) ORDER BY 1,2,3,4; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 43 | 3 | affixal | 12723 +(10 rows) + +-- queries with CTEs cannot go through fast-path planning +WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) +SELECT * FROM first_author; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- two CTE joins also cannot go through fast-path planning +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + id | author_id | id | title +--------------------------------------------------------------------- + 1 | 1 | 1 | arsenous + 11 | 1 | 11 | alamo + 21 | 1 | 21 | arcading + 31 | 1 | 31 | athwartships + 41 | 1 | 41 | aznavour +(5 rows) + +-- this is a different case where each CTE is recursively planned and those go +-- through the fast-path router planner, but the top level join is not +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + id | author_id | id | title +--------------------------------------------------------------------- +(0 rows) + +-- recursive CTEs also cannot go through fast +-- path planning +WITH RECURSIVE hierarchy as ( + SELECT *, 1 AS level + FROM company_employees + WHERE company_id = 1 and manager_id = 0 + UNION + SELECT ce.*, (h.level+1) + FROM hierarchy h JOIN company_employees ce + ON (h.employee_id = ce.manager_id AND + h.company_id = ce.company_id AND + ce.company_id = 1)) +SELECT * FROM hierarchy WHERE LEVEL <= 2; + company_id | employee_id | manager_id | level +--------------------------------------------------------------------- + 1 | 1 | 0 | 1 + 1 | 2 | 1 | 2 + 1 | 3 | 1 | 2 +(3 rows) + +WITH update_article AS ( + UPDATE articles_hash SET word_count = 10 WHERE id = 1 AND word_count = 9 RETURNING * +) +SELECT * FROM update_article; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +WITH delete_article AS ( + DELETE FROM articles_hash WHERE id = 1 AND word_count = 10
RETURNING * +) +SELECT * FROM delete_article; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- grouping sets are supported via fast-path +SELECT + id, substring(title, 2, 1) AS subtitle, count(*) + FROM articles_hash + WHERE author_id = 1 + GROUP BY GROUPING SETS ((id),(subtitle)) + ORDER BY id, subtitle; + id | subtitle | count +--------------------------------------------------------------------- + 1 | | 1 + 11 | | 1 + 21 | | 1 + 31 | | 1 + 41 | | 1 + | l | 1 + | r | 2 + | t | 1 + | z | 1 +(9 rows) + +-- queries which involve functions in FROM clause are not supported via fast path planning +SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1; + id | author_id | title | word_count | position +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 | 3 + 11 | 1 | alamo | 1347 | 3 + 21 | 1 | arcading | 5890 | 3 + 31 | 1 | athwartships | 7271 | 3 + 41 | 1 | aznavour | 11814 | 3 +(5 rows) + +-- sublinks are not supported via fast path planning +SELECT * FROM articles_hash +WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2) +ORDER BY articles_hash.id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 2 | 2 | abducing | 13642 + 12 | 2 | archiblast | 18185 + 22 | 2 | antipope | 2728 + 32 | 2 | amazon | 11342 + 42 | 2 | ausable | 15885 +(5 rows) + +-- subqueries are not supported via fast path planning +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; + id | word_count +--------------------------------------------------------------------- + 50 | 19519 + 14 | 19094 + 48 | 18610 + 12 | 18185 + 46 | 17702 +(5 rows) + +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test +WHERE test.id = articles_hash.id and articles_hash.author_id = 1 +ORDER BY articles_hash.id; + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +-- simple lookup query just works +SELECT * + FROM articles_hash + WHERE author_id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- below query hits a single shard but with multiple filters +-- so cannot go via fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 OR author_id = 17; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- rename the output columns +SELECT id as article_id, word_count * id as random_value + FROM articles_hash + WHERE author_id = 1; + article_id | random_value +--------------------------------------------------------------------- + 1 | 9572 + 11 | 14817 + 21 | 123690 + 31 | 225401 + 41 | 484374 +(5 rows) + +-- joins do not go through fast-path planning +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_hash b + WHERE a.author_id = 10 and 
a.author_id = b.author_id + LIMIT 3; + first_author | second_word_count +--------------------------------------------------------------------- + 10 | 17277 + 10 | 1820 + 10 | 6363 +(3 rows) + +-- single shard select with limit goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 3; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 +(3 rows) + +-- single shard select with limit + offset goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 2 + OFFSET 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 +(2 rows) + +-- single shard select with limit + offset + order by goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id desc + LIMIT 2 + OFFSET 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 31 | 1 | athwartships | 7271 + 21 | 1 | arcading | 5890 +(2 rows) + +-- single shard select with group by on non-partition column goes through fast-path planning +SELECT id + FROM articles_hash + WHERE author_id = 1 + GROUP BY id + ORDER BY id; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- single shard select with distinct goes through fast-path planning +SELECT DISTINCT id + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- single shard aggregate goes through fast-path planning +SELECT avg(word_count) + FROM articles_hash + WHERE author_id = 2; + avg +--------------------------------------------------------------------- + 12356.400000000000 +(1 row) + +-- max, min, sum, count go through fast-path planning +SELECT max(word_count) as max, min(word_count) as min, + sum(word_count) as sum, count(word_count) as cnt + FROM articles_hash + WHERE author_id = 2; + max | min | sum | cnt +--------------------------------------------------------------------- + 18185 | 2728 | 61782 | 5 +(1 row) + +-- queries with aggregates and group by go through fast-path planning +SELECT max(word_count) + FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id; + max +--------------------------------------------------------------------- + 11814 +(1 row) + +-- set operations are not supported via fast-path planning +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 3 +) AS combination +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 43 | 3 | affixal | 12723 +(10 rows) + +-- function calls in the target list are supported via fast path +SELECT LEFT(title, 1) FROM articles_hash WHERE author_id = 1; + left +--------------------------------------------------------------------- + a + a + a + a + a +(5 rows) + +-- top-level union queries are supported through recursive planning +-- unions in subqueries are not supported via
fast-path planning +SELECT * FROM ( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 1)) uu +ORDER BY 1, 2 +LIMIT 5; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Test various filtering options for router plannable check +-- cannot go through fast-path if there is +-- explicit coercion +SELECT * + FROM articles_hash + WHERE author_id = 1::bigint; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- can go through fast-path if there is +-- implicit coercion +-- This doesn't work see the related issue +-- reported https://github.com/citusdata/citus/issues/2605 +-- SELECT * +-- FROM articles_hash +-- WHERE author_id = 1.0; +SELECT * + FROM articles_hash + WHERE author_id = 68719476736; -- this is bigint + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 and author_id >= 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 or id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- goes through fast-path planning because +-- the dist. key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = 1 or id = 41); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 41 | 1 | aznavour | 11814 +(2 rows) + +-- this time there is an OR clause which prevents +-- router planning at all +SELECT * + FROM articles_hash + WHERE author_id = 1 and id = 1 or id = 41; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 41 | 1 | aznavour | 11814 +(2 rows) + +-- goes through fast-path planning because +-- the dist. 
key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = random()::int * 0); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- not router plannable due to function call on the right side +SELECT * + FROM articles_hash + WHERE author_id = (random()::int * 0 + 1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(-1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE 1 = abs(author_id); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(author_id - 2); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- the function is not on the dist. 
key, so qualify as +-- fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = abs(id - 2)); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 +(1 row) + +-- not router plannable due to is true +SELECT * + FROM articles_hash + WHERE (author_id = 1) is true; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- router plannable, (boolean expression) = true is collapsed to (boolean expression) +SELECT * + FROM articles_hash + WHERE (author_id = 1) = true; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- some more complex quals +SELECT count(*) FROM articles_hash WHERE (author_id = 15) AND (id = 1 OR word_count > 5); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (author_id = 15) OR (id = 1 AND word_count > 5); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) OR (author_id = 1 AND word_count > 5); + count +--------------------------------------------------------------------- + 6 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 OR word_count > 5); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 AND (word_count > 5 OR id = 2)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 OR author_id = 2)); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 AND author_id = 2)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND ((word_count > 5 OR title ilike 'b%' ) AND (author_id = 2 AND word_count > 50))); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- fast-path router plannable, between operator is on another column +SELECT * + FROM articles_hash + WHERE (author_id = 1) and id between 0 and 20; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 +(2 rows) + +-- fast-path router plannable, partition column expression is and'ed to rest +SELECT * + FROM articles_hash + WHERE (author_id = 1) and (id = 1 or id = 31) and title like '%s'; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 31 | 1 | athwartships | 7271 +(2 rows) + +-- fast-path router plannable, order is changed +SELECT * + FROM articles_hash + WHERE (id = 1 or id = 31) and title like '%s' and (author_id = 1); + id | author_id | title | word_count 
+--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 31 | 1 | athwartships | 7271 +(2 rows) + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1) and (word_count < 3000 or word_count > 8000); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 41 | 1 | aznavour | 11814 +(3 rows) + +-- window functions are supported with fast-path router plannable +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5; + prev | title | word_count +--------------------------------------------------------------------- + | afrasia | 864 + afrasia | adversa | 3164 + adversa | antehall | 7707 + antehall | aminate | 9089 + aminate | aruru | 11389 +(5 rows) + +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5 + ORDER BY word_count DESC; + prev | title | word_count +--------------------------------------------------------------------- + aminate | aruru | 11389 + antehall | aminate | 9089 + adversa | antehall | 7707 + afrasia | adversa | 3164 + | afrasia | 864 +(5 rows) + +SELECT id, MIN(id) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + id | min +--------------------------------------------------------------------- + 11 | 11 + 21 | 11 + 31 | 11 + 1 | 1 + 41 | 1 +(5 rows) + +SELECT id, word_count, AVG(word_count) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + id | word_count | avg +--------------------------------------------------------------------- + 11 | 1347 | 1347.0000000000000000 + 21 | 5890 | 3618.5000000000000000 + 31 | 7271 | 4836.0000000000000000 + 1 | 9572 | 6020.0000000000000000 + 41 | 11814 | 7178.8000000000000000 +(5 rows) + +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1; + word_count | rank +--------------------------------------------------------------------- + 1347 | 1 + 5890 | 2 + 7271 | 3 + 9572 | 4 + 11814 | 5 +(5 rows) + +-- some more tests on complex target lists +SELECT DISTINCT ON (author_id, id) author_id, id, + MIN(id) over (order by avg(word_count)) * AVG(id * 5.2 + (1.0/max(word_count))) over (order by max(word_count)) as t1, + count(*) FILTER (WHERE title LIKE 'al%') as cnt_with_filter, + count(*) FILTER (WHERE '0300030' LIKE '%3%') as cnt_with_filter_2, + avg(case when id > 2 then char_length(word_count::text) * (id * strpos(word_count::text, '1')) end) as case_cnt, + COALESCE(strpos(avg(word_count)::text, '1'), 20) + FROM articles_hash as aliased_table + WHERE author_id = 1 + GROUP BY author_id, id + HAVING count(DISTINCT title) > 0 + ORDER BY author_id, id, sum(word_count) - avg(char_length(title)) DESC, COALESCE(array_upper(ARRAY[max(id)],1) * 5,0) DESC; + author_id | id | t1 | cnt_with_filter | cnt_with_filter_2 | case_cnt | coalesce +--------------------------------------------------------------------- + 1 | 1 | 
83.20028854345579490574 | 0 | 1 | | 0 + 1 | 11 | 629.20816629547141796586 | 1 | 1 | 44.0000000000000000 | 1 + 1 | 21 | 915.20501693381380745499 | 0 | 1 | 0.00000000000000000000 | 0 + 1 | 31 | 1201.20384890897723321000 | 0 | 1 | 496.0000000000000000 | 4 + 1 | 41 | 109.200247763831844321405335 | 0 | 1 | 205.0000000000000000 | 1 +(5 rows) + +-- where false queries are router plannable but not fast-path +SELECT * + FROM articles_hash + WHERE false; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and false; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and 1=0; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +SELECT * + FROM articles_hash + WHERE null and author_id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- we cannot qualify dist_key = X operator Y via +-- fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 2 | 2 | abducing | 13642 + 12 | 2 | archiblast | 18185 + 22 | 2 | antipope | 2728 + 32 | 2 | amazon | 11342 + 42 | 2 | ausable | 15885 +(5 rows) + +-- where false with immutable function returning false +-- goes through fast-path +SELECT * + FROM articles_hash a + WHERE a.author_id = 10 and int4eq(1, 2); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- partition_column is null clause does not prune out any shards, +-- all shards remain after shard pruning, not router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id is null; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- partition_column equals to null clause prunes out all shards +-- no shards after shard pruning, router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id = null; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- union/difference /intersection with where false +-- this query was not originally router plannable, addition of 1=0 +-- makes it router plannable but not fast-path +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- same with the above, but with WHERE false +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination WHERE false +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- window functions with where false +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY 
word_count) + FROM articles_hash + WHERE author_id = 1 and 1=0; + word_count | rank +--------------------------------------------------------------------- +(0 rows) + +-- complex query hitting a single shard and a fast-path +SELECT + count(DISTINCT CASE + WHEN + word_count > 100 + THEN + id + ELSE + NULL + END) as c + FROM + articles_hash + WHERE + author_id = 5; + c +--------------------------------------------------------------------- + 5 +(1 row) + +-- queries inside transactions can be fast-path router plannable +BEGIN; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +END; +-- queries inside read-only transactions can be fast-path router plannable +SET TRANSACTION READ ONLY; +WARNING: SET TRANSACTION can only be used in transaction blocks +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +END; +WARNING: there is no transaction in progress +-- cursor queries are fast-path router plannable +BEGIN; +DECLARE test_cursor CURSOR FOR + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +FETCH test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 +(1 row) + +FETCH ALL test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(4 rows) + +FETCH test_cursor; -- fetch one row after the last + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +FETCH BACKWARD test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 41 | 1 | aznavour | 11814 +(1 row) + +END; +-- queries inside copy can be router plannable +COPY ( + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id) TO STDOUT; +1 1 arsenous 9572 +11 1 alamo 1347 +21 1 arcading 5890 +31 1 athwartships 7271 +41 1 aznavour 11814 +-- table creation queries inside can be fast-path router plannable +CREATE TEMP TABLE temp_articles_hash as + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +-- fast-path router plannable queries may include filter for aggregates +SELECT count(*), count(*) FILTER (WHERE id < 3) + FROM articles_hash + WHERE author_id = 1; + count | count +--------------------------------------------------------------------- + 5 | 1 +(1 row) + +-- prepare queries can be router plannable +PREPARE author_1_articles as + SELECT * + FROM articles_hash + WHERE author_id = 1; +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count 
+--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- parametric prepare queries can be router plannable +PREPARE author_articles(int) as + SELECT * + FROM articles_hash + WHERE author_id = $1; +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE 
author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +PREPARE author_articles_update(int) AS + UPDATE articles_hash SET title = 'test' WHERE author_id = $1; +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +-- we don't want too many details. Though we're omitting +-- "DETAIL: distribution column value:", we find it acceptable +-- since the query results verify the correctness +\set VERBOSITY terse +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); +
id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +\set VERBOSITY default +-- insert .. 
select via coordinator could also +-- use fast-path queries +PREPARE insert_sel(int, int) AS +INSERT INTO articles_hash + SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0; +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +-- one final interesting prepared statement +-- where one of the filters is on the target list +PREPARE fast_path_agg_filter(int, int) AS + SELECT + count(*) FILTER (WHERE word_count=$1) + FROM + articles_hash + WHERE author_id = $2; +EXECUTE fast_path_agg_filter(1,1); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(2,2); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(3,3); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(4,4); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(5,5); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(6,6); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- views internally become subqueries, so not fast-path router query +SELECT * FROM test_view; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- materialized views can be created for fast-path router plannable queries +CREATE MATERIALIZED VIEW mv_articles_hash_empty AS + SELECT * FROM articles_hash WHERE author_id = 1; +SELECT * FROM mv_articles_hash_empty; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +SELECT id + FROM articles_hash + WHERE author_id = 1; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +INSERT INTO articles_hash VALUES (51, 1, 'amateus', 1814), (52, 1, 'second amateus', 2824); +-- verify insert is successful (not router plannable and executable) +SELECT id + FROM articles_hash + WHERE author_id = 1; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 + 51 + 52 +(7 rows) + +SELECT count(*) FROM collections_list WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FROM collections_list_1 WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FROM collections_list_2 WHERE key = 4; + count +--------------------------------------------------------------------- + 0 +(1 row) + +UPDATE collections_list SET value = 15 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FILTER (where value = 15) FROM collections_list_1 WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1
row) + +SELECT count(*) FILTER (where value = 15) FROM collections_list_2 WHERE key = 4; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- test INSERT using values from generate_series() and repeat() functions +INSERT INTO authors_reference (id, name) VALUES (generate_series(1, 10), repeat('Migjeni', 3)); +SELECT * FROM authors_reference ORDER BY 1, 2; + id | name +--------------------------------------------------------------------- + 1 | MigjeniMigjeniMigjeni + 2 | MigjeniMigjeniMigjeni + 3 | MigjeniMigjeniMigjeni + 4 | MigjeniMigjeniMigjeni + 5 | MigjeniMigjeniMigjeni + 6 | MigjeniMigjeniMigjeni + 7 | MigjeniMigjeniMigjeni + 8 | MigjeniMigjeniMigjeni + 9 | MigjeniMigjeniMigjeni + 10 | MigjeniMigjeniMigjeni +(10 rows) + diff --git a/src/test/regress/expected/arbitrary_configs_router_create.out b/src/test/regress/expected/arbitrary_configs_router_create.out new file mode 100644 index 000000000..74dfbf4f3 --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_router_create.out @@ -0,0 +1,121 @@ +CREATE SCHEMA arbitrary_configs_router; +SET search_path TO arbitrary_configs_router; +CREATE TABLE articles_hash ( + id bigint NOT NULL, + author_id bigint NOT NULL, + title varchar(20) NOT NULL, + word_count integer +); +SELECT create_distributed_table('articles_hash', 'author_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE authors_reference (id int, name text); +SELECT create_reference_table('authors_reference'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- create a bunch of test data +INSERT INTO articles_hash VALUES (1, 1, 'arsenous', 9572), (2, 2, 'abducing', 13642),( 3, 3, 'asternal', 10480),( 4, 4, 'altdorfer', 14551),( 5, 5, 'aruru', 11389), + (6, 6, 'atlases', 15459),(7, 7, 'aseptic', 12298),( 8, 8, 'agatized', 16368),(9, 9, 'alligate', 438), + (10, 10, 'aggrandize', 17277),(11, 1, 'alamo', 1347),(12, 2, 'archiblast', 18185), + (13, 3, 'aseyev', 2255),(14, 4, 'andesite', 19094),(15, 5, 'adversa', 3164), + (16, 6, 'allonym', 2),(17, 7, 'auriga', 4073),(18, 8, 'assembly', 911),(19, 9, 'aubergiste', 4981), + (20, 10, 'absentness', 1820),(21, 1, 'arcading', 5890),(22, 2, 'antipope', 2728),(23, 3, 'abhorring', 6799), + (24, 4, 'audacious', 3637),(25, 5, 'antehall', 7707),(26, 6, 'abington', 4545),(27, 7, 'arsenous', 8616), + (28, 8, 'aerophyte', 5454),(29, 9, 'amateur', 9524),(30, 10, 'andelee', 6363),(31, 1, 'athwartships', 7271), + (32, 2, 'amazon', 11342),(33, 3, 'autochrome', 8180),(34, 4, 'amnestied', 12250),(35, 5, 'aminate', 9089), + (36, 6, 'ablation', 13159),(37, 7, 'archduchies', 9997),(38, 8, 'anatine', 14067),(39, 9, 'anchises', 10906), + (40, 10, 'attemper', 14976),(41, 1, 'aznavour', 11814),(42, 2, 'ausable', 15885),(43, 3, 'affixal', 12723), + (44, 4, 'anteport', 16793),(45, 5, 'afrasia', 864),(46, 6, 'atlanta', 17702),(47, 7, 'abeyance', 1772), + (48, 8, 'alkylic', 18610),(49, 9, 'anyone', 2681),(50, 10, 'anjanette', 19519); +CREATE TABLE company_employees (company_id int, employee_id int, manager_id int); +SELECT create_distributed_table('company_employees', 'company_id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO company_employees values(1, 1, 0); +INSERT INTO company_employees values(1, 2, 1); +INSERT INTO company_employees values(1, 3, 1); +INSERT INTO company_employees values(1, 
4, 2); +INSERT INTO company_employees values(1, 5, 4); +INSERT INTO company_employees values(3, 1, 0); +INSERT INTO company_employees values(3, 15, 1); +INSERT INTO company_employees values(3, 3, 1); +-- finally, some tests with partitioned tables +CREATE TABLE collections_list ( + key bigint, + ts timestamptz, + collection_id integer, + value numeric +) PARTITION BY LIST (collection_id ); +CREATE TABLE collections_list_1 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 1 ); +CREATE TABLE collections_list_2 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 2 ); +SELECT create_distributed_table('collections_list', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO collections_list SELECT i % 10, now(), (i % 2) + 1, i*i FROM generate_series(0, 50)i; +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id() RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = 1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id(int) RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = $1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; +-- check that functions returning setof queries are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count() RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = 1; + +END; +$$ LANGUAGE plpgsql; +-- check that functions returning setof queries are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count(int) RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = $1; + +END; +$$ LANGUAGE plpgsql; +-- Suppress the warning that says the view won't be distributed +-- because it depends on a local table. +-- +-- This only happens when running PostgresConfig. +SET client_min_messages TO ERROR; +CREATE VIEW test_view AS + SELECT * FROM articles_hash WHERE author_id = 1; diff --git a/src/test/regress/expected/citus_non_blocking_split_shards.out b/src/test/regress/expected/citus_non_blocking_split_shards.out index d6dde8b7a..fe3cade55 100644 --- a/src/test/regress/expected/citus_non_blocking_split_shards.out +++ b/src/test/regress/expected/citus_non_blocking_split_shards.out @@ -60,7 +60,7 @@ SELECT create_reference_table('reference_table'); (1 row) -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); create_distributed_table @@ -84,8 +84,9 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- END : Create Foreign key constraints. -- BEGIN : Load data into tables.
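The hunk above widens colocated_dist_table with a stored generated column, a varchar payload, and a col_todrop column that the load step just below drops again, so the shard-split paths are exercised against both generated columns and dropped-column attribute numbers. A condensed, standalone sketch of that scenario (hypothetical table name, run outside the regression suite, assuming the colocated 'sensors' table from the test exists):

-- mirror the test table: a stored generated column plus a disposable column
CREATE TABLE colocated_sketch (
    measureid integer PRIMARY KEY,
    genid integer GENERATED ALWAYS AS (measureid + 3) STORED,
    value varchar(44),
    col_todrop integer
);
SELECT create_distributed_table('colocated_sketch', 'measureid', colocate_with := 'sensors');
-- generated columns cannot be assigned directly, so name only the ordinary columns
INSERT INTO colocated_sketch (measureid, value, col_todrop)
    SELECT i, 'Value', i FROM generate_series(0, 1000) i;
-- dropping a column before the split leaves a hole in the attribute numbering
ALTER TABLE colocated_sketch DROP COLUMN col_todrop;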
INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; SELECT COUNT(*) FROM sensors; count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/citus_split_shard_by_split_points.out b/src/test/regress/expected/citus_split_shard_by_split_points.out index 87f50da31..13f3b7a36 100644 --- a/src/test/regress/expected/citus_split_shard_by_split_points.out +++ b/src/test/regress/expected/citus_split_shard_by_split_points.out @@ -56,7 +56,7 @@ SELECT create_reference_table('reference_table'); (1 row) -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); create_distributed_table @@ -80,8 +80,9 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- END : Create Foreign key constraints. -- BEGIN : Load data into tables. INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; SELECT COUNT(*) FROM sensors; count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/citus_stats_tenants.out b/src/test/regress/expected/citus_stats_tenants.out index 8623676fb..474562ef6 100644 --- a/src/test/regress/expected/citus_stats_tenants.out +++ b/src/test/regress/expected/citus_stats_tenants.out @@ -6,12 +6,33 @@ CREATE OR REPLACE FUNCTION pg_catalog.clean_citus_stats_tenants() RETURNS VOID LANGUAGE C AS 'citus', $$clean_citus_stats_tenants$$; +CREATE OR REPLACE FUNCTION pg_catalog.sleep_until_next_period() +RETURNS VOID +LANGUAGE C +AS 'citus', $$sleep_until_next_period$$; SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); result --------------------------------------------------------------------- +(3 rows) + +-- set period to a high number to prevent stats from being reset +SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM SET citus.stats_tenants_period TO 1000000000'); + result +--------------------------------------------------------------------- + ALTER SYSTEM + ALTER SYSTEM + ALTER SYSTEM +(3 rows) + +SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); + result +--------------------------------------------------------------------- + t + t + t (3 rows) CREATE TABLE dist_tbl (a INT, b TEXT); @@ -47,110 +68,22 @@ INSERT INTO dist_tbl VALUES (2, 'abcd'); UPDATE dist_tbl SET b = a + 1 WHERE a = 3; UPDATE dist_tbl SET b = a + 1 WHERE a = 4; DELETE FROM dist_tbl WHERE a = 5; -\c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, 
read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants(true) ORDER BY tenant_attribute; tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period --------------------------------------------------------------------- 1 | 0 | 0 | 1 | 0 - 5 | 0 | 0 | 1 | 0 -(2 rows) - -\c - - - :worker_2_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; - tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period ---------------------------------------------------------------------- 2 | 0 | 0 | 1 | 0 3 | 0 | 0 | 1 | 0 4 | 0 | 0 | 1 | 0 -(3 rows) - -\c - - - :master_port -SET search_path TO citus_stats_tenants; -SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM SET citus.stats_tenants_period TO 3'); - result ---------------------------------------------------------------------- - ALTER SYSTEM - ALTER SYSTEM - ALTER SYSTEM -(3 rows) - -SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); - result ---------------------------------------------------------------------- - t - t - t -(3 rows) - -SELECT count(*)>=0 FROM dist_tbl WHERE a = 1; - ?column? ---------------------------------------------------------------------- - t -(1 row) - -SELECT count(*)>=0 FROM dist_tbl WHERE a = 2; - ?column? ---------------------------------------------------------------------- - t -(1 row) - -\c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; - tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period ---------------------------------------------------------------------- - 1 | 1 | 0 | 2 | 0 5 | 0 | 0 | 1 | 0 -(2 rows) +(5 rows) -\c - - - :worker_2_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; - tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period ---------------------------------------------------------------------- - 2 | 1 | 0 | 2 | 0 - 3 | 0 | 0 | 1 | 0 - 4 | 0 | 0 | 1 | 0 -(3 rows) - -\c - - - :master_port -SELECT pg_sleep (3); - pg_sleep ---------------------------------------------------------------------- - -(1 row) - -\c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; - tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period ---------------------------------------------------------------------- - 1 | 0 | 1 | 0 | 2 - 5 | 0 | 0 | 0 | 1 -(2 rows) - -\c - - - :worker_2_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period 
FROM citus_stats_tenants ORDER BY tenant_attribute; - tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period ---------------------------------------------------------------------- - 2 | 0 | 1 | 0 | 2 - 3 | 0 | 0 | 0 | 1 - 4 | 0 | 0 | 0 | 1 -(3 rows) - -\c - - - :master_port -SET search_path TO citus_stats_tenants; -SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM SET citus.stats_tenants_period TO 60'); - result ---------------------------------------------------------------------- - ALTER SYSTEM - ALTER SYSTEM - ALTER SYSTEM -(3 rows) - -SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); result --------------------------------------------------------------------- - t - t - t + + + (3 rows) -- queries with multiple tenants should not be counted @@ -167,16 +100,11 @@ SELECT count(*)>=0 FROM ref_tbl WHERE a = 1; t (1 row) -\c - - - :worker_1_port -SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants ORDER BY tenant_attribute; +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants(true) ORDER BY tenant_attribute; tenant_attribute | query_count_in_this_period --------------------------------------------------------------------- - 1 | 0 - 5 | 0 -(2 rows) +(0 rows) -\c - - - :master_port -SET search_path TO citus_stats_tenants; -- queries with multiple tables but one tenant should be counted SELECT count(*)>=0 FROM dist_tbl, dist_tbl_2 WHERE dist_tbl.a = 1 AND dist_tbl_2.a = 1; ?column? @@ -190,17 +118,15 @@ SELECT count(*)>=0 FROM dist_tbl JOIN dist_tbl_2 ON dist_tbl.a = dist_tbl_2.a WH t (1 row) -\c - - - :worker_1_port -SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants WHERE tenant_attribute = '1'; +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants(true) WHERE tenant_attribute = '1'; tenant_attribute | query_count_in_this_period --------------------------------------------------------------------- 1 | 2 (1 row) -\c - - - :master_port -SET search_path TO citus_stats_tenants; -- test scoring -- all of these distribution column values are from second worker +SELECT nodeid AS worker_2_nodeid FROM pg_dist_node WHERE nodeport = :worker_2_port \gset SELECT count(*)>=0 FROM dist_tbl WHERE a = 2; ?column? --------------------------------------------------------------------- @@ -225,18 +151,15 @@ SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; t (1 row) -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; tenant_attribute | query_count_in_this_period | score --------------------------------------------------------------------- - 2 | 1 | 2000000000 - 3 | 1 | 1500000000 - 4 | 1 | 1500000000 + 2 | 1 | 1000000000 + 3 | 1 | 1000000000 + 4 | 1 | 1000000000 abcd | 1 | 1000000000 (4 rows) -\c - - - :master_port -SET search_path TO citus_stats_tenants; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; ?column? 
@@ -255,48 +178,99 @@ SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde';
 t
(1 row)

-\c - - - :worker_2_port
-SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC;
- tenant_attribute | query_count_in_this_period | score
---------------------------------------------------------------------
- abcd | 3 | 3000000000
- 2 | 1 | 2000000000
- 3 | 1 | 1500000000
- 4 | 1 | 1500000000
- bcde | 1 | 1000000000
-(5 rows)
-
-\c - - - :master_port
-SET search_path TO citus_stats_tenants;
-SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde';
- ?column?
---------------------------------------------------------------------
- t
-(1 row)
-
-SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde';
- ?column?
---------------------------------------------------------------------
- t
-(1 row)
-
 SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'cdef';
 ?column?
---------------------------------------------------------------------
 t
(1 row)

-\c - - - :worker_2_port
-SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC;
+SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute;
+ tenant_attribute | query_count_in_this_period | score
+---------------------------------------------------------------------
+ abcd | 3 | 3000000000
+ 2 | 1 | 1000000000
+ 3 | 1 | 1000000000
+ 4 | 1 | 1000000000
+ bcde | 1 | 1000000000
+ cdef | 1 | 1000000000
+(6 rows)
+
+SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde';
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde';
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'defg';
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute;
 tenant_attribute | query_count_in_this_period | score
---------------------------------------------------------------------
 abcd | 3 | 3000000000
 bcde | 3 | 3000000000
- 2 | 1 | 2000000000
- 3 | 1 | 1500000000
- 4 | 1 | 1500000000
+ 2 | 1 | 1000000000
+ 3 | 1 | 1000000000
+ 4 | 1 | 1000000000
 cdef | 1 | 1000000000
-(6 rows)
+ defg | 1 | 1000000000
+(7 rows)
+
+-- test period passing
+SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()');
+ result
+---------------------------------------------------------------------
+
+
+
+(3 rows)
+
+SELECT count(*)>=0 FROM dist_tbl WHERE a = 1;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
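A note on the reset idiom that replaces the ALTER SYSTEM / pg_reload_conf dance: the new expectations zero the counters with clean_citus_stats_tenants() on every node, so each scenario starts from a clean slate. A sketch, using only the helpers that appear in this file (the empty result rows are expected because the function returns void):

SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()');
-- one empty row per node; subsequent queries are counted from zeroed state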
+INSERT INTO dist_tbl VALUES (5, 'abcd');
+\c - - - :worker_1_port
+SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants_local ORDER BY tenant_attribute;
+ tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period
+---------------------------------------------------------------------
+ 1 | 1 | 0 | 1 | 0
+ 5 | 0 | 0 | 1 | 0
+(2 rows)
+
+-- simulate passing the period
+SET citus.stats_tenants_period TO 2;
+SELECT sleep_until_next_period();
+ sleep_until_next_period
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants_local ORDER BY tenant_attribute;
+ tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period
+---------------------------------------------------------------------
+ 1 | 0 | 1 | 0 | 1
+ 5 | 0 | 0 | 0 | 1
+(2 rows)
+
+\c - - - :worker_2_port
+SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC;
+ tenant_attribute | query_count_in_this_period | score
+---------------------------------------------------------------------
+ 1 | 0 | 500000000
+ 5 | 0 | 500000000
+(2 rows)

 \c - - - :master_port
 SET search_path TO citus_stats_tenants;
@@ -375,23 +349,33 @@ SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, q
 tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period
---------------------------------------------------------------------
 /*bcde | 1 | 0 | 1 | 0
+ /b*c/de | 1 | 0 | 1 | 0
 /b*cde | 1 | 0 | 1 | 0
 /b/**/cde | 1 | 0 | 1 | 0
 /b/*/cde | 1 | 0 | 1 | 0
+ /bcde | 1 | 0 | 1 | 0
+ äbc | 1 | 0 | 1 | 0
 b/*//cde | 1 | 0 | 1 | 0
+ bcde* | 1 | 0 | 1 | 0
 bcde*/ | 1 | 0 | 1 | 0
-(6 rows)
+(10 rows)

 \c - - - :worker_2_port
 SET search_path TO citus_stats_tenants;
 SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute;
 tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period
---------------------------------------------------------------------
+ /*bcde | 1 | 0 | 1 | 0
 /b*c/de | 1 | 0 | 1 | 0
+ /b*cde | 1 | 0 | 1 | 0
+ /b/**/cde | 1 | 0 | 1 | 0
+ /b/*/cde | 1 | 0 | 1 | 0
 /bcde | 1 | 0 | 1 | 0
 äbc | 1 | 0 | 1 | 0
+ b/*//cde | 1 | 0 | 1 | 0
 bcde* | 1 | 0 | 1 | 0
-(4 rows)
+ bcde*/ | 1 | 0 | 1 | 0
+(10 rows)

 SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()');
 result
@@ -428,11 +412,7 @@ SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde*';
 SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute;
 tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period
---------------------------------------------------------------------
- /b*c/de | 1 | 0 | 1 | 0
- /bcde | 1 | 0 | 1 | 0
- äbc | 1 | 0 | 1 | 0
- bcde* | 1 | 0 | 1 | 0
-(4 rows)
+(0 rows)

 -- test local cached queries & prepared statements
 PREPARE dist_tbl_text_select_plan (text) AS SELECT count(*)>=0 FROM dist_tbl_text WHERE a = $1;
@@ -511,11 +491,7 @@ EXECUTE dist_tbl_text_select_plan('bcde*');
 SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute;
 tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period
---------------------------------------------------------------------
- /b*c/de | 4 | 0 | 4 | 0
- /bcde | 4 | 0 | 4 | 0
- äbc | 4 | 0 | 4 | 0
- bcde* | 4 | 0 | 4 | 0
-(4 rows)
+(0 rows)

 \c - - - :master_port
 SET search_path TO citus_stats_tenants;
@@ -597,10 +573,10 @@ SET search_path TO citus_stats_tenants;
 SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute;
 tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period
---------------------------------------------------------------------
- /b*c/de | 7 | 0 | 7 | 0
- /bcde | 7 | 0 | 7 | 0
- äbc | 7 | 0 | 7 | 0
- bcde* | 7 | 0 | 7 | 0
+ /b*c/de | 3 | 0 | 3 | 0
+ /bcde | 3 | 0 | 3 | 0
+ äbc | 3 | 0 | 3 | 0
+ bcde* | 3 | 0 | 3 | 0
(4 rows)

 \c - - - :master_port
diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out
index 6fc472b70..e2b3aea65 100644
--- a/src/test/regress/expected/merge.out
+++ b/src/test/regress/expected/merge.out
@@ -17,7 +17,9 @@ CREATE SCHEMA merge_schema;
 SET search_path TO merge_schema;
 SET citus.shard_count TO 4;
 SET citus.next_shard_id TO 4000000;
-SET citus.explain_all_tasks to true;
+SET citus.explain_all_tasks TO true;
+SET citus.shard_replication_factor TO 1;
+SET citus.max_adaptive_executor_pool_size TO 1;
 SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0);
 NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata
 ?column?
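A note on the two GUCs added to the merge tests: the likely intent (an assumption, not stated in the patch) is deterministic output. With citus.shard_replication_factor at 1 each shard has a single placement, and with citus.max_adaptive_executor_pool_size at 1 each worker uses one connection, so each 'NOTICE: issuing ...' line below appears exactly once per shard task. A sketch of the session setup these expectations assume:

SET citus.shard_count TO 4;                      -- four shards, as set above
SET citus.shard_replication_factor TO 1;         -- one placement per shard
SET citus.max_adaptive_executor_pool_size TO 1;  -- serialize task execution per worker
SET citus.log_remote_commands TO true;           -- surface the per-shard commands as NOTICEs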
@@ -214,9 +216,45 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut
(1 row)

+-- Updates the row with customer_id = 30002
+SELECT * from target t WHERE t.customer_id = 30002;
+ customer_id | last_order_id | order_center | order_count | last_order
+---------------------------------------------------------------------
+ 30002 | 103 | AX | -1 | Sun Jan 17 19:53:00 2021
+(1 row)
+
+-- Turn on notices to print the tasks sent to the nodes
+SET citus.log_remote_commands to true;
 MERGE INTO target t
 USING source s
- ON (t.customer_id = s.customer_id)
+ ON (t.customer_id = s.customer_id) AND t.customer_id = 30002
+ WHEN MATCHED AND t.order_center = 'XX' THEN
+ DELETE
+ WHEN MATCHED THEN
+ UPDATE SET -- Existing customer, update the order count and last_order_id
+ order_count = t.order_count + 1,
+ last_order_id = s.order_id
+ WHEN NOT MATCHED THEN
+ DO NOTHING;
+NOTICE: issuing MERGE INTO merge_schema.target_xxxxxxx t USING merge_schema.source_xxxxxxx s ON ((t.customer_id OPERATOR(pg_catalog.=) s.customer_id) AND (t.customer_id OPERATOR(pg_catalog.=) 30002)) WHEN MATCHED AND ((t.order_center COLLATE "default") OPERATOR(pg_catalog.=) 'XX'::text) THEN DELETE WHEN MATCHED THEN UPDATE SET last_order_id = s.order_id, order_count = (t.order_count OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN DO NOTHING
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT * from target t WHERE t.customer_id = 30002;
+ customer_id | last_order_id | order_center | order_count | last_order
+---------------------------------------------------------------------
+ 30002 | 103 | AX | 0 | Sun Jan 17 19:53:00 2021
+(1 row)
+
+-- Deletes the row with customer_id = 30004
+SELECT * from target t WHERE t.customer_id = 30004;
+ customer_id | last_order_id | order_center | order_count | last_order
+---------------------------------------------------------------------
+ 30004 | 99 | XX | -1 | Fri Sep 11 03:23:00 2020
+(1 row)
+
+MERGE INTO target t
+ USING source s
+ ON (t.customer_id = s.customer_id) AND t.customer_id = 30004
 WHEN MATCHED AND t.order_center = 'XX' THEN
 DELETE
 WHEN MATCHED THEN
@@ -226,7 +264,34 @@ MERGE INTO target t
 WHEN NOT MATCHED THEN -- New entry, record it.
 INSERT (customer_id, last_order_id, order_center, order_count, last_order) VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time);
-ERROR: MERGE command is not supported on distributed/reference tables yet
+SELECT * from target t WHERE t.customer_id = 30004;
+ customer_id | last_order_id | order_center | order_count | last_order
+---------------------------------------------------------------------
+(0 rows)
+
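What makes these MERGE commands routable: the extra conjunct on the distribution column (t.customer_id = 30002, then 30004) pins the command to a single shard, which is why exactly one 'NOTICE: issuing MERGE ...' appears per statement above. A minimal sketch of the routable shape, using the table and column names from this test:

MERGE INTO target t
USING source s
ON (t.customer_id = s.customer_id) AND t.customer_id = 30002  -- equality on the distribution column
WHEN MATCHED THEN
    UPDATE SET order_count = t.order_count + 1,
               last_order_id = s.order_id
WHEN NOT MATCHED THEN
    DO NOTHING;  -- no INSERT arm, so no other shard can be affected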
+-- Updating the distribution column is allowed if the operation is a no-op
+SELECT * from target t WHERE t.customer_id = 30000;
+ customer_id | last_order_id | order_center | order_count | last_order
+---------------------------------------------------------------------
+ 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022
+(1 row)
+
+MERGE INTO target t
+USING SOURCE s
+ON (t.customer_id = s.customer_id AND t.customer_id = 30000)
+WHEN MATCHED THEN
+ UPDATE SET customer_id = 30000;
+MERGE INTO target t
+USING SOURCE s
+ON (t.customer_id = s.customer_id AND t.customer_id = 30000)
+WHEN MATCHED THEN
+ UPDATE SET customer_id = t.customer_id;
+SELECT * from target t WHERE t.customer_id = 30000;
+ customer_id | last_order_id | order_center | order_count | last_order
+---------------------------------------------------------------------
+ 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022
+(1 row)
+
 --
 -- Test MERGE with CTE as source
 --
@@ -269,7 +334,6 @@ MERGE INTO t1
 UPDATE SET val = t1.val + 1
 WHEN NOT MATCHED THEN
 INSERT (id, val) VALUES (pg_res.id, pg_res.val);
--- Two rows with id 2 and val incremented, id 3, and id 1 is deleted
 SELECT * FROM t1 order by id;
 id | val
---------------------------------------------------------------------
@@ -386,18 +450,61 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut
(1 row)

+SELECT * FROM t1 order by id;
+ id | val
+---------------------------------------------------------------------
+ 1 | 0
+ 2 | 0
+ 5 | 0
+(3 rows)
+
+SET citus.log_remote_commands to true;
 WITH s1_res AS (
 SELECT * FROM s1
 )
 MERGE INTO t1
- USING s1_res ON (s1_res.id = t1.id)
+ USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6
 WHEN MATCHED AND s1_res.val = 0 THEN
 DELETE
 WHEN MATCHED THEN
 UPDATE SET val = t1.val + 1
 WHEN NOT MATCHED THEN
 INSERT (id, val) VALUES (s1_res.id, s1_res.val);
-ERROR: MERGE command is not supported on distributed/reference tables yet
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+-- Other than id 6, everything else is a NO match and should appear in target
+SELECT * FROM t1 order by 1, 2;
+ id | val
+---------------------------------------------------------------------
+ 1 | 0
+ 1 | 0
+ 2 | 0
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 5 | 0
+ 6 | 1
+(8 rows)
+
 --
 -- Test with multiple join conditions
 --
@@ -553,16 +660,38 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut
(1 row)

+SELECT * FROM t2 ORDER BY 1;
+ id | val | src
+---------------------------------------------------------------------
+ 1 | 0 | target
+ 2 | 0 | target
+ 3 | 1 | match
+ 4 | 0 | match
+(4 rows)
+
+SET citus.log_remote_commands to true;
 MERGE INTO t2
 USING s2
-ON t2.id = s2.id AND t2.src = s2.src
+ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4
 WHEN MATCHED AND t2.val = 1 THEN
 UPDATE SET val = s2.val + 10
 WHEN MATCHED THEN
 DELETE
 WHEN NOT MATCHED THEN
- INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src);
-ERROR: MERGE command is not supported on distributed/reference tables yet
+ DO NOTHING;
+NOTICE: issuing MERGE INTO merge_schema.t2_xxxxxxx t2 USING merge_schema.s2_xxxxxxx s2 ON ((t2.id OPERATOR(pg_catalog.=) s2.id) AND (t2.src OPERATOR(pg_catalog.=) s2.src) AND (t2.id OPERATOR(pg_catalog.=) 4)) WHEN MATCHED AND (t2.val OPERATOR(pg_catalog.=) 1) THEN UPDATE SET val = (s2.val OPERATOR(pg_catalog.+) 10) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN DO NOTHING
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+-- Row with id = 4 is a match for the delete clause, so the row should be deleted
+-- Row with id = 3 is a NO match; with DO NOTHING, no source row is inserted
+SELECT * FROM t2 ORDER BY 1;
+ id | val | src
+---------------------------------------------------------------------
+ 1 | 0 | target
+ 2 | 0 | target
+ 3 | 1 | match
+(3 rows)
+
 --
 -- With sub-query as the MERGE source
 --
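For sub-query (and CTE) sources, the same rule as for plain tables applies, per the test's own comment further down: source and target must be co-located and joined on the distribution column. A hedged sketch using the t1/s1 tables from this file:

MERGE INTO t1
USING (SELECT id, val FROM s1) sub
ON t1.id = sub.id               -- sub.id descends from s1's distribution column
WHEN MATCHED THEN
    UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
    INSERT (id, val) VALUES (sub.id, sub.val);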
@@ -943,7 +1072,7 @@ WHEN MATCHED THEN
 UPDATE SET value = vl_source.value, id = vl_target.id + 1
 WHEN NOT MATCHED THEN
 INSERT VALUES(vl_source.ID, vl_source.value);
-DEBUG:
+DEBUG:
 RESET client_min_messages;
 SELECT * INTO vl_local FROM vl_target ORDER BY 1 ;
 -- Should be equal
@@ -996,7 +1125,7 @@ WHEN MATCHED THEN
 DO NOTHING
 WHEN NOT MATCHED THEN
 INSERT VALUES(rs_source.id);
-DEBUG:
+DEBUG:
 RESET client_min_messages;
 SELECT * INTO rs_local FROM rs_target ORDER BY 1 ;
 -- Should be equal
@@ -1094,7 +1223,8 @@ END;
 $$ language plpgsql volatile;
 CREATE TABLE fn_target(id int, data varchar);
 MERGE INTO fn_target
-USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source
+--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source
+USING (SELECT id, source FROM dist_table) as fn_source
 ON fn_source.id = fn_target.id
 WHEN MATCHED THEN
 DO NOTHING
@@ -1110,29 +1240,22 @@ SELECT citus_add_local_table_to_metadata('fn_target');
(1 row)

-SELECT create_distributed_table('dist_table', 'id');
-NOTICE: Copying data from local table...
-NOTICE: copying the data has completed
-DETAIL: The local data in the table is no longer visible, but is still on disk.
-HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.dist_table$$)
- create_distributed_table
+SELECT citus_add_local_table_to_metadata('dist_table');
+ citus_add_local_table_to_metadata
---------------------------------------------------------------------

(1 row)

 SET client_min_messages TO DEBUG1;
 MERGE INTO fn_target
-USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source
+--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source
+USING (SELECT id, source FROM dist_table) as fn_source
 ON fn_source.id = fn_target.id
 WHEN MATCHED THEN
 DO NOTHING
 WHEN NOT MATCHED THEN
 INSERT VALUES(fn_source.id, fn_source.source);
-DEBUG: function does not have co-located tables
-DEBUG: generating subplan XXX_1 for subquery SELECT id, source FROM merge_schema.f_dist() f(id integer, source character varying)
-DEBUG:
-DEBUG: Plan XXX query after replacing subqueries and CTEs: MERGE INTO merge_schema.fn_target USING (SELECT intermediate_result.id, intermediate_result.source FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, source character varying)) fn_source ON (fn_source.id OPERATOR(pg_catalog.=) fn_target.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT (id, data) VALUES (fn_source.id, fn_source.source)
+DEBUG:
 RESET client_min_messages;
 SELECT * INTO fn_local FROM fn_target ORDER BY 1 ;
 -- Should be equal
@@ -1204,7 +1327,7 @@ MERGE INTO ft_target
 DELETE
 WHEN NOT MATCHED THEN
 INSERT (id, user_val) VALUES (foreign_table.id, foreign_table.user_val);
-DEBUG:
+DEBUG:
 RESET client_min_messages;
 SELECT * FROM ft_target;
 id | user_val
@@ -1213,9 +1336,1174 @@ SELECT * FROM ft_target;
 3 | source
 (2 rows)

+--
+-- complex joins on the source side
+--
+-- the source (a join of two relations) is an unaliased join
+CREATE TABLE target_cj(tid int, src text, val int);
+CREATE TABLE source_cj1(sid1 int, src1 text, val1 int);
+CREATE TABLE source_cj2(sid2 int, src2 text, val2 int);
+INSERT INTO target_cj VALUES (1, 'target', 0);
+INSERT INTO target_cj VALUES (2, 'target', 0);
+INSERT INTO target_cj VALUES (2, 'target', 0);
+INSERT INTO target_cj VALUES (3, 'target', 0);
+INSERT INTO source_cj1 VALUES (2, 'source-1', 10);
+INSERT INTO source_cj2 VALUES (2, 'source-2', 20);
+BEGIN;
+MERGE INTO target_cj t
+USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2
+ON t.tid = sid1 AND t.tid = 2
+WHEN MATCHED THEN
+ UPDATE SET src = src2
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+-- Gold result to compare against
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | source-2 | 0
+ 2 | source-2 | 0
+ 3 | target | 0
+(4 rows)
+
+ROLLBACK;
+BEGIN;
+-- try accessing columns from either side of the source join
+MERGE INTO target_cj t
+USING source_cj1 s2
+ INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10
+ON t.tid = sid1 AND t.tid = 2
+WHEN MATCHED THEN
+ UPDATE SET tid = sid2, src = src1, val = val2
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+-- Gold result to compare against
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | source-1 | 20
+ 2 | source-1 | 20
+ 3 | target | 0
+(4 rows)
+
+ROLLBACK;
+-- Test the same scenarios with distributed tables
+SELECT create_distributed_table('target_cj', 'tid');
+NOTICE: Copying data from local table...
+NOTICE: copying the data has completed
+DETAIL: The local data in the table is no longer visible, but is still on disk.
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.target_cj$$)
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('source_cj1', 'sid1');
+NOTICE: Copying data from local table...
+NOTICE: copying the data has completed
+DETAIL: The local data in the table is no longer visible, but is still on disk.
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj1$$)
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('source_cj2', 'sid2');
+NOTICE: Copying data from local table...
+NOTICE: copying the data has completed
+DETAIL: The local data in the table is no longer visible, but is still on disk.
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj2$$)
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
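The join tests above are about to be repeated on distributed tables. All three tables are distributed on the column used in the joins (tid/sid1/sid2), which is what allows the joined source to be pushed down as a single MERGE task per shard, as the NOTICE lines below show. The joined-source shape, in isolation (identifiers from this test):

MERGE INTO target_cj t
USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2  -- co-located join as the source
ON t.tid = sid1 AND t.tid = 2                                -- pins one shard via the distribution column
WHEN MATCHED THEN
    UPDATE SET src = src2
WHEN NOT MATCHED THEN
    DO NOTHING;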
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO target_cj t
+USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2
+ON t.tid = sid1 AND t.tid = 2
+WHEN MATCHED THEN
+ UPDATE SET src = src2
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t USING (merge_schema.source_cj1_xxxxxxx s1 JOIN merge_schema.source_cj2_xxxxxxx s2 ON ((s1.sid1 OPERATOR(pg_catalog.=) s2.sid2))) ON ((t.tid OPERATOR(pg_catalog.=) s1.sid1) AND (t.tid OPERATOR(pg_catalog.=) 2)) WHEN MATCHED THEN UPDATE SET src = s2.src2 WHEN NOT MATCHED THEN DO NOTHING
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | source-2 | 0
+ 2 | source-2 | 0
+ 3 | target | 0
+(4 rows)
+
+ROLLBACK;
+BEGIN;
+-- try accessing columns from either side of the source join
+MERGE INTO target_cj t
+USING source_cj1 s2
+ INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10
+ON t.tid = sid1 AND t.tid = 2
+WHEN MATCHED THEN
+ UPDATE SET src = src1, val = val2
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | source-1 | 20
+ 2 | source-1 | 20
+ 3 | target | 0
+(4 rows)
+
+ROLLBACK;
+-- sub-query as a source
+BEGIN;
+MERGE INTO target_cj t
+USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub
+ON t.tid = sub.sid1 AND t.tid = 2
+WHEN MATCHED THEN
+ UPDATE SET src = sub.src1, val = val1
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | source-1 | 10
+ 2 | source-1 | 10
+ 3 | target | 0
+(4 rows)
+
+ROLLBACK;
+-- Test self-join
+BEGIN;
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | target | 0
+ 2 | target | 0
+ 3 | target | 0
+(4 rows)
+
+set citus.log_remote_commands to true;
+MERGE INTO target_cj t1
+USING (SELECT * FROM target_cj) sub
+ON t1.tid = sub.tid AND t1.tid = 3
+WHEN MATCHED THEN
+ UPDATE SET src = sub.src, val = sub.val + 100
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t1 USING (SELECT target_cj.tid, target_cj.src, target_cj.val FROM merge_schema.target_cj_xxxxxxx target_cj) sub ON ((t1.tid OPERATOR(pg_catalog.=) sub.tid) AND (t1.tid OPERATOR(pg_catalog.=) 3)) WHEN MATCHED THEN UPDATE SET src = sub.src, val = (sub.val OPERATOR(pg_catalog.+) 100) WHEN NOT MATCHED THEN DO NOTHING
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+set citus.log_remote_commands to false;
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | target | 0
+ 2 | target | 0
+ 3 | target | 100
+(4 rows)
+
+ROLLBACK;
+-- Test PREPARE
+PREPARE foo(int) AS
+MERGE INTO target_cj target
+USING (SELECT * FROM source_cj1) sub
+ON target.tid = sub.sid1 AND target.tid = $1
+WHEN MATCHED THEN
+ UPDATE SET val = sub.val1
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | target | 0
+ 2 | target | 0
+ 3 | target | 0
+(4 rows)
+
+BEGIN;
+EXECUTE foo(2);
+EXECUTE foo(2);
+EXECUTE foo(2);
+EXECUTE foo(2);
+EXECUTE foo(2);
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | target | 10
+ 2 | target | 10
+ 3 | target | 0
+(4 rows)
+
+ROLLBACK;
+BEGIN;
+SET citus.log_remote_commands to true;
+SET client_min_messages TO DEBUG1;
+EXECUTE foo(2);
+DEBUG:
+DEBUG:
+DEBUG:
+DEBUG:
+DEBUG:
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+RESET client_min_messages;
+EXECUTE foo(2);
+NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT * FROM target_cj ORDER BY 1;
+ tid | src | val
+---------------------------------------------------------------------
+ 1 | target | 0
+ 2 | target | 10
+ 2 | target | 10
+ 3 | target | 0
+(4 rows)
+
+ROLLBACK;
+-- Test distributed tables, must be co-located and joined on distribution column.
+--
+-- We create two sets of source and target tables, one set is Postgres and the other
+-- is Citus distributed. Run the _exact_ MERGE SQL on both sets and compare the
+-- final results of the target tables of Postgres and Citus; the results should match.
+-- This is repeated for various MERGE SQL combinations
+--
+CREATE TABLE pg_target(id int, val varchar);
+CREATE TABLE pg_source(id int, val varchar);
+CREATE TABLE citus_target(id int, val varchar);
+CREATE TABLE citus_source(id int, val varchar);
+-- Half of the source rows do not match
+INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i;
+INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i;
+INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i;
+INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i;
+SELECT create_distributed_table('citus_target', 'id');
+NOTICE: Copying data from local table...
+NOTICE: copying the data has completed
+DETAIL: The local data in the table is no longer visible, but is still on disk.
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_target$$)
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
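The harness here keeps a plain Postgres pair (pg_target/pg_source) next to the distributed pair and runs the exact same MERGE on both; compare_tables(), defined just below, then diffs the two target tables with a FULL OUTER JOIN. The diffing idea in isolation (a sketch; the real routine wraps it in plpgsql):

SELECT count(1) = 0 AS tables_match
FROM pg_target
FULL OUTER JOIN citus_target USING (id, val)
WHERE pg_target.id IS NULL        -- row only in citus_target
   OR citus_target.id IS NULL;    -- row only in pg_target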
+SELECT create_distributed_table('citus_source', 'id');
+NOTICE: Copying data from local table...
+NOTICE: copying the data has completed
+DETAIL: The local data in the table is no longer visible, but is still on disk.
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_source$$)
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+--
+-- This routine compares the target tables of Postgres and Citus and
+-- returns true if they match, false if the results do not match.
+--
+CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$
+DECLARE ret BOOL;
+BEGIN
+SELECT count(1) = 0 INTO ret
+ FROM pg_target
+ FULL OUTER JOIN citus_target
+ USING (id, val)
+ WHERE pg_target.id IS NULL
+ OR citus_target.id IS NULL;
+RETURN ret;
+END
+$$ LANGUAGE PLPGSQL;
+-- Make sure we start with exact data in Postgres and Citus
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- Run the MERGE on both Postgres and Citus, and compare the final target tables
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO pg_target t
+USING pg_source s
+ON t.id = s.id
+WHEN MATCHED AND t.id > 400 THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN MATCHED THEN
+ DELETE
+WHEN NOT MATCHED THEN
+ INSERT VALUES(s.id, s.val);
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id
+WHEN MATCHED AND t.id > 400 THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN MATCHED THEN
+ DELETE
+WHEN NOT MATCHED THEN
+ INSERT VALUES(s.id, s.val);
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+--
+-- ON clause filter on source
+--
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO pg_target t
+USING pg_source s
+ON t.id = s.id AND s.id < 100
+WHEN MATCHED AND t.id > 400 THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN MATCHED THEN
+ DELETE
+WHEN NOT MATCHED THEN
+ INSERT VALUES(s.id, s.val);
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id AND s.id < 100
+WHEN MATCHED AND t.id > 400 THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN MATCHED THEN
+ DELETE
+WHEN NOT MATCHED THEN
+ INSERT VALUES(s.id, s.val);
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+--
+-- ON clause filter on target
+--
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO pg_target t
+USING pg_source s
+ON t.id = s.id AND t.id < 100
+WHEN MATCHED AND t.id > 400 THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN MATCHED THEN
+ DELETE
+WHEN NOT MATCHED THEN
+ INSERT VALUES(s.id, s.val);
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id AND t.id < 100
+WHEN MATCHED AND t.id > 400 THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN MATCHED THEN
+ DELETE
+WHEN NOT MATCHED THEN
+ INSERT VALUES(s.id, s.val);
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+--
+-- NOT MATCHED clause filter on source
+--
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO pg_target t
+USING pg_source s
+ON t.id = s.id
+WHEN MATCHED THEN
+ DO NOTHING
+WHEN NOT MATCHED AND s.id < 100 THEN
+ INSERT VALUES(s.id, s.val);
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id
+WHEN MATCHED THEN
+ DO NOTHING
+WHEN NOT MATCHED AND s.id < 100 THEN
+ INSERT VALUES(s.id, s.val);
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+--
+-- Test constant filter in ON clause to check if shards are pruned
+-- with restriction information
+--
+--
+-- Though a constant filter is present, this won't prune shards, as the
+-- NOT MATCHED clause is present
+--
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO pg_target t
+USING pg_source s
+ON t.id = s.id AND s.id = 250
+WHEN MATCHED THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN NOT MATCHED THEN
+ INSERT VALUES(s.id, s.val);
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id AND s.id = 250
+WHEN MATCHED THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN NOT MATCHED THEN
+ INSERT VALUES(s.id, s.val);
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+-- This will prune shards with restriction information, as NOT MATCHED is void
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO pg_target t
+USING pg_source s
+ON t.id = s.id AND s.id = 250
+WHEN MATCHED THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+MERGE INTO citus_target t
+USING citus_source s
+ON t.id = s.id AND s.id = 250
+WHEN MATCHED THEN
+ UPDATE SET val = t.val || 'Updated by Merge'
+WHEN NOT MATCHED THEN
+ DO NOTHING;
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
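The contrast between the two runs above is the point of this section: with an INSERT arm, any source shard can produce a not-matched row, so a task is issued for all four shards; with DO NOTHING in the NOT MATCHED arm, the constant s.id = 250 lets the planner prune to the single shard owning 250, hence exactly one NOTICE. A compact restatement of the prunable shape (tables from this test):

MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id AND s.id = 250   -- constant on the distribution column
WHEN MATCHED THEN
    UPDATE SET val = t.val || 'Updated by Merge'
WHEN NOT MATCHED THEN
    DO NOTHING;                 -- no INSERT arm, so shard pruning can kick in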
+-- Test CTE with distributed tables
+CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400;
+WARNING: "view pg_source_view" has dependency to "table pg_source" that is not in Citus' metadata
+DETAIL: "view pg_source_view" will be created only locally
+HINT: Distribute "table pg_source" first to distribute "view pg_source_view"
+CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400;
+BEGIN;
+SET citus.log_remote_commands to true;
+WITH cte AS (
+ SELECT * FROM pg_source_view
+)
+MERGE INTO pg_target t
+USING cte
+ON cte.id = t.id
+WHEN MATCHED AND t.id > 350 THEN
+ UPDATE SET val = t.val || 'Updated by CTE'
+WHEN NOT MATCHED THEN
+ INSERT VALUES (cte.id, cte.val)
+WHEN MATCHED AND t.id < 350 THEN
+ DELETE;
+WITH cte AS (
+ SELECT * FROM citus_source_view
+)
+MERGE INTO citus_target t
+USING cte
+ON cte.id = t.id
+WHEN MATCHED AND t.id > 350 THEN
+ UPDATE SET val = t.val || 'Updated by CTE'
+WHEN NOT MATCHED THEN
+ INSERT VALUES (cte.id, cte.val)
+WHEN MATCHED AND t.id < 350 THEN
+ DELETE;
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+-- Test sub-query with distributed tables
+BEGIN;
+SET citus.log_remote_commands to true;
+MERGE INTO pg_target t
+USING (SELECT * FROM pg_source) subq
+ON subq.id = t.id
+WHEN MATCHED AND t.id > 350 THEN
+ UPDATE SET val = t.val || 'Updated by subquery'
+WHEN NOT MATCHED THEN
+ INSERT VALUES (subq.id, subq.val)
+WHEN MATCHED AND t.id < 350 THEN
+ DELETE;
+MERGE INTO citus_target t
+USING (SELECT * FROM citus_source) subq
+ON subq.id = t.id
+WHEN MATCHED AND t.id > 350 THEN
+ UPDATE SET val = t.val || 'Updated by subquery'
+WHEN NOT MATCHED THEN
+ INSERT VALUES (subq.id, subq.val)
+WHEN MATCHED AND t.id < 350 THEN
+ DELETE;
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SET citus.log_remote_commands to false;
+SELECT compare_tables();
+ compare_tables
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+-- Test PREPARE
+PREPARE pg_prep(int) AS
+MERGE INTO pg_target
+USING (SELECT * FROM pg_source) sub
+ON pg_target.id = sub.id AND pg_target.id = $1
+WHEN MATCHED THEN
+ UPDATE SET val = 'Updated by prepare using ' || sub.val
+WHEN NOT MATCHED THEN
+ INSERT VALUES (sub.id, sub.val);
+PREPARE citus_prep(int) AS
+MERGE INTO citus_target
+USING (SELECT * FROM citus_source) sub
+ON citus_target.id = sub.id AND citus_target.id = $1
+WHEN MATCHED THEN
+ UPDATE SET val = 'Updated by prepare using ' || sub.val
+WHEN NOT MATCHED THEN
+ INSERT VALUES (sub.id, sub.val);
+BEGIN;
+SELECT * FROM pg_target WHERE id = 500; -- before merge
+ id | val
+---------------------------------------------------------------------
+ 500 | target
+(1 row)
+
+SELECT count(*) FROM pg_target; -- before merge
+ count
+---------------------------------------------------------------------
+ 251
+(1 row)
+
+EXECUTE pg_prep(500);
+SELECT * FROM pg_target WHERE id = 500; -- non-cached
+ id | val
+---------------------------------------------------------------------
+ 500 | Updated by prepare using source
+(1 row)
+
+EXECUTE pg_prep(500);
+EXECUTE pg_prep(500);
+EXECUTE pg_prep(500);
+EXECUTE pg_prep(500);
+EXECUTE pg_prep(500);
+SELECT * FROM pg_target WHERE id = 500; -- cached
+ id | val
+---------------------------------------------------------------------
+ 500 | Updated by prepare using source
+(1 row)
+
+SELECT count(*) FROM pg_target; -- cached
+ count
+---------------------------------------------------------------------
+ 3245
+(1 row)
+
+SELECT * FROM citus_target WHERE id = 500; -- before merge
+ id | val
+---------------------------------------------------------------------
+ 500 | target
+(1 row)
+
+SELECT count(*) FROM citus_target; -- before merge
+ count
+---------------------------------------------------------------------
+ 251
+(1 row)
+
+SET citus.log_remote_commands to true;
+EXECUTE citus_prep(500);
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+SELECT * FROM citus_target WHERE id = 500; -- non-cached
+NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+ id | val
+---------------------------------------------------------------------
+ 500 | Updated by prepare using source
+(1 row)
+
+EXECUTE citus_prep(500);
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
+DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
+EXECUTE citus_prep(500);
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id 
OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) 
(sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * FROM citus_target WHERE id = 500; -- cached + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +SELECT count(*) FROM citus_target; -- cached + count +--------------------------------------------------------------------- + 3245 +(1 row) + +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test partitions + distributed tables +CREATE TABLE pg_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE citus_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part6 PARTITION OF citus_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE pg_pa_source (sid integer, delta float); +CREATE TABLE citus_pa_source (sid integer, delta float); +-- insert many rows to the source table +INSERT INTO pg_pa_source 
SELECT id, id * 10 FROM generate_series(1,14) AS id; +INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +-- insert a few rows in the target table (odd numbered tid) +INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +SELECT create_distributed_table('citus_pa_target', 'tid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part5$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part6$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part7$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part8$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('citus_pa_source', 'sid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_pa_source$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_pa_target + FULL OUTER JOIN citus_pa_target + USING (tid, balance, val) + WHERE pg_pa_target.tid IS NULL + OR citus_pa_target.tid IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; +-- try simple MERGE +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +SELECT pa_compare_tables(); + pa_compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- same with a constant qual +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +SELECT pa_compare_tables(); + pa_compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +CREATE TABLE source_json( id integer, z int, d jsonb); +CREATE TABLE target_json( id integer, z int, d jsonb); +INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i; +SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_json$$) + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +-- single shard query: given that source_json is filtered, Postgres is smart enough to push down +-- the filter to target_json as well +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING (SELECT * FROM source_json WHERE id = 1) sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 1 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 1 | 5 | +(1 row) + +-- zero shard query as filters do not match +--SELECT public.coordinator_plan($Q$ +--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +--USING (SELECT * FROM source_json WHERE id = 1) sdn +--ON sda.id = sdn.id AND sda.id = 2 +--WHEN NOT matched THEN +-- INSERT (id, z) VALUES (sdn.id, 5); +--$Q$); +--SELECT * FROM target_json ORDER BY 1; +-- the join for source_json happens at a different place +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z) +ON sda.id = s1.id AND s1.id = s2.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (s2.id, 5); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 4 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 0 | 5 | + 1 | 5 | + 2 | 5 | + 3 | 5 | + 4 | 5 | + 5 | 5 | +(6 rows) + +-- update JSON column +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET d = '{"a" : 5}'; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 4 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 0 | 5 | {"a": 5} + 1 | 5 | {"a": 5} + 2 | 5 | {"a": 5} + 3 | 5 | {"a": 5} + 4 | 5 | {"a": 5} + 5 | 5 | {"a": 5} +(6 rows) + +CREATE FUNCTION immutable_hash(int) RETURNS int +AS 'SELECT hashtext( ($1 + $1)::text);' +LANGUAGE SQL +IMMUTABLE +RETURNS NULL ON NULL INPUT; +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET z = immutable_hash(sdn.z); +-- Test bigserial +CREATE TABLE source_serial (id integer, z int, d bigserial); +CREATE TABLE target_serial (id integer, z int, d bigserial); +INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i; +SELECT create_distributed_table('source_serial', 'id'), + create_distributed_table('target_serial', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk.
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_serial$$) + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +MERGE INTO target_serial sda +USING source_serial sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (id, z); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +SELECT count(*) from source_serial; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(*) from target_serial; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(distinct d) from source_serial; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(distinct d) from target_serial; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Test set operations +CREATE TABLE target_set(t1 int, t2 int); +CREATE TABLE source_set(s1 int, s2 int); +SELECT create_distributed_table('target_set', 't1'), + create_distributed_table('source_set', 's1'); + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +INSERT INTO target_set VALUES(1, 0); +INSERT INTO source_set VALUES(1, 1); +INSERT INTO source_set VALUES(2, 2); +MERGE INTO target_set +USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 100 +WHEN NOT MATCHED THEN + INSERT VALUES(foo.s1); +SELECT * FROM target_set ORDER BY 1, 2; + t1 | t2 +--------------------------------------------------------------------- + 1 | 100 + 2 | +(2 rows) + -- -- Error and Unsupported scenarios -- +MERGE INTO target_set +USING (SELECT s1,s2 FROM source_set UNION SELECT s2,s1 FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +MERGE INTO target_set +USING (SELECT 2 as s3, source_set.* FROM (SELECT * FROM source_set LIMIT 1) as foo LEFT JOIN source_set USING( s1)) AS foo +ON target_set.t1 = foo.s1 +WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 +WHEN NOT MATCHED THEN INSERT VALUES(s1, s3); +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query +-- modifying CTE not supported +EXPLAIN +WITH cte_1 AS (DELETE FROM target_json) +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +-- Grouping sets not supported +MERGE INTO citus_target t +USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +ERROR: cannot push down this subquery +DETAIL: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP +WITH subq AS +( +SELECT 
count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val) +) +MERGE INTO citus_target t +USING subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +ERROR: cannot push down this subquery +DETAIL: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP +-- try inserting unmatched distribution column value +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT DEFAULT VALUES; +ERROR: cannot perform MERGE INSERT with DEFAULTS +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT VALUES(10000); +ERROR: MERGE INSERT must refer a source column for distribution column +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(1000); +ERROR: MERGE INSERT must refer a source column for distribution column +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(s.val); +ERROR: MERGE INSERT must use the source table distribution column value +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (val) VALUES(s.val); +ERROR: MERGE INSERT must have distribution column as value +-- try updating the distribution key column +BEGIN; +MERGE INTO target_cj t + USING source_cj1 s + ON t.tid = s.sid1 AND t.tid = 2 + WHEN MATCHED THEN + UPDATE SET tid = tid + 9, src = src || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid1, 'inserted by merge', val1); +ERROR: updating the distribution column is not allowed in MERGE actions +ROLLBACK; -- Foreign table as target MERGE INTO foreign_table USING ft_target ON (foreign_table.id = ft_target.id) @@ -1274,7 +2562,72 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet +-- Now both s1 and t1 are distributed tables +SELECT undistribute_table('t1'); +NOTICE: creating a new table for merge_schema.t1 +NOTICE: moving the data of merge_schema.t1 +NOTICE: dropping the old merge_schema.t1 +NOTICE: renaming the new table to merge_schema.t1 + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t1', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- We have a potential pitfall where a function invoked in the +-- MERGE conditions can insert into or update a random shard +CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN +LANGUAGE PLPGSQL AS +$$ +BEGIN + INSERT INTO t1 VALUES (100, 100); + RETURN TRUE; +END; +$$; +-- Test functions executing in a MERGE statement. This is to prevent the functions from +-- running arbitrary SQL, which may be executed on a remote node or modify the target +-- relation, which would have unexpected/surprising results.
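For contrast with the failing cases that follow, here is a minimal sketch of the pattern these checks do allow, mirroring the successful immutable_hash MERGE earlier in this file; the function add_i and this particular statement are hypothetical illustrations, not part of the test suite:

-- hypothetical: an IMMUTABLE function is acceptable in a distributed MERGE,
-- per the "non-IMMUTABLE functions are not yet supported" error text below
CREATE FUNCTION add_i(integer, integer) RETURNS integer
AS 'SELECT $1 + $2;'
LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT;

MERGE INTO t1
USING s1 ON t1.id = s1.id
WHEN NOT MATCHED THEN
    -- the distribution column value (s1.id) comes straight from the
    -- source table, satisfying the MERGE INSERT rules shown above
    INSERT VALUES (s1.id, add_i(s1.val, 2));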
+MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON + t1.id = s1.id AND s1.id = 2 + WHEN matched THEN + UPDATE SET id = s1.id, val = random(); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +-- Test STABLE function +CREATE FUNCTION add_s(integer, integer) RETURNS integer +AS 'select $1 + $2;' +LANGUAGE SQL +STABLE RETURNS NULL ON NULL INPUT; +MERGE INTO t1 +USING s1 ON t1.id = s1.id +WHEN NOT MATCHED THEN + INSERT VALUES(s1.id, add_s(s1.val, 2)); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +-- Test preventing "ON" join condition from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET val = t1.val + s1.val; +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +ROLLBACK; +-- Test preventing WHEN clause(s) from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 +WHEN MATCHED AND (merge_when_and_write()) THEN + UPDATE SET val = t1.val + s1.val; +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +ROLLBACK; -- Joining on partition columns with sub-query MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.val = t1.id) -- sub.val is not a distribution column @@ -1284,7 +2637,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- Joining on partition columns with CTE WITH s1_res AS ( SELECT * FROM s1 @@ -1297,7 +2650,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- Constant Join condition WITH s1_res AS ( SELECT * FROM s1 @@ -1310,7 +2663,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- With a single WHEN clause, which causes a non-left join WITH s1_res AS ( SELECT * FROM s1 @@ -1319,7 +2672,7 @@ WITH s1_res AS ( WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- -- Reference tables -- @@ -1371,7 +2724,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- -- Postgres + Citus-Distributed table -- @@ -1413,7 +2766,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables 
yet MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.id = t1.id) WHEN MATCHED AND sub.val = 0 THEN @@ -1422,7 +2775,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet CREATE TABLE pg(val int); SELECT create_distributed_table('s1', 'id'); NOTICE: Copying data from local table... @@ -1443,7 +2796,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet -- Mix Postgres table in CTE WITH pg_res AS ( SELECT * FROM pg @@ -1456,7 +2809,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet -- Match more than one source row should fail same as Postgres behavior SELECT undistribute_table('t1'); NOTICE: creating a new table for merge_schema.t1 @@ -1511,16 +2864,245 @@ WHEN NOT MATCHED THEN INSERT VALUES(mv_source.id, mv_source.val); ERROR: cannot execute MERGE on relation "mv_source" DETAIL: This operation is not supported for materialized views. +-- Distributed tables *must* be colocated +CREATE TABLE dist_target(id int, val varchar); +SELECT create_distributed_table('dist_target', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_source(id int, val varchar); +SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated +-- Distributed tables *must* be joined on distribution column +CREATE TABLE dist_colocated(id int, val int); +SELECT create_distributed_table('dist_colocated', 'id', colocate_with => 'dist_target'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +MERGE INTO dist_target +USING dist_colocated +ON dist_target.id = dist_colocated.val -- val is not the distribution column +WHEN MATCHED THEN +UPDATE SET val = dist_colocated.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_colocated.id, dist_colocated.val); +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +-- Both the source and target must be distributed +MERGE INTO dist_target +USING (SELECT 100 id) AS source +ON dist_target.id = source.id AND dist_target.val = 'const' +WHEN MATCHED THEN +UPDATE SET val = 'source' +WHEN NOT MATCHED THEN +INSERT VALUES(source.id, 'source'); +ERROR: For MERGE command, both the source and target must be distributed +-- Non-hash distributed tables (append/range). 
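Before the append/range experiments below, a short sketch of the shape the preceding errors steer toward: hash-distributed, co-located tables joined on their distribution columns. This reuses dist_target and dist_colocated from above; the varchar cast is an assumption (dist_colocated.val is an int), and the statement is an illustration rather than part of the test file:

MERGE INTO dist_target t
USING dist_colocated s
ON t.id = s.id          -- both relations joined on their distribution column
WHEN MATCHED THEN
    UPDATE SET val = s.val::varchar
WHEN NOT MATCHED THEN
    INSERT VALUES (s.id, s.val::varchar);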
+CREATE VIEW show_tables AS +SELECT logicalrelid, partmethod +FROM pg_dist_partition +WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass) +ORDER BY 1; +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | h + dist_source | a +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | h + dist_source | r +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +-- Both are append tables +SELECT undistribute_table('dist_target'); +NOTICE: creating a new table for merge_schema.dist_target +NOTICE: moving the data of merge_schema.dist_target +NOTICE: dropping the old merge_schema.dist_target +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE" +NOTICE: renaming the new table to merge_schema.dist_target + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table 
+--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_target', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | a + dist_source | a +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +-- Both are range tables +SELECT undistribute_table('dist_target'); +NOTICE: creating a new table for merge_schema.dist_target +NOTICE: moving the data of merge_schema.dist_target +NOTICE: dropping the old merge_schema.dist_target +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE" +NOTICE: renaming the new table to merge_schema.dist_target + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_target', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | r + dist_source | r +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead DROP SERVER foreign_server CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to user mapping for postgres on server foreign_server -drop cascades to foreign table foreign_table_4000046 +drop cascades to foreign table foreign_table_4000043 drop cascades to foreign table foreign_table -NOTICE: foreign table "foreign_table_4000046" does not exist, skipping +NOTICE: foreign table "foreign_table_4000043" does not exist, skipping CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name, drop_shards_metadata_only := false)" PL/pgSQL function citus_drop_trigger() line XX at PERFORM +DROP 
FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; -NOTICE: drop cascades to 56 other objects +NOTICE: drop cascades to 84 other objects DETAIL: drop cascades to function insert_data() drop cascades to table pg_result drop cascades to table local_local @@ -1562,21 +3144,49 @@ drop cascades to table mv_target drop cascades to table mv_source_table drop cascades to materialized view mv_source drop cascades to table mv_local -drop cascades to table dist_table +drop cascades to table dist_table_4000041 drop cascades to function f_dist() drop cascades to table fn_target_4000040 drop cascades to table fn_result drop cascades to table fn_target +drop cascades to table dist_table drop cascades to table fn_local drop cascades to table ft_target -drop cascades to table ft_source_4000045 +drop cascades to table ft_source_4000042 drop cascades to table ft_source drop cascades to extension postgres_fdw +drop cascades to table target_cj +drop cascades to table source_cj1 +drop cascades to table source_cj2 +drop cascades to table pg_target +drop cascades to table pg_source +drop cascades to table citus_target +drop cascades to table citus_source +drop cascades to function compare_tables() +drop cascades to view pg_source_view +drop cascades to view citus_source_view +drop cascades to table pg_pa_target +drop cascades to table citus_pa_target +drop cascades to table pg_pa_source +drop cascades to table citus_pa_source +drop cascades to function pa_compare_tables() +drop cascades to table source_json +drop cascades to table target_json +drop cascades to function immutable_hash(integer) +drop cascades to table source_serial +drop cascades to table target_serial +drop cascades to table target_set +drop cascades to table source_set +drop cascades to function add_s(integer,integer) drop cascades to table pg -drop cascades to table t1_4000062 -drop cascades to table s1_4000063 +drop cascades to table t1_4000131 +drop cascades to table s1_4000132 drop cascades to table t1 drop cascades to table s1 +drop cascades to table dist_colocated +drop cascades to table dist_target +drop cascades to table dist_source +drop cascades to view show_tables SELECT 1 FROM master_remove_node('localhost', :master_port); ?column? 
--------------------------------------------------------------------- diff --git a/src/test/regress/expected/merge_arbitrary.out b/src/test/regress/expected/merge_arbitrary.out new file mode 100644 index 000000000..345ac1410 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary.out @@ -0,0 +1,150 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif +SET search_path TO merge_arbitrary_schema; +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test PREPARE +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +INSERT INTO prept VALUES(100, 0); +INSERT INTO preps VALUES(100, 0); +INSERT INTO preps VALUES(200, 0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +-- sixth time +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +-- Should have the counter as 14 (7 * 2) +SELECT * FROM prept; + t1 | t2 +--------------------------------------------------------------------- + 100 | 14 +(1 row) + +-- Test local tables +INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause +INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause +INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause +INSERT INTO t1 VALUES(1, 0); -- Will be deleted +INSERT INTO t1 VALUES(2, 0); -- Will be updated +INSERT INTO t1 VALUES(5, 0); -- Will be intact +PREPARE local(int, int) AS +WITH s1_res AS ( + SELECT * FROM s1 +) +MERGE INTO t1 + USING s1_res ON (s1_res.id = t1.id) + WHEN MATCHED AND s1_res.val = $1 THEN + DELETE + WHEN MATCHED THEN + UPDATE SET val = t1.val + $2 + WHEN NOT MATCHED THEN + INSERT (id, val) VALUES (s1_res.id, s1_res.val); +BEGIN; +EXECUTE local(0, 1); +SELECT * 
FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(5 rows) + +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +-- sixth time +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(5 rows) + +ROLLBACK; diff --git a/src/test/regress/expected/merge_arbitrary_0.out b/src/test/regress/expected/merge_arbitrary_0.out new file mode 100644 index 000000000..a7e3fbf20 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_0.out @@ -0,0 +1,6 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q diff --git a/src/test/regress/expected/merge_arbitrary_create.out b/src/test/regress/expected/merge_arbitrary_create.out new file mode 100644 index 000000000..9b2444f17 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_create.out @@ -0,0 +1,72 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif +DROP SCHEMA IF EXISTS merge_arbitrary_schema CASCADE; +CREATE SCHEMA merge_arbitrary_schema; +SET search_path TO merge_arbitrary_schema; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 6000000; +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); +SELECT create_distributed_table('target_cj', 'tid'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj1', 'sid1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj2', 'sid2'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE prept(t1 int, t2 int); +CREATE TABLE preps(s1 int, s2 int); +SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +-- Citus local tables +CREATE TABLE t1(id int, val int); +CREATE TABLE s1(id int, val int); +SELECT citus_add_local_table_to_metadata('t1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_local_table_to_metadata('s1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/merge_arbitrary_create_0.out b/src/test/regress/expected/merge_arbitrary_create_0.out new file mode 100644 
index 000000000..a7e3fbf20 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_create_0.out @@ -0,0 +1,6 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q diff --git a/src/test/regress/expected/multi_data_types.out b/src/test/regress/expected/multi_data_types.out index 4bc7da5c7..a88f9e1de 100644 --- a/src/test/regress/expected/multi_data_types.out +++ b/src/test/regress/expected/multi_data_types.out @@ -3,6 +3,14 @@ -- create, distribute, INSERT, SELECT and UPDATE -- =================================================================== SET citus.next_shard_id TO 530000; +-- Given that other test files depend on the existence of types created in this file, +-- we cannot drop them at the end. Instead, we drop them at the beginning of the test +-- to make this file runnable multiple times via run_test.py. +BEGIN; + SET LOCAL client_min_messages TO WARNING; + DROP TYPE IF EXISTS test_composite_type, other_composite_type, bug_status CASCADE; + DROP OPERATOR FAMILY IF EXISTS cats_op_fam USING hash; +COMMIT; -- create a custom type... CREATE TYPE test_composite_type AS ( i integer, diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 2b165bfeb..95768bbcb 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1258,6 +1258,43 @@ SELECT * FROM pg_dist_cleanup; 2 | 0 | 1 | table_with_orphaned_shards_102011 | 0 | 0 (2 rows) +ALTER EXTENSION citus_columnar UPDATE TO '11.2-1'; +-- Make sure that we defined dependencies from all rel objects (tables, +-- indexes, sequences ..) to columnar table access method ... +SELECT pg_class.oid INTO columnar_schema_members +FROM pg_class, pg_namespace +WHERE pg_namespace.oid=pg_class.relnamespace AND + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); +SELECT refobjid INTO columnar_schema_members_pg_depend +FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid AND + objid = (select oid from pg_am where amname = 'columnar') AND + objsubid = 0 AND + refclassid = 'pg_class'::regclass::oid AND + refobjsubid = 0 AND + deptype = 'n'; +-- ... , so this should be empty, +(TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) +UNION +(TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); + oid +--------------------------------------------------------------------- +(0 rows) + +-- ... , and both columnar_schema_members_pg_depend & columnar_schema_members +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- error out as cleanup records remain ALTER EXTENSION citus UPDATE TO '11.0-4'; ERROR: pg_dist_cleanup is introduced in Citus 11.1 @@ -1326,8 +1363,10 @@ SELECT * FROM multi_extension.print_extension_changes(); previous_object | current_object --------------------------------------------------------------------- | function citus_stats_tenants(boolean) SETOF record + | function citus_stats_tenants_local(boolean) SETOF record | view citus_stats_tenants -(2 rows) + | view citus_stats_tenants_local +(4 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/multi_modifying_xacts.out b/src/test/regress/expected/multi_modifying_xacts.out index 607c327ff..0294e1060 100644 --- a/src/test/regress/expected/multi_modifying_xacts.out +++ b/src/test/regress/expected/multi_modifying_xacts.out @@ -1,5 +1,7 @@ SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; +CREATE SCHEMA multi_modifying_xacts; +SET search_path TO multi_modifying_xacts; -- =================================================================== -- test end-to-end modification functionality -- =================================================================== @@ -190,7 +192,7 @@ ALTER TABLE labs ADD COLUMN motto text; INSERT INTO labs VALUES (6, 'Bell Labs'); ABORT; -- but the DDL should correctly roll back -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.labs'::regclass; +SELECT "Column", "Type", "Modifiers" FROM public.table_desc WHERE relid='multi_modifying_xacts.labs'::regclass; Column | Type | Modifiers --------------------------------------------------------------------- id | bigint | not null @@ -339,7 +341,7 @@ CREATE FUNCTION reject_large_id() RETURNS trigger AS $rli$ END; $rli$ LANGUAGE plpgsql; -- register after insert trigger -SELECT * FROM run_command_on_placements('researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_large_id()') +SELECT * FROM run_command_on_placements('multi_modifying_xacts.researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE multi_modifying_xacts.reject_large_id()') ORDER BY nodeport, shardid; nodename | nodeport | shardid | success | result --------------------------------------------------------------------- @@ -498,6 +500,7 @@ AND s.logicalrelid = 'objects'::regclass; -- create trigger on one worker to reject certain values \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -514,6 +517,7 @@ AFTER INSERT ON objects_1200003 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- test partial failure; worker_1 succeeds, 2 fails -- in this case, we expect the transaction to abort \set VERBOSITY terse @@ -551,6 +555,7 @@ DELETE FROM objects; -- there cannot be errors on different shards at different times -- because the first failure will fail the whole transaction \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ 
BEGIN @@ -567,6 +572,7 @@ AFTER INSERT ON labs_1200002 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; BEGIN; INSERT INTO objects VALUES (1, 'apple'); INSERT INTO objects VALUES (2, 'BAD'); @@ -602,12 +608,14 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if the failures happen at COMMIT time? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; CREATE CONSTRAINT TRIGGER reject_bad AFTER INSERT ON objects_1200003 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- should be the same story as before, just at COMMIT time -- as we use 2PC, the transaction is rollbacked BEGIN; @@ -644,12 +652,14 @@ WHERE sp.shardid = s.shardid AND s.logicalrelid = 'objects'::regclass; -- what if all nodes have failures at COMMIT time? \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON labs_1200002; CREATE CONSTRAINT TRIGGER reject_bad AFTER INSERT ON labs_1200002 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- reduce the log level for differences between PG14 and PG15 -- in PGconn->errorMessage -- relevant PG commit b15f254466aefbabcbed001929f6e09db59fd158 @@ -688,8 +698,10 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if one shard (objects) succeeds but another (labs) completely fails? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; \c - - - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200004; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -833,6 +845,7 @@ SELECT * FROM reference_modifying_xacts; -- lets fail on of the workers at before the commit time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_reference() RETURNS trigger AS $rb$ BEGIN @@ -849,6 +862,7 @@ AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -860,12 +874,14 @@ ERROR: illegal value COMMIT; -- lets fail one of the workers at COMMIT time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -890,8 +906,10 @@ ORDER BY s.logicalrelid, sp.shardstate; -- for the time-being drop the constraint \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- now create a hash distributed table and run tests -- including both the reference table and the hash -- distributed table @@ -923,6 +941,7 @@ INSERT INTO 
hash_modifying_xacts VALUES (2, 2); ABORT; -- lets fail one of the workers before COMMIT time for the hash table \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_hash() RETURNS trigger AS $rb$ BEGIN @@ -939,6 +958,7 @@ AFTER INSERT ON hash_modifying_xacts_1200007 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail BEGIN; @@ -955,6 +975,7 @@ SELECT * FROM reference_modifying_xacts WHERE key = 55; -- now lets fail on of the workers for the hash distributed table table -- when there is a reference table involved \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_hash ON hash_modifying_xacts_1200007; -- the trigger is on execution time CREATE CONSTRAINT TRIGGER reject_bad_hash @@ -962,6 +983,7 @@ AFTER INSERT ON hash_modifying_xacts_1200007 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail BEGIN; @@ -994,11 +1016,13 @@ ORDER BY s.logicalrelid, sp.shardstate; -- and ensure that hash distributed table's -- change is rollbacked as well \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse BEGIN; -- to expand participant to include all worker nodes @@ -1127,8 +1151,10 @@ SELECT count(*) FROM pg_dist_transaction; -- in which we'll make the remote host unavailable -- first create the new user on all nodes CREATE USER test_user; +GRANT ALL ON SCHEMA multi_modifying_xacts TO test_user; -- now connect back to the master with the new user \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200015; CREATE TABLE reference_failure_test (key int, value int); SELECT create_reference_table('reference_failure_test'); @@ -1148,21 +1174,24 @@ SELECT create_distributed_table('numbers_hash_failure_test', 'key'); -- ensure that the shard is created for this user \c - test_user - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.override_table_visibility TO false; \dt reference_failure_test_1200015 - List of relations - Schema | Name | Type | Owner + List of relations + Schema | Name | Type | Owner --------------------------------------------------------------------- - public | reference_failure_test_1200015 | table | test_user + multi_modifying_xacts | reference_failure_test_1200015 | table | test_user (1 row) -- now connect with the default user, -- and rename the existing user \c - :default_user - :worker_1_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; NOTICE: not propagating ALTER ROLE ... 
RENAME TO commands to worker nodes -- connect back to master and query the reference table \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- should fail since the worker doesn't have test_user anymore INSERT INTO reference_failure_test VALUES (1, '1'); ERROR: connection to the remote node localhost:xxxxx failed with the following error: FATAL: role "test_user" does not exist @@ -1277,14 +1306,17 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin -- break the other node as well \c - :default_user - :worker_2_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; NOTICE: not propagating ALTER ROLE ... RENAME TO commands to worker nodes \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- fails on all shard placements INSERT INTO numbers_hash_failure_test VALUES (2,2); ERROR: connection to the remote node localhost:xxxxx failed with the following error: FATAL: role "test_user" does not exist -- connect back to the master with the proper user to continue the tests \c - :default_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200020; SET citus.next_placement_id TO 1200033; -- unbreak both nodes by renaming the user back to the original name @@ -1297,6 +1329,7 @@ SELECT * FROM run_command_on_workers('ALTER USER test_user_new RENAME TO test_us DROP TABLE reference_modifying_xacts, hash_modifying_xacts, hash_modifying_xacts_second, reference_failure_test, numbers_hash_failure_test; +REVOKE ALL ON SCHEMA multi_modifying_xacts FROM test_user; DROP USER test_user; -- set up foreign keys to test transactions with co-located and reference tables BEGIN; @@ -1322,7 +1355,9 @@ SELECT create_reference_table('itemgroups'); (1 row) +SET client_min_messages TO WARNING; DROP TABLE IF EXISTS users ; +RESET client_min_messages; CREATE TABLE users ( id int PRIMARY KEY, name text, @@ -1354,18 +1389,18 @@ JOIN USING (shardid) ORDER BY id; - id | shard_name | nodename | nodeport + id | shard_name | nodename | nodeport --------------------------------------------------------------------- - 1 | users_1200022 | localhost | 57637 - 2 | users_1200025 | localhost | 57638 - 3 | users_1200023 | localhost | 57638 - 4 | users_1200023 | localhost | 57638 - 5 | users_1200022 | localhost | 57637 - 6 | users_1200024 | localhost | 57637 - 7 | users_1200023 | localhost | 57638 - 8 | users_1200022 | localhost | 57637 - 9 | users_1200025 | localhost | 57638 - 10 | users_1200022 | localhost | 57637 + 1 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 2 | multi_modifying_xacts.users_1200025 | localhost | 57638 + 3 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 4 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 5 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 6 | multi_modifying_xacts.users_1200024 | localhost | 57637 + 7 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 8 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 9 | multi_modifying_xacts.users_1200025 | localhost | 57638 + 10 | multi_modifying_xacts.users_1200022 | localhost | 57637 (10 rows) END; @@ -1546,5 +1581,5 @@ SELECT name FROM labs WHERE id = 1001; (1 row) RESET citus.function_opens_transaction_block; -DROP FUNCTION insert_abort(); -DROP TABLE items, users, itemgroups, usergroups, researchers, labs; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_modifying_xacts CASCADE; diff --git a/src/test/regress/expected/multi_move_mx.out 
b/src/test/regress/expected/multi_move_mx.out index 833c9f7df..b6cc5d0d7 100644 --- a/src/test/regress/expected/multi_move_mx.out +++ b/src/test/regress/expected/multi_move_mx.out @@ -238,8 +238,40 @@ ORDER BY LIMIT 1 OFFSET 1; ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. +-- Check that shards of a table with GENERATED columns can be moved. +\c - - - :master_port +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +CREATE TABLE mx_table_with_generated_column (a int, b int GENERATED ALWAYS AS ( a + 3 ) STORED, c int); +SELECT create_distributed_table('mx_table_with_generated_column', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Check that dropped columns are handled properly in a move. +ALTER TABLE mx_table_with_generated_column DROP COLUMN c; +-- Move a shard from worker 1 to worker 2 +SELECT + citus_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_with_generated_column'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + -- Cleanup \c - - - :master_port +SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); +DROP TABLE mx_table_with_generated_column; DROP TABLE mx_table_1; DROP TABLE mx_table_2; DROP TABLE mx_table_3; diff --git a/src/test/regress/expected/multi_mx_copy_data.out b/src/test/regress/expected/multi_mx_copy_data.out index c1d3d7180..0db64c16e 100644 --- a/src/test/regress/expected/multi_mx_copy_data.out +++ b/src/test/regress/expected/multi_mx_copy_data.out @@ -1,6 +1,10 @@ -- -- MULTI_MX_COPY_DATA -- +-- We truncate them to make this test runnable multiple times. +-- Note that we cannot do that at the end of the test because +-- we need to keep the data for the other tests. 
+TRUNCATE lineitem_mx, orders_mx; \set nation_data_file :abs_srcdir '/data/nation.data' \set client_side_copy_command '\\copy nation_hash FROM ' :'nation_data_file' ' with delimiter '''|''';' :client_side_copy_command @@ -161,3 +165,4 @@ SET search_path TO public; :client_side_copy_command \set client_side_copy_command '\\copy supplier_mx FROM ' :'supplier_data_file' ' with delimiter '''|''';' :client_side_copy_command +DROP TABLE citus_mx_test_schema.nation_hash_replicated; diff --git a/src/test/regress/expected/multi_mx_modifying_xacts.out b/src/test/regress/expected/multi_mx_modifying_xacts.out index e486b8b1b..dfbdc7603 100644 --- a/src/test/regress/expected/multi_mx_modifying_xacts.out +++ b/src/test/regress/expected/multi_mx_modifying_xacts.out @@ -406,3 +406,6 @@ SELECT * FROM labs_mx WHERE id = 8; --------------------------------------------------------------------- (0 rows) +TRUNCATE objects_mx, labs_mx, researchers_mx; +DROP TRIGGER reject_bad_mx ON labs_mx_1220102; +DROP FUNCTION reject_bad_mx; diff --git a/src/test/regress/expected/multi_mx_router_planner.out b/src/test/regress/expected/multi_mx_router_planner.out index d006b4bb8..bf007be9d 100644 --- a/src/test/regress/expected/multi_mx_router_planner.out +++ b/src/test/regress/expected/multi_mx_router_planner.out @@ -1460,3 +1460,7 @@ DEBUG: query has a single distribution column value: 1 51 (6 rows) +SET client_min_messages to WARNING; +TRUNCATE articles_hash_mx, company_employees_mx, articles_single_shard_hash_mx; +DROP MATERIALIZED VIEW mv_articles_hash_mx_error; +DROP TABLE authors_hash_mx; diff --git a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index 56ff44b3b..e0e5bc541 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ -6,6 +6,8 @@ SET citus.next_shard_id TO 840000; -- router planner, so we're disabling it in this file. We've bunch of -- other tests that triggers fast-path-router planner SET citus.enable_fast_path_router_planner TO false; +CREATE SCHEMA multi_router_planner; +SET search_path TO multi_router_planner; CREATE TABLE articles_hash ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -290,10 +292,10 @@ WITH first_author AS MATERIALIZED ( UPDATE articles_hash SET title = first_author.name FROM first_author WHERE articles_hash.author_id = 2 AND articles_hash.id = first_author.id; DEBUG: Router planner doesn't support SELECT FOR UPDATE in common table expressions involving reference tables. 
-DEBUG: generating subplan XXX_1 for CTE first_author: SELECT articles_hash.id, auref.name FROM public.articles_hash, public.authors_reference auref WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (auref.id OPERATOR(pg_catalog.=) articles_hash.author_id)) FOR UPDATE OF articles_hash FOR UPDATE OF auref +DEBUG: generating subplan XXX_1 for CTE first_author: SELECT articles_hash.id, auref.name FROM multi_router_planner.articles_hash, multi_router_planner.authors_reference auref WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (auref.id OPERATOR(pg_catalog.=) articles_hash.author_id)) FOR UPDATE OF articles_hash FOR UPDATE OF auref DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 -DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE public.articles_hash SET title = first_author.name FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name character varying(20))) first_author WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (articles_hash.id OPERATOR(pg_catalog.=) first_author.id)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE multi_router_planner.articles_hash SET title = first_author.name FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name character varying(20))) first_author WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (articles_hash.id OPERATOR(pg_catalog.=) first_author.id)) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 WITH first_author AS MATERIALIZED ( @@ -356,10 +358,10 @@ WITH id_author AS MATERIALIZED ( SELECT id, author_id FROM articles_hash WHERE a id_title AS MATERIALIZED (SELECT id, title from articles_hash WHERE author_id = 2) SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; DEBUG: cannot run command which targets multiple shards -DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM public.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) +DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM multi_router_planner.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 -DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM public.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) +DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM multi_router_planner.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id_author.id, id_author.author_id, id_title.id, id_title.title FROM (SELECT intermediate_result.id, intermediate_result.author_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint)) id_author, (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title character varying(20))) id_title WHERE (id_author.id OPERATOR(pg_catalog.=) id_title.id) @@ -456,7 +458,7 @@ WITH new_article AS MATERIALIZED( ) SELECT * FROM new_article; DEBUG: only SELECT, UPDATE, or DELETE common 
table expressions may be router planned -DEBUG: generating subplan XXX_1 for CTE new_article: INSERT INTO public.articles_hash (id, author_id, title, word_count) VALUES (1, 1, 'arsenous'::character varying, 9) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE new_article: INSERT INTO multi_router_planner.articles_hash (id, author_id, title, word_count) VALUES (1, 1, 'arsenous'::character varying, 9) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) new_article @@ -471,7 +473,7 @@ WITH update_article AS MATERIALIZED( ) SELECT * FROM update_article; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.articles_hash SET word_count = 10 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 9)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.articles_hash SET word_count = 10 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 9)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) update_article DEBUG: Creating router plan @@ -485,7 +487,7 @@ WITH update_article AS MATERIALIZED ( ) SELECT coalesce(1,random()); DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.articles_hash SET word_count = 11 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.articles_hash SET word_count = 11 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT COALESCE((1)::double precision, random()) AS "coalesce" DEBUG: Creating router plan @@ -510,7 +512,7 @@ WITH update_article AS MATERIALIZED ( ) SELECT coalesce(1,random()); DEBUG: cannot router plan modification of a non-distributed table -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.authors_reference SET name = ''::character varying WHERE (id OPERATOR(pg_catalog.=) 0) RETURNING name, id +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.authors_reference SET name = ''::character varying WHERE (id OPERATOR(pg_catalog.=) 0) RETURNING name, id DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT COALESCE((1)::double precision, random()) AS "coalesce" DEBUG: Creating 
router plan @@ -524,7 +526,7 @@ WITH delete_article AS MATERIALIZED ( ) SELECT * FROM delete_article; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE delete_article: DELETE FROM public.articles_hash WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE delete_article: DELETE FROM multi_router_planner.articles_hash WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) delete_article DEBUG: Creating router plan @@ -653,8 +655,8 @@ FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE tes ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM public.articles_hash -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id OPERATOR(pg_catalog.=) articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 +DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM multi_router_planner.articles_hash +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM multi_router_planner.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id OPERATOR(pg_catalog.=) articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: push down of limit count: 5 id | word_count @@ -672,8 +674,8 @@ WHERE test.id = articles_hash.id and articles_hash.author_id = 1 ORDER BY articles_hash.id; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM public.articles_hash -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id OPERATOR(pg_catalog.=) articles_hash.id) AND (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) ORDER BY articles_hash.id +DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM multi_router_planner.articles_hash +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, 
test.word_count FROM multi_router_planner.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id OPERATOR(pg_catalog.=) articles_hash.id) AND (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) ORDER BY articles_hash.id DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 id | word_count @@ -788,9 +790,9 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id = 2 and a.author_id = b.author_id LIMIT 3; DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM public.articles_single_shard_hash +DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM multi_router_planner.articles_single_shard_hash DEBUG: Creating router plan -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM multi_router_planner.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 first_author | second_word_count @@ -1575,10 +1577,10 @@ SELECT 1 FROM authors_reference r JOIN ( ) num_db ON (r.id = num_db.datid) LIMIT 1; DEBUG: found no worker with all shard placements DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM public.number1() s(datid) +DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM multi_router_planner.number1() s(datid) DEBUG: Creating router plan DEBUG: generating subplan XXX_2 for subquery SELECT s.datid FROM ((SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) s LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) 
num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 DEBUG: Creating router plan ?column? --------------------------------------------------------------------- @@ -1590,10 +1592,10 @@ SELECT s.datid FROM number1() s LEFT JOIN pg_database d ON s.datid = d.oid; SELECT 1 FROM authors_reference r JOIN num_db ON (r.id = num_db.datid) LIMIT 1; DEBUG: found no worker with all shard placements DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM public.number1() s(datid) +DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM multi_router_planner.number1() s(datid) DEBUG: Creating router plan DEBUG: generating subplan XXX_2 for subquery SELECT s.datid FROM ((SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) s LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 DEBUG: Creating router plan ?column? --------------------------------------------------------------------- @@ -1603,8 +1605,8 @@ DEBUG: Creating router plan WITH cte AS MATERIALIZED (SELECT * FROM num_db) SELECT 1 FROM authors_reference r JOIN cte ON (r.id = cte.datid) LIMIT 1; DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE cte: SELECT datid FROM (SELECT s.datid FROM (public.number1() s(datid) LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid)))) num_db -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) cte ON ((r.id OPERATOR(pg_catalog.=) cte.datid))) LIMIT 1 +DEBUG: generating subplan XXX_1 for CTE cte: SELECT datid FROM (SELECT s.datid FROM (multi_router_planner.number1() s(datid) LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid)))) num_db +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) cte ON ((r.id OPERATOR(pg_catalog.=) cte.datid))) LIMIT 1 DEBUG: Creating router plan ?column? 
--------------------------------------------------------------------- @@ -1769,7 +1771,7 @@ SET citus.log_remote_commands TO on; -- single shard select queries are router plannable SELECT * FROM articles_range where author_id = 1; DEBUG: Creating router plan -NOTICE: issuing SELECT id, author_id, title, word_count FROM public.articles_range_840012 articles_range WHERE (author_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT id, author_id, title, word_count FROM multi_router_planner.articles_range_840012 articles_range WHERE (author_id OPERATOR(pg_catalog.=) 1) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count --------------------------------------------------------------------- @@ -1777,7 +1779,7 @@ DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx SELECT * FROM articles_range where author_id = 1 or author_id = 5; DEBUG: Creating router plan -NOTICE: issuing SELECT id, author_id, title, word_count FROM public.articles_range_840012 articles_range WHERE ((author_id OPERATOR(pg_catalog.=) 1) OR (author_id OPERATOR(pg_catalog.=) 5)) +NOTICE: issuing SELECT id, author_id, title, word_count FROM multi_router_planner.articles_range_840012 articles_range WHERE ((author_id OPERATOR(pg_catalog.=) 1) OR (author_id OPERATOR(pg_catalog.=) 5)) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count --------------------------------------------------------------------- @@ -1795,7 +1797,7 @@ NOTICE: executing the command locally: SELECT id, author_id, title, word_count SELECT * FROM articles_range ar join authors_range au on (ar.author_id = au.id) WHERE ar.author_id = 1; DEBUG: Creating router plan -NOTICE: issuing SELECT ar.id, ar.author_id, ar.title, ar.word_count, au.name, au.id FROM (public.articles_range_840012 ar JOIN public.authors_range_840008 au ON ((ar.author_id OPERATOR(pg_catalog.=) au.id))) WHERE (ar.author_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT ar.id, ar.author_id, ar.title, ar.word_count, au.name, au.id FROM (multi_router_planner.articles_range_840012 ar JOIN multi_router_planner.authors_range_840008 au ON ((ar.author_id OPERATOR(pg_catalog.=) au.id))) WHERE (ar.author_id OPERATOR(pg_catalog.=) 1) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count | name | id --------------------------------------------------------------------- @@ -2433,12 +2435,15 @@ SELECT create_distributed_table('failure_test', 'a', 'hash'); SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - router_user - :master_port +SET search_path TO multi_router_planner; -- we will fail to connect to worker 2, since the user does not exist -- still, we never mark placements inactive. 
Instead, fail the transaction BEGIN; @@ -2452,7 +2457,7 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'failure_test'::regclass ) - ORDER BY placementid; + ORDER BY shardid, nodeport; shardid | shardstate | nodename | nodeport --------------------------------------------------------------------- 840017 | 1 | localhost | 57637 @@ -2471,18 +2476,5 @@ DROP USER router_user; \c - - - :master_port DROP OWNED BY router_user; DROP USER router_user; -DROP TABLE failure_test; -DROP FUNCTION author_articles_max_id(); -DROP FUNCTION author_articles_id_word_count(); -DROP MATERIALIZED VIEW mv_articles_hash_empty; -DROP MATERIALIZED VIEW mv_articles_hash_data; -DROP VIEW num_db; -DROP FUNCTION number1(); -DROP TABLE articles_hash; -DROP TABLE articles_single_shard_hash; -DROP TABLE authors_hash; -DROP TABLE authors_range; -DROP TABLE authors_reference; -DROP TABLE company_employees; -DROP TABLE articles_range; -DROP TABLE articles_append; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_router_planner CASCADE; diff --git a/src/test/regress/expected/multi_simple_queries.out b/src/test/regress/expected/multi_simple_queries.out index 6bd8bad4a..4578d69a8 100644 --- a/src/test/regress/expected/multi_simple_queries.out +++ b/src/test/regress/expected/multi_simple_queries.out @@ -7,6 +7,8 @@ SET citus.coordinator_aggregation_strategy TO 'disabled'; -- =================================================================== -- test end-to-end query functionality -- =================================================================== +CREATE SCHEMA simple_queries_test; +SET search_path TO simple_queries_test; CREATE TABLE articles ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -382,7 +384,7 @@ SELECT author_id FROM articles 8 (3 rows) -SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders +SELECT o_orderstatus, count(*), avg(o_totalprice) FROM public.orders GROUP BY o_orderstatus HAVING count(*) > 1450 OR avg(o_totalprice) > 150000 ORDER BY o_orderstatus; @@ -392,7 +394,7 @@ SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders P | 75 | 164847.914533333333 (2 rows) -SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders +SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM public.lineitem, public.orders WHERE l_orderkey = o_orderkey AND l_orderkey > 9030 GROUP BY o_orderstatus HAVING sum(l_linenumber) > 1000 @@ -541,7 +543,7 @@ DEBUG: query has a single distribution column value: 2 -- error out on unsupported aggregate SET client_min_messages to 'NOTICE'; -CREATE AGGREGATE public.invalid(int) ( +CREATE AGGREGATE invalid(int) ( sfunc = int4pl, stype = int ); @@ -812,10 +814,11 @@ SELECT * FROM (SELECT nextval('query_seq') FROM articles LIMIT 3) vals; (3 rows) -- but not elsewhere -SELECT sum(nextval('query_seq')) FROM articles; -ERROR: relation "public.query_seq" does not exist +SELECT sum(nextval('simple_queries_test.query_seq')) FROM articles; +ERROR: relation "simple_queries_test.query_seq" does not exist CONTEXT: while executing command on localhost:xxxxx -SELECT n FROM (SELECT nextval('query_seq') n, random() FROM articles) vals; -ERROR: relation "public.query_seq" does not exist +SELECT n FROM (SELECT nextval('simple_queries_test.query_seq') n, random() FROM articles) vals; +ERROR: relation "simple_queries_test.query_seq" does not exist CONTEXT: while executing command on localhost:xxxxx -DROP SEQUENCE query_seq; +SET client_min_messages TO WARNING; +DROP SCHEMA 
simple_queries_test CASCADE; diff --git a/src/test/regress/expected/multi_upsert.out b/src/test/regress/expected/multi_upsert.out index 08308aba0..e41b2a3d5 100644 --- a/src/test/regress/expected/multi_upsert.out +++ b/src/test/regress/expected/multi_upsert.out @@ -1,5 +1,7 @@ -- this test file aims to test UPSERT feature on Citus SET citus.next_shard_id TO 980000; +CREATE SCHEMA upsert_test; +SET search_path TO upsert_test; CREATE TABLE upsert_test ( part_key int UNIQUE, @@ -244,3 +246,5 @@ ERROR: functions used in the WHERE clause of the ON CONFLICT clause of INSERTs INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_key) DO UPDATE SET part_key = 15; ERROR: modifying the partition value of rows is not allowed +SET client_min_messages TO WARNING; +DROP SCHEMA upsert_test CASCADE; diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 7a41b25ec..7fc102dbb 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -315,7 +315,7 @@ SELECT create_reference_table('tbl2'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- now, both are reference, still not supported SELECT create_reference_table('tbl1'); create_reference_table @@ -325,7 +325,7 @@ SELECT create_reference_table('tbl1'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- now, both distributed, not works SELECT undistribute_table('tbl1'); NOTICE: creating a new table for pg15.tbl1 @@ -419,29 +419,36 @@ SELECT create_distributed_table('tbl2', 'x'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- also, not inside subqueries & ctes WITH targq AS ( SELECT * FROM tbl2 ) MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet --- crashes on beta3, fixed on 15 stable ---WITH foo AS ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) SELECT * FROM foo; ---COPY ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) TO stdout; +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +WITH foo AS ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) SELECT * FROM foo; +ERROR: MERGE not supported in WITH query +COPY ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) TO stdout; +ERROR: MERGE not supported in COPY +MERGE INTO tbl1 t +USING tbl2 +ON (true) +WHEN MATCHED THEN + DO NOTHING; +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns MERGE INTO tbl1 t USING tbl2 ON (true) WHEN MATCHED THEN UPDATE SET x = (SELECT count(*) FROM tbl2); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: updating the distribution column is not allowed in MERGE actions -- test numeric types with negative scale CREATE TABLE numeric_negative_scale(numeric_column numeric(3,-1), orig_value int); INSERT into 
numeric_negative_scale SELECT x,x FROM generate_series(111, 115) x; diff --git a/src/test/regress/expected/pgmerge.out b/src/test/regress/expected/pgmerge.out index b90760691..8a74336a0 100644 --- a/src/test/regress/expected/pgmerge.out +++ b/src/test/regress/expected/pgmerge.out @@ -910,7 +910,15 @@ MERGE INTO wq_target t USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; -ERROR: functions used in UPDATE queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +ROLLBACK; +-- Test preventing ON condition from writing to the database +BEGIN; +MERGE INTO wq_target t +USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET balance = t.balance + s.balance; +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; @@ -1885,13 +1893,15 @@ INSERT INTO pa_target SELECT '2017-02-28', id, id * 100, 'initial' FROM generate SET client_min_messages TO DEBUG1; BEGIN; MERGE INTO pa_target t - USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s + USING (SELECT * FROM pa_source WHERE sid < 10) s + --USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s ON t.tid = s.sid WHEN MATCHED THEN UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT MATCHED THEN - INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); -DEBUG: + INSERT VALUES ('2017-01-15', sid, delta, 'inserted by merge'); +DEBUG: + --INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); SELECT * FROM pa_target ORDER BY tid; logts | tid | balance | val --------------------------------------------------------------------- @@ -2083,7 +2093,7 @@ WHEN MATCHED THEN UPDATE WHEN NOT MATCHED THEN INSERT (city_id, logdate, peaktemp, unitsales) VALUES (city_id, logdate, peaktemp, unitsales); -DEBUG: +DEBUG: RESET client_min_messages; SELECT tableoid::regclass, * FROM measurement ORDER BY city_id, logdate; tableoid | city_id | logdate | peaktemp | unitsales diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index 2146d67f1..1dea3b442 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -1482,7 +1482,6 @@ SELECT * from master_drain_node('localhost', :worker_2_port); ERROR: cannot use logical replication to transfer shards of the relation colocated_rebalance_test since it doesn't have a REPLICA IDENTITY or PRIMARY KEY DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. 
-CONTEXT: while executing command on localhost:xxxxx -- Make sure shouldhaveshards is false select shouldhaveshards from pg_dist_node where nodeport = :worker_2_port; shouldhaveshards @@ -2714,6 +2713,39 @@ SELECT sh.logicalrelid, pl.nodeport (5 rows) DROP TABLE single_shard_colocation_1a, single_shard_colocation_1b, single_shard_colocation_1c, single_shard_colocation_2a, single_shard_colocation_2b CASCADE; +-- verify we detect if one of the tables does not have a replica identity or primary key +-- and error out in case of shard transfer mode = auto +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +create table table_with_primary_key (a int primary key); +select create_distributed_table('table_with_primary_key','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +create table table_without_primary_key (a bigint); +select create_distributed_table('table_without_primary_key','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +select rebalance_table_shards(); +ERROR: cannot use logical replication to transfer shards of the relation table_without_primary_key since it doesn't have a REPLICA IDENTITY or PRIMARY KEY +DROP TABLE table_with_primary_key, table_without_primary_key; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO OFF; REVOKE ALL ON SCHEMA public FROM testrole; diff --git a/src/test/regress/expected/upgrade_columnar_after.out b/src/test/regress/expected/upgrade_columnar_after.out index 0da9bb17f..768a057f9 100644 --- a/src/test/regress/expected/upgrade_columnar_after.out +++ b/src/test/regress/expected/upgrade_columnar_after.out @@ -228,10 +228,12 @@ BEGIN; 22 (1 row) - -- make sure that serial is preserved - -- since we run "after schedule" twice and "rollback" wouldn't undo - -- sequence changes, it can be 22 or 33, not a different value - SELECT max(id) in (22, 33) FROM text_data; + -- Make sure that serial is preserved. + -- + -- Since we might run "after schedule" several times for flaky test + -- detection and "rollback" wouldn't undo sequence changes, "id" should + -- look like below: + SELECT max(id) >= 11 AND max(id) % 11 = 0 FROM text_data; ?column? --------------------------------------------------------------------- t @@ -265,7 +267,12 @@ ROLLBACK; SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -283,8 +290,8 @@ UNION (0 rows) -- ... , and both columnar_schema_members_pg_depend & columnar_schema_members --- should have 10 entries. -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; ?column?
--------------------------------------------------------------------- t @@ -292,12 +299,17 @@ SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- Check the same for workers too. -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -308,44 +320,44 @@ WHERE classid = 'pg_am'::regclass::oid AND deptype = 'n'; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"SELECT 10") - (localhost,10202,t,"SELECT 10") + t | SELECT 5 + t | SELECT 5 (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ (TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) UNION (TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"") - (localhost,10202,t,"") + t | + t | (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,t) - (localhost,10202,t,t) + t | t + t | t (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"DROP TABLE") - (localhost,10202,t,"DROP TABLE") + t | DROP TABLE + t | DROP TABLE (2 rows) diff --git a/src/test/regress/expected/upgrade_columnar_before.out b/src/test/regress/expected/upgrade_columnar_before.out index 28c252e30..a4895c770 100644 --- a/src/test/regress/expected/upgrade_columnar_before.out +++ b/src/test/regress/expected/upgrade_columnar_before.out @@ -1,5 +1,27 @@ -- Test if relying on topological sort of the objects, not their names, works -- fine when re-creating objects during pg_upgrade. +DO +$$ +BEGIN +IF EXISTS (SELECT * FROM pg_namespace WHERE nspname = 'upgrade_columnar') +THEN + -- Drop the table left over from the earlier run of + -- upgrade_columnar_before.sql. Similarly, drop the fake public schema + -- created before and rename the original one (renamed to citus_schema) + -- back to public. + -- + -- This can only happen if upgrade_columnar_before.sql is run multiple + -- times for flaky test detection.
+ DROP TABLE citus_schema.new_columnar_table; + DROP SCHEMA public CASCADE; + ALTER SCHEMA citus_schema RENAME TO public; + + SET LOCAL client_min_messages TO WARNING; + DROP SCHEMA upgrade_columnar CASCADE; +END IF; +END +$$ +LANGUAGE plpgsql; ALTER SCHEMA public RENAME TO citus_schema; SET search_path TO citus_schema; -- As mentioned in https://github.com/citusdata/citus/issues/5447, it diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 8005c0c42..12fc4c17b 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -122,6 +122,7 @@ ORDER BY 1; function citus_stat_statements() function citus_stat_statements_reset() function citus_stats_tenants(boolean) + function citus_stats_tenants_local(boolean) function citus_table_is_visible(oid) function citus_table_size(regclass) function citus_task_wait(bigint,citus_task_status) @@ -318,7 +319,8 @@ ORDER BY 1; view citus_stat_activity view citus_stat_statements view citus_stats_tenants + view citus_stats_tenants_local view pg_dist_shard_placement view time_partitions -(312 rows) +(314 rows) diff --git a/src/test/regress/expected/worker_split_copy_test.out b/src/test/regress/expected/worker_split_copy_test.out index 67d515198..f4fae57e0 100644 --- a/src/test/regress/expected/worker_split_copy_test.out +++ b/src/test/regress/expected/worker_split_copy_test.out @@ -142,8 +142,90 @@ SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_810700 (1 row) -- END: List updated row count for local targets shard. +-- Check that GENERATED columns are handled properly in a shard split operation. +\c - - - :master_port +SET search_path TO worker_split_copy_test; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 81080000; +-- BEGIN: Create distributed table and insert data. +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char, col_todrop int); +SELECT create_distributed_table('dist_table_with_generated_col', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Check that dropped columns are filtered out in COPY command. +ALTER TABLE dist_table_with_generated_col DROP COLUMN col_todrop; +INSERT INTO dist_table_with_generated_col (id, value) (SELECT g.id, 'N' FROM generate_series(1, 1000) AS g(id)); +-- END: Create distributed table and insert data. +-- BEGIN: Create target shards in Worker1 and Worker2 for a 2-way split copy. +\c - - - :worker_1_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080015(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); +\c - - - :worker_2_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080016(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); +-- BEGIN: List row count for source shard and target shard in Worker1.
+\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080000; + count +--------------------------------------------------------------------- + 510 +(1 row) + +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- BEGIN: List row count for target shard in Worker2. +\c - - - :worker_2_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016; + count +--------------------------------------------------------------------- + 0 +(1 row) + +\c - - - :worker_1_port +SELECT * from worker_split_copy( + 81080000, -- source shard id to copy + 'id', + ARRAY[ + -- split copy info for split child 1 + ROW(81080015, -- destination shard id + -2147483648, -- split range begin + -1073741824, --split range end + :worker_1_node)::pg_catalog.split_copy_info, + -- split copy info for split child 2 + ROW(81080016, --destination shard id + -1073741823, --split range begin + -1, --split range end + :worker_2_node)::pg_catalog.split_copy_info + ] + ); + worker_split_copy +--------------------------------------------------------------------- + +(1 row) + +\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + count +--------------------------------------------------------------------- + 247 +(1 row) + +\c - - - :worker_2_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016; + count +--------------------------------------------------------------------- + 263 +(1 row) + -- BEGIN: CLEANUP. \c - - - :master_port SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); DROP SCHEMA worker_split_copy_test CASCADE; -- END: CLEANUP.
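-- A minimal illustrative sketch, not part of the patch above: the two
-- split_copy_info ranges bisect the negative half of the int4 hash space,
-- which is the range owned by source shard 81080000 (one of the table's two
-- shards). Assuming Citus hashes an int distribution column with the standard
-- hash opclass function pg_catalog.hashint4() (the default for
-- hash-distributed int columns, but an assumption here), the per-child row
-- counts verified above can be reproduced directly on the source shard:
SELECT count(*) FILTER (WHERE hashint4(id) BETWEEN -2147483648 AND -1073741824) AS rows_for_81080015,
       count(*) FILTER (WHERE hashint4(id) BETWEEN -1073741823 AND -1) AS rows_for_81080016
FROM worker_split_copy_test.dist_table_with_generated_col_81080000;
-- Under that assumption this yields 247 and 263, summing to the source
-- shard's 510 rows and matching the COUNT(*) checks in the expected output.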
diff --git a/src/test/regress/minimal_schedule b/src/test/regress/minimal_schedule index ef2d3dc65..8b0cfff70 100644 --- a/src/test/regress/minimal_schedule +++ b/src/test/regress/minimal_schedule @@ -1,2 +1,2 @@ test: minimal_cluster_management -test: multi_test_helpers multi_test_helpers_superuser columnar_test_helpers multi_test_catalog_views tablespace +test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw columnar_test_helpers multi_test_catalog_views tablespace diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index 8bd234828..4091b7a63 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -19,7 +19,7 @@ test: multi_extension test: single_node test: relation_access_tracking_single_node test: single_node_truncate -test: multi_test_helpers multi_test_helpers_superuser +test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw test: multi_cluster_management # below tests are placed right after multi_cluster_management as we do @@ -91,13 +91,6 @@ test: drop_partitioned_table test: multi_fix_partition_shard_index_names test: partition_wise_join -# ---------- -# Tests for foreign data wrapper support -# ---------- -test: multi_create_fdw - - - # ---------- # Tests for statistics propagation # ---------- diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index 2707dc38a..edee0eef4 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -49,6 +49,7 @@ sub Usage() print " --pg_ctl-timeout Timeout for pg_ctl\n"; print " --connection-timeout Timeout for connecting to worker nodes\n"; print " --mitmproxy Start a mitmproxy for one of the workers\n"; + print " --worker-count Number of workers in Citus cluster (default: 2)\n"; exit 1; } @@ -84,6 +85,7 @@ my $mitmFifoPath = catfile($TMP_CHECKDIR, "mitmproxy.fifo"); my $conninfo = ""; my $publicWorker1Host = "localhost"; my $publicWorker2Host = "localhost"; +my $workerCount = 2; my $serversAreShutdown = "TRUE"; my $usingWindows = 0; @@ -116,6 +118,7 @@ GetOptions( 'conninfo=s' => \$conninfo, 'worker-1-public-hostname=s' => \$publicWorker1Host, 'worker-2-public-hostname=s' => \$publicWorker2Host, + 'worker-count=i' => \$workerCount, 'help' => sub { Usage() }); my $fixopen = "$bindir/postgres.fixopen"; @@ -318,7 +321,6 @@ my $mitmPort = 9060; # Set some default configuration options my $masterPort = 57636; -my $workerCount = 2; my @workerHosts = (); my @workerPorts = (); diff --git a/src/test/regress/sql/arbitrary_configs_router.sql b/src/test/regress/sql/arbitrary_configs_router.sql new file mode 100644 index 000000000..f59c5fa4a --- /dev/null +++ b/src/test/regress/sql/arbitrary_configs_router.sql @@ -0,0 +1,634 @@ +SET search_path TO arbitrary_configs_router; + +SET client_min_messages TO WARNING; + +-- test simple select for a single row +SELECT * FROM articles_hash WHERE author_id = 10 AND id = 50; + +-- get all titles by a single author +SELECT title FROM articles_hash WHERE author_id = 10; + +-- try ordering them by word count +SELECT title, word_count FROM articles_hash + WHERE author_id = 10 + ORDER BY word_count DESC NULLS LAST; + +-- look at last two articles by an author +SELECT title, id FROM articles_hash + WHERE author_id = 5 + ORDER BY id + LIMIT 2; + +-- find all articles by two authors in same shard +-- but plan is not fast path router plannable due to +-- two distribution columns in the query +SELECT title, author_id FROM articles_hash + WHERE author_id = 7 OR 
author_id = 8 + ORDER BY author_id ASC, id; + +-- a HAVING clause is supported if the query goes to a single shard +-- and has a single dist. key filter +SELECT author_id, sum(word_count) AS corpus_size FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id + HAVING sum(word_count) > 1000 + ORDER BY sum(word_count) DESC; + +-- the fast-path planner only supports the = operator +SELECT * FROM articles_hash WHERE author_id <= 1; +SELECT * FROM articles_hash WHERE author_id IN (1, 3) ORDER BY 1,2,3,4; + +-- queries with CTEs cannot go through fast-path planning +WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) +SELECT * FROM first_author; + +-- two CTE joins also cannot go through fast-path planning +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + +-- this is a different case where each CTE is recursively planned and those go +-- through the fast-path router planner, but the top-level join is not +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + +-- recursive CTEs also cannot go through +-- fast-path planning +WITH RECURSIVE hierarchy as ( + SELECT *, 1 AS level + FROM company_employees + WHERE company_id = 1 and manager_id = 0 + UNION + SELECT ce.*, (h.level+1) + FROM hierarchy h JOIN company_employees ce + ON (h.employee_id = ce.manager_id AND + h.company_id = ce.company_id AND + ce.company_id = 1)) +SELECT * FROM hierarchy WHERE LEVEL <= 2; + +WITH update_article AS ( + UPDATE articles_hash SET word_count = 10 WHERE id = 1 AND word_count = 9 RETURNING * +) +SELECT * FROM update_article; + +WITH delete_article AS ( + DELETE FROM articles_hash WHERE id = 1 AND word_count = 10 RETURNING * +) +SELECT * FROM delete_article; + +-- grouping sets are supported via fast-path +SELECT + id, substring(title, 2, 1) AS subtitle, count(*) + FROM articles_hash + WHERE author_id = 1 + GROUP BY GROUPING SETS ((id),(subtitle)) + ORDER BY id, subtitle; + +-- queries which involve functions in the FROM clause are not supported via fast-path planning +SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1; + +-- sublinks are not supported via fast-path planning +SELECT * FROM articles_hash +WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2) +ORDER BY articles_hash.id; + +-- subqueries are not supported via fast-path planning +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; + +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test +WHERE test.id = articles_hash.id and articles_hash.author_id = 1 +ORDER BY articles_hash.id; + +-- simple lookup query just works +SELECT * + FROM articles_hash + WHERE author_id = 1; + +-- the below query hits a single shard but with multiple filters, +-- so it cannot go via fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 OR author_id = 17; + +-- rename the output columns +SELECT id as article_id, word_count * id as random_value + FROM articles_hash + WHERE author_id = 1;
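+ +-- a quick way to check whether a query was router planned is EXPLAIN: a +-- router-planned query is expected to show a single task under the Citus +-- custom scan (e.g. "Custom Scan (Citus Adaptive)" with "Task Count: 1"), +-- though the exact plan shape may vary across Citus versions +-- EXPLAIN (COSTS OFF) SELECT * FROM articles_hash WHERE author_id = 10;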
+ +-- joins do not go through fast-path planning +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + LIMIT 3; + +-- single shard select with limit goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 3; + +-- single shard select with limit + offset goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 2 + OFFSET 1; + +-- single shard select with limit + offset + order by goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id desc + LIMIT 2 + OFFSET 1; + +-- single shard select with group by on non-partition column goes through fast-path planning +SELECT id + FROM articles_hash + WHERE author_id = 1 + GROUP BY id + ORDER BY id; + +-- single shard select with distinct goes through fast-path planning +SELECT DISTINCT id + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + +-- single shard aggregate goes through fast-path planning +SELECT avg(word_count) + FROM articles_hash + WHERE author_id = 2; + +-- max, min, sum, count go through fast-path planning +SELECT max(word_count) as max, min(word_count) as min, + sum(word_count) as sum, count(word_count) as cnt + FROM articles_hash + WHERE author_id = 2; + + +-- queries with aggregates and group by go through fast-path planning +SELECT max(word_count) + FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id; + + +-- set operations are not supported via fast-path planning +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 3 +) AS combination +ORDER BY id; + +-- function calls in the target list are supported via fast-path +SELECT LEFT(title, 1) FROM articles_hash WHERE author_id = 1; + + +-- top-level union queries are supported through recursive planning + +-- unions in subqueries are not supported via fast-path planning +SELECT * FROM ( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 1)) uu +ORDER BY 1, 2 +LIMIT 5; + + +-- Test various filtering options for router plannable check + +-- cannot go through fast-path if there is +-- explicit coercion +SELECT * + FROM articles_hash + WHERE author_id = 1::bigint; + +-- can go through fast-path if there is +-- implicit coercion +-- This doesn't work; see the related issue +-- reported at https://github.com/citusdata/citus/issues/2605 +-- SELECT * +-- FROM articles_hash +-- WHERE author_id = 1.0; + +SELECT * + FROM articles_hash + WHERE author_id = 68719476736; -- this is bigint + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 and author_id >= 1; + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 or id = 1; + +-- goes through fast-path planning because +-- the dist. key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = 1 or id = 41); + +-- this time there is an OR clause which prevents +-- router planning at all +SELECT * + FROM articles_hash + WHERE author_id = 1 and id = 1 or id = 41;
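+ +-- note: AND binds tighter than OR, so the qual above parses as +-- (author_id = 1 AND id = 1) OR id = 41; the OR branch carries no distribution +-- key filter, which is what prevents router planning here. Parenthesizing the OR, +-- as in the earlier WHERE author_id = 1 and (id = 1 or id = 41), restores fast-path planning.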
+ +-- goes through fast-path planning because +-- the dist. key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = random()::int * 0); + +-- not router plannable due to function call on the right side +SELECT * + FROM articles_hash + WHERE author_id = (random()::int * 0 + 1); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(-1); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE 1 = abs(author_id); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(author_id - 2); + +-- the function is not on the dist. key, so this qualifies as +-- fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = abs(id - 2)); + +-- not router plannable due to the IS TRUE construct +SELECT * + FROM articles_hash + WHERE (author_id = 1) is true; + +-- router plannable, (boolean expression) = true is collapsed to (boolean expression) +SELECT * + FROM articles_hash + WHERE (author_id = 1) = true; + +-- some more complex quals +SELECT count(*) FROM articles_hash WHERE (author_id = 15) AND (id = 1 OR word_count > 5); +SELECT count(*) FROM articles_hash WHERE (author_id = 15) OR (id = 1 AND word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) OR (author_id = 1 AND word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 OR word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 AND (word_count > 5 OR id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 OR author_id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 AND author_id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND ((word_count > 5 OR title ilike 'b%' ) AND (author_id = 2 AND word_count > 50))); + +-- fast-path router plannable, the BETWEEN operator is on another column +SELECT * + FROM articles_hash + WHERE (author_id = 1) and id between 0 and 20; + +-- fast-path router plannable, partition column expression is ANDed with the rest +SELECT * + FROM articles_hash + WHERE (author_id = 1) and (id = 1 or id = 31) and title like '%s'; + +-- fast-path router plannable, order is changed +SELECT * + FROM articles_hash + WHERE (id = 1 or id = 31) and title like '%s' and (author_id = 1); + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1); + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1) and (word_count < 3000 or word_count > 8000); + +-- window functions are supported in fast-path router plannable queries +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5; + +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5 + ORDER BY word_count DESC; + +SELECT id, MIN(id) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + +SELECT id, word_count, AVG(word_count) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1; + +-- some more tests on complex target lists +SELECT DISTINCT ON (author_id, id) author_id, id, + MIN(id) over
(order by avg(word_count)) * AVG(id * 5.2 + (1.0/max(word_count))) over (order by max(word_count)) as t1, + count(*) FILTER (WHERE title LIKE 'al%') as cnt_with_filter, + count(*) FILTER (WHERE '0300030' LIKE '%3%') as cnt_with_filter_2, + avg(case when id > 2 then char_length(word_count::text) * (id * strpos(word_count::text, '1')) end) as case_cnt, + COALESCE(strpos(avg(word_count)::text, '1'), 20) + FROM articles_hash as aliased_table + WHERE author_id = 1 + GROUP BY author_id, id + HAVING count(DISTINCT title) > 0 + ORDER BY author_id, id, sum(word_count) - avg(char_length(title)) DESC, COALESCE(array_upper(ARRAY[max(id)],1) * 5,0) DESC; + +-- where false queries are router plannable but not fast-path +SELECT * + FROM articles_hash + WHERE false; + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and false; + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and 1=0; + +SELECT * + FROM articles_hash + WHERE null and author_id = 1; + +-- we cannot qualify dist_key = X operator Y via +-- fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + 1; + +-- where false with an immutable function returning false +-- goes through fast-path +SELECT * + FROM articles_hash a + WHERE a.author_id = 10 and int4eq(1, 2); + +-- a partition_column IS NULL clause does not prune out any shards; +-- all shards remain after shard pruning, not router plannable, +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id is null; + +-- a partition_column = null clause prunes out all shards; +-- no shards remain after shard pruning, router plannable, +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id = null; + +-- union/difference/intersection with where false +-- this query was not originally router plannable; the addition of 1=0 +-- makes it router plannable but not fast-path +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination +ORDER BY id; + +-- same as the above, but with WHERE false +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination WHERE false +ORDER BY id; + +-- window functions with where false +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1 and 1=0; + +-- complex query hitting a single shard via fast-path +SELECT + count(DISTINCT CASE + WHEN + word_count > 100 + THEN + id + ELSE + NULL + END) as c + FROM + articles_hash + WHERE + author_id = 5; +-- queries inside transactions can be fast-path router plannable +BEGIN; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +END; + +-- queries inside read-only transactions can be fast-path router plannable +BEGIN; +SET TRANSACTION READ ONLY; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +END; + +-- cursor queries are fast-path router plannable +BEGIN; +DECLARE test_cursor CURSOR FOR + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +FETCH test_cursor; +FETCH ALL test_cursor; +FETCH test_cursor; -- fetch one row after the last +FETCH BACKWARD test_cursor; +END; + +-- queries inside copy can be router plannable +COPY ( + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id) TO STDOUT; + +-- the inner query of CREATE TABLE ... AS can be fast-path router plannable +CREATE TEMP TABLE temp_articles_hash as + SELECT *
FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + +-- fast-path router plannable queries may include a FILTER clause for aggregates +SELECT count(*), count(*) FILTER (WHERE id < 3) + FROM articles_hash + WHERE author_id = 1; + +-- prepared queries can be router plannable +PREPARE author_1_articles as + SELECT * + FROM articles_hash + WHERE author_id = 1; + +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; + +-- parameterized prepared queries can be router plannable +PREPARE author_articles(int) as + SELECT * + FROM articles_hash + WHERE author_id = $1; + +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); + +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); + +PREPARE author_articles_update(int) AS + UPDATE articles_hash SET title = 'test' WHERE author_id = $1; + +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); + +-- we don't want too many details; though we're omitting +-- "DETAIL: distribution column value:", we consider it acceptable +-- since the query results verify the correctness +\set VERBOSITY terse + +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); + +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); + +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); + +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); + +\set VERBOSITY default
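+ +-- note: each prepared statement above is executed at least six times because +-- PostgreSQL plans the first five executions of a prepared statement with custom +-- plans and may switch to a cached generic plan afterwards, so the sixth run +-- exercises the generic-plan path; a hypothetical sketch of the pattern: +-- PREPARE author_cnt(int) AS SELECT count(*) FROM articles_hash WHERE author_id = $1; +-- EXECUTE author_cnt(1); -- repeated six times; the last run may use the generic plan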
+ +-- insert .. select via coordinator could also +-- use fast-path queries +PREPARE insert_sel(int, int) AS +INSERT INTO articles_hash + SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0; + +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); + +-- one final interesting prepared statement +-- where one of the filters is on the target list +PREPARE fast_path_agg_filter(int, int) AS + SELECT + count(*) FILTER (WHERE word_count=$1) + FROM + articles_hash + WHERE author_id = $2; + +EXECUTE fast_path_agg_filter(1,1); +EXECUTE fast_path_agg_filter(2,2); +EXECUTE fast_path_agg_filter(3,3); +EXECUTE fast_path_agg_filter(4,4); +EXECUTE fast_path_agg_filter(5,5); +EXECUTE fast_path_agg_filter(6,6); + +-- views internally become subqueries, so this is not a fast-path router query +SELECT * FROM test_view; + +-- materialized views can be created for fast-path router plannable queries +CREATE MATERIALIZED VIEW mv_articles_hash_empty AS + SELECT * FROM articles_hash WHERE author_id = 1; +SELECT * FROM mv_articles_hash_empty; + + +SELECT id + FROM articles_hash + WHERE author_id = 1; + +INSERT INTO articles_hash VALUES (51, 1, 'amateus', 1814), (52, 1, 'second amateus', 2824); + +-- verify the insert is successful (not router plannable, but executable) +SELECT id + FROM articles_hash + WHERE author_id = 1; + +SELECT count(*) FROM collections_list WHERE key = 4; +SELECT count(*) FROM collections_list_1 WHERE key = 4; +SELECT count(*) FROM collections_list_2 WHERE key = 4; +UPDATE collections_list SET value = 15 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list_1 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list_2 WHERE key = 4; + +-- test INSERT using values from generate_series() and repeat() functions +INSERT INTO authors_reference (id, name) VALUES (generate_series(1, 10), repeat('Migjeni', 3)); +SELECT * FROM authors_reference ORDER BY 1, 2; diff --git a/src/test/regress/sql/arbitrary_configs_router_create.sql b/src/test/regress/sql/arbitrary_configs_router_create.sql new file mode 100644 index 000000000..956100c7e --- /dev/null +++ b/src/test/regress/sql/arbitrary_configs_router_create.sql @@ -0,0 +1,118 @@ +CREATE SCHEMA arbitrary_configs_router; +SET search_path TO arbitrary_configs_router; + +CREATE TABLE articles_hash ( + id bigint NOT NULL, + author_id bigint NOT NULL, + title varchar(20) NOT NULL, + word_count integer +); + +SELECT create_distributed_table('articles_hash', 'author_id'); + +CREATE TABLE authors_reference (id int, name text); +SELECT create_reference_table('authors_reference'); + +-- create a bunch of test data +INSERT INTO articles_hash VALUES (1, 1, 'arsenous', 9572), (2, 2, 'abducing', 13642),( 3, 3, 'asternal', 10480),( 4, 4, 'altdorfer', 14551),( 5, 5, 'aruru', 11389), + (6, 6, 'atlases', 15459),(7, 7, 'aseptic', 12298),( 8, 8, 'agatized', 16368),(9, 9, 'alligate', 438), + (10, 10, 'aggrandize', 17277),(11, 1, 'alamo', 1347),(12, 2, 'archiblast', 18185), + (13, 3, 'aseyev', 2255),(14, 4, 'andesite', 19094),(15, 5, 'adversa', 3164), + (16, 6, 'allonym', 2),(17, 7, 'auriga', 4073),(18, 8, 'assembly', 911),(19, 9, 'aubergiste', 4981), + (20, 10, 'absentness', 1820),(21, 1, 'arcading', 5890),(22, 2, 'antipope', 2728),(23, 3, 'abhorring', 6799), + (24, 4, 'audacious', 3637),(25, 5, 'antehall', 7707),(26, 6, 'abington', 4545),(27, 7,
'arsenous', 8616), + (28, 8, 'aerophyte', 5454),(29, 9, 'amateur', 9524),(30, 10, 'andelee', 6363),(31, 1, 'athwartships', 7271), + (32, 2, 'amazon', 11342),(33, 3, 'autochrome', 8180),(34, 4, 'amnestied', 12250),(35, 5, 'aminate', 9089), + (36, 6, 'ablation', 13159),(37, 7, 'archduchies', 9997),(38, 8, 'anatine', 14067),(39, 9, 'anchises', 10906), + (40, 10, 'attemper', 14976),(41, 1, 'aznavour', 11814),(42, 2, 'ausable', 15885),(43, 3, 'affixal', 12723), + (44, 4, 'anteport', 16793),(45, 5, 'afrasia', 864),(46, 6, 'atlanta', 17702),(47, 7, 'abeyance', 1772), + (48, 8, 'alkylic', 18610),(49, 9, 'anyone', 2681),(50, 10, 'anjanette', 19519); + +CREATE TABLE company_employees (company_id int, employee_id int, manager_id int); + +SELECT create_distributed_table('company_employees', 'company_id', 'hash'); + +INSERT INTO company_employees values(1, 1, 0); +INSERT INTO company_employees values(1, 2, 1); +INSERT INTO company_employees values(1, 3, 1); +INSERT INTO company_employees values(1, 4, 2); +INSERT INTO company_employees values(1, 5, 4); + +INSERT INTO company_employees values(3, 1, 0); +INSERT INTO company_employees values(3, 15, 1); +INSERT INTO company_employees values(3, 3, 1); + +-- finally, some tests with partitioned tables +CREATE TABLE collections_list ( + key bigint, + ts timestamptz, + collection_id integer, + value numeric +) PARTITION BY LIST (collection_id ); + +CREATE TABLE collections_list_1 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 1 ); + +CREATE TABLE collections_list_2 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 2 ); + +SELECT create_distributed_table('collections_list', 'key'); +INSERT INTO collections_list SELECT i % 10, now(), (i % 2) + 1, i*i FROM generate_series(0, 50)i; + +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id() RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = 1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; + +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id(int) RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = $1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; + +-- check that functions returning setof queries are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count() RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = 1; + +END; +$$ LANGUAGE plpgsql; + +-- check that functions returning setof queries are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count(int) RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = $1; + +END; +$$ LANGUAGE plpgsql; + +-- Suppress the warning saying that the view won't be distributed +-- because it depends on a local table. +-- +-- This only happens when running PostgresConfig.
+SET client_min_messages TO ERROR; +CREATE VIEW test_view AS + SELECT * FROM articles_hash WHERE author_id = 1; diff --git a/src/test/regress/sql/citus_non_blocking_split_shards.sql b/src/test/regress/sql/citus_non_blocking_split_shards.sql index 11275a342..909beac02 100644 --- a/src/test/regress/sql/citus_non_blocking_split_shards.sql +++ b/src/test/regress/sql/citus_non_blocking_split_shards.sql @@ -53,7 +53,7 @@ SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); CREATE TABLE reference_table (measureid integer PRIMARY KEY); SELECT create_reference_table('reference_table'); -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); @@ -70,9 +70,11 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- BEGIN : Load data into tables. INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; + SELECT COUNT(*) FROM sensors; SELECT COUNT(*) FROM reference_table; SELECT COUNT(*) FROM colocated_dist_table; diff --git a/src/test/regress/sql/citus_split_shard_by_split_points.sql b/src/test/regress/sql/citus_split_shard_by_split_points.sql index f5e7f005a..47b28b9d7 100644 --- a/src/test/regress/sql/citus_split_shard_by_split_points.sql +++ b/src/test/regress/sql/citus_split_shard_by_split_points.sql @@ -49,7 +49,7 @@ SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); CREATE TABLE reference_table (measureid integer PRIMARY KEY); SELECT create_reference_table('reference_table'); -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); @@ -66,9 +66,11 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- BEGIN : Load data into tables. 
INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; + SELECT COUNT(*) FROM sensors; SELECT COUNT(*) FROM reference_table; SELECT COUNT(*) FROM colocated_dist_table; diff --git a/src/test/regress/sql/citus_stats_tenants.sql b/src/test/regress/sql/citus_stats_tenants.sql index 8f031bf18..538771c06 100644 --- a/src/test/regress/sql/citus_stats_tenants.sql +++ b/src/test/regress/sql/citus_stats_tenants.sql @@ -8,8 +8,17 @@ RETURNS VOID LANGUAGE C AS 'citus', $$clean_citus_stats_tenants$$; +CREATE OR REPLACE FUNCTION pg_catalog.sleep_until_next_period() +RETURNS VOID +LANGUAGE C +AS 'citus', $$sleep_until_next_period$$; + SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); +-- set period to a high number to prevent stats from being reset +SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM SET citus.stats_tenants_period TO 1000000000'); +SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); + CREATE TABLE dist_tbl (a INT, b TEXT); SELECT create_distributed_table('dist_tbl', 'a', shard_count:=4, colocate_with:='none'); @@ -28,36 +37,9 @@ UPDATE dist_tbl SET b = a + 1 WHERE a = 3; UPDATE dist_tbl SET b = a + 1 WHERE a = 4; DELETE FROM dist_tbl WHERE a = 5; -\c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :worker_2_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants(true) ORDER BY tenant_attribute; -SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM SET citus.stats_tenants_period TO 3'); -SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); - -SELECT count(*)>=0 FROM dist_tbl WHERE a = 1; -SELECT count(*)>=0 FROM dist_tbl WHERE a = 2; - -\c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :worker_2_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :master_port - -SELECT pg_sleep (3); - -\c - - - :worker_1_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :worker_2_port -SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :master_port -SET search_path TO citus_stats_tenants; - -SELECT result FROM 
run_command_on_all_nodes('ALTER SYSTEM SET citus.stats_tenants_period TO 60'); -SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); -- queries with multiple tenants should not be counted SELECT count(*)>=0 FROM dist_tbl WHERE a IN (1, 5); @@ -65,45 +47,52 @@ SELECT count(*)>=0 FROM dist_tbl WHERE a IN (1, 5); -- queries with reference tables should not be counted SELECT count(*)>=0 FROM ref_tbl WHERE a = 1; -\c - - - :worker_1_port -SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants ORDER BY tenant_attribute; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants(true) ORDER BY tenant_attribute; -- queries with multiple tables but one tenant should be counted SELECT count(*)>=0 FROM dist_tbl, dist_tbl_2 WHERE dist_tbl.a = 1 AND dist_tbl_2.a = 1; SELECT count(*)>=0 FROM dist_tbl JOIN dist_tbl_2 ON dist_tbl.a = dist_tbl_2.a WHERE dist_tbl.a = 1; -\c - - - :worker_1_port -SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants WHERE tenant_attribute = '1'; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, query_count_in_this_period FROM citus_stats_tenants(true) WHERE tenant_attribute = '1'; -- test scoring -- all of these distribution column values are from second worker +SELECT nodeid AS worker_2_nodeid FROM pg_dist_node WHERE nodeport = :worker_2_port \gset + SELECT count(*)>=0 FROM dist_tbl WHERE a = 2; SELECT count(*)>=0 FROM dist_tbl WHERE a = 3; SELECT count(*)>=0 FROM dist_tbl WHERE a = 4; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; -\c - - - :master_port -SET search_path TO citus_stats_tenants; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'abcd'; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; - -\c - - - :worker_2_port -SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; -\c - - - :master_port -SET search_path TO citus_stats_tenants; - -SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; -SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'cdef'; +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; + +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'bcde'; +SELECT count(*)>=0 FROM dist_tbl_text WHERE a = 'defg'; + +SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) WHERE nodeid = :worker_2_nodeid ORDER BY score DESC, tenant_attribute; + +-- test period passing +SELECT result FROM run_command_on_all_nodes('SELECT clean_citus_stats_tenants()'); + +SELECT count(*)>=0 FROM dist_tbl WHERE a = 1; +INSERT INTO dist_tbl VALUES (5, 'abcd'); + +\c - - - :worker_1_port +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants_local 
ORDER BY tenant_attribute; + +-- simulate passing the period +SET citus.stats_tenants_period TO 2; +SELECT sleep_until_next_period(); + +SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stats_tenants_local ORDER BY tenant_attribute; \c - - - :worker_2_port SELECT tenant_attribute, query_count_in_this_period, score FROM citus_stats_tenants(true) ORDER BY score DESC; diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql index c266b5333..ded90b69c 100644 --- a/src/test/regress/sql/merge.sql +++ b/src/test/regress/sql/merge.sql @@ -18,7 +18,9 @@ CREATE SCHEMA merge_schema; SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; -SET citus.explain_all_tasks to true; +SET citus.explain_all_tasks TO true; +SET citus.shard_replication_factor TO 1; +SET citus.max_adaptive_executor_pool_size TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); CREATE TABLE source @@ -143,9 +145,33 @@ SELECT undistribute_table('source'); SELECT create_distributed_table('target', 'customer_id'); SELECT create_distributed_table('source', 'customer_id'); +-- Updates one of the rows with customer_id = 30002 +SELECT * from target t WHERE t.customer_id = 30002; +-- Turn on remote command logging to print tasks sent to nodes +SET citus.log_remote_commands to true; MERGE INTO target t USING source s - ON (t.customer_id = s.customer_id) + ON (t.customer_id = s.customer_id) AND t.customer_id = 30002 + + WHEN MATCHED AND t.order_center = 'XX' THEN + DELETE + + WHEN MATCHED THEN + UPDATE SET -- Existing customer, update the order count and last_order_id + order_count = t.order_count + 1, + last_order_id = s.order_id + + WHEN NOT MATCHED THEN + DO NOTHING; + +SET citus.log_remote_commands to false; +SELECT * from target t WHERE t.customer_id = 30002; + +-- Deletes one of the rows with customer_id = 30004 +SELECT * from target t WHERE t.customer_id = 30004; +MERGE INTO target t + USING source s + ON (t.customer_id = s.customer_id) AND t.customer_id = 30004 WHEN MATCHED AND t.order_center = 'XX' THEN DELETE @@ -158,6 +184,22 @@ MERGE INTO target t WHEN NOT MATCHED THEN -- New entry, record it.
INSERT (customer_id, last_order_id, order_center, order_count, last_order) VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time); +SELECT * from target t WHERE t.customer_id = 30004; + +-- Updating distribution column is allowed if the operation is a no-op +SELECT * from target t WHERE t.customer_id = 30000; +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = 30000; + +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = t.customer_id; +SELECT * from target t WHERE t.customer_id = 30000; -- -- Test MERGE with CTE as source @@ -197,7 +239,6 @@ MERGE INTO t1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (pg_res.id, pg_res.val); --- Two rows with id 2 and val incremented, id 3, and id 1 is deleted SELECT * FROM t1 order by id; SELECT * INTO merge_result FROM t1 order by id; @@ -243,11 +284,13 @@ SELECT create_distributed_table('t1', 'id'); SELECT create_distributed_table('s1', 'id'); +SELECT * FROM t1 order by id; +SET citus.log_remote_commands to true; WITH s1_res AS ( SELECT * FROM s1 ) MERGE INTO t1 - USING s1_res ON (s1_res.id = t1.id) + USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6 WHEN MATCHED AND s1_res.val = 0 THEN DELETE @@ -255,6 +298,9 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); +SET citus.log_remote_commands to false; +-- Other than id 6 everything else is a NO match, and should appear in target +SELECT * FROM t1 order by 1, 2; -- -- Test with multiple join conditions @@ -325,15 +371,21 @@ SELECT undistribute_table('s2'); SELECT create_distributed_table('t2', 'id'); SELECT create_distributed_table('s2', 'id'); +SELECT * FROM t2 ORDER BY 1; +SET citus.log_remote_commands to true; MERGE INTO t2 USING s2 -ON t2.id = s2.id AND t2.src = s2.src +ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4 WHEN MATCHED AND t2.val = 1 THEN UPDATE SET val = s2.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN - INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src); + DO NOTHING; +SET citus.log_remote_commands to false; +-- Row with id = 4 is a match for delete clause, row should be deleted +-- Row with id = 3 is a NO match, row from source will be inserted +SELECT * FROM t2 ORDER BY 1; -- -- With sub-query as the MERGE source @@ -740,7 +792,8 @@ $$ language plpgsql volatile; CREATE TABLE fn_target(id int, data varchar); MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -753,11 +806,12 @@ SELECT * INTO fn_result FROM fn_target ORDER BY 1 ; -- Clean the slate TRUNCATE TABLE fn_target; SELECT citus_add_local_table_to_metadata('fn_target'); -SELECT create_distributed_table('dist_table', 'id'); +SELECT citus_add_local_table_to_metadata('dist_table'); SET client_min_messages TO DEBUG1; MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -824,10 +878,756 @@ RESET client_min_messages; SELECT * FROM ft_target; +-- +-- complex joins on the source side +-- + +-- 
source relation (a join of two relations) is an unaliased join + +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); + +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); + +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); + +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET tid = sid2, src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test the same scenarios with distributed tables + +SELECT create_distributed_table('target_cj', 'tid'); +SELECT create_distributed_table('source_cj1', 'sid1'); +SELECT create_distributed_table('source_cj2', 'sid2'); + +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SET citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- sub-query as a source +BEGIN; +MERGE INTO target_cj t +USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub +ON t.tid = sub.sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = sub.src1, val = val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test self-join +BEGIN; +SELECT * FROM target_cj ORDER BY 1; +set citus.log_remote_commands to true; +MERGE INTO target_cj t1 +USING (SELECT * FROM target_cj) sub +ON t1.tid = sub.tid AND t1.tid = 3 +WHEN MATCHED THEN + UPDATE SET src = sub.src, val = sub.val + 100 +WHEN NOT MATCHED THEN + DO NOTHING; +set citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + + +-- Test PREPARE +PREPARE foo(int) AS +MERGE INTO target_cj target +USING (SELECT * FROM source_cj1) sub +ON target.tid = sub.sid1 AND target.tid = $1 +WHEN MATCHED THEN + UPDATE SET val = sub.val1 +WHEN NOT MATCHED THEN + DO NOTHING; + +SELECT * FROM target_cj ORDER BY 1; + +BEGIN; +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; + +SET citus.log_remote_commands to true; +SET client_min_messages TO DEBUG1; +EXECUTE foo(2); +RESET client_min_messages; + +EXECUTE foo(2); +SET citus.log_remote_commands to false; + +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test distributed tables; they must be co-located and joined on the distribution column.
+ +-- +-- We create two sets of source and target tables; one set is plain Postgres and the +-- other is Citus distributed. We run the _exact_ same MERGE SQL on both sets and compare +-- the final target tables of Postgres and Citus; the results should match. +-- This is repeated for various MERGE SQL combinations +-- +CREATE TABLE pg_target(id int, val varchar); +CREATE TABLE pg_source(id int, val varchar); +CREATE TABLE citus_target(id int, val varchar); +CREATE TABLE citus_source(id int, val varchar); + +-- Half of the source rows do not match +INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i; + +INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i; + +SELECT create_distributed_table('citus_target', 'id'); +SELECT create_distributed_table('citus_source', 'id'); + +-- +-- This routine compares the target tables of Postgres and Citus and +-- returns true if they match, false if the results do not match. +-- +CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_target + FULL OUTER JOIN citus_target + USING (id, val) + WHERE pg_target.id IS NULL + OR citus_target.id IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; + +-- Make sure we start with identical data in Postgres and Citus +SELECT compare_tables(); + +-- Run the MERGE on both Postgres and Citus, and compare the final target tables + +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- ON clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- ON clause filter on target +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; +
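+-- note on the semantics exercised above: a target-side filter in the ON clause +-- does not merely restrict the merge; target rows failing t.id < 100 are treated +-- as NOT MATCHED, so the WHEN NOT MATCHED branch can insert source rows whose id +-- already exists in the target (e.g. source id 250 matches nothing under +-- ON (t.id = s.id AND t.id < 100) and is inserted although the target holds id 250) +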
+-- +-- NOT MATCHED clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- Test constant filter in ON clause to check if shards are pruned +-- with restriction information +-- + +-- +-- Though a constant filter is present, this won't prune shards, as a +-- NOT MATCHED clause is present +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- This will prune shards with restriction information, as the NOT MATCHED clause is void +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test CTE with distributed tables +CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400; +CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400; + +BEGIN; +SET citus.log_remote_commands to true; + +WITH cte AS ( + SELECT * FROM pg_source_view +) +MERGE INTO pg_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +WITH cte AS ( + SELECT * FROM citus_source_view +) +MERGE INTO citus_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + + +-- Test sub-query with distributed tables +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING (SELECT * FROM pg_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +MERGE INTO citus_target t +USING (SELECT * FROM citus_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test PREPARE +PREPARE pg_prep(int) AS +MERGE INTO pg_target +USING (SELECT * FROM pg_source) sub +ON pg_target.id = sub.id AND pg_target.id = $1 +WHEN MATCHED THEN + UPDATE
SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + INSERT VALUES (sub.id, sub.val); + +PREPARE citus_prep(int) AS +MERGE INTO citus_target +USING (SELECT * FROM citus_source) sub +ON citus_target.id = sub.id AND citus_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + INSERT VALUES (sub.id, sub.val); + +BEGIN; + +SELECT * FROM pg_target WHERE id = 500; -- before merge +SELECT count(*) FROM pg_target; -- before merge +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- non-cached +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- cached +SELECT count(*) FROM pg_target; -- cached + +SELECT * FROM citus_target WHERE id = 500; -- before merge +SELECT count(*) FROM citus_target; -- before merge +SET citus.log_remote_commands to true; +EXECUTE citus_prep(500); +SELECT * FROM citus_target WHERE id = 500; -- non-cached +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +SET citus.log_remote_commands to false; +SELECT * FROM citus_target WHERE id = 500; -- cached +SELECT count(*) FROM citus_target; -- cached + +SELECT compare_tables(); +ROLLBACK; + +-- Test partitions + distributed tables + +CREATE TABLE pg_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE citus_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); + +CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part6 PARTITION OF citus_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT + WITH (autovacuum_enabled=off); + +CREATE TABLE pg_pa_source (sid integer, delta float); +CREATE TABLE citus_pa_source (sid integer, delta float); + +-- insert many rows to the source table +INSERT INTO pg_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +-- insert a few rows in the target table (odd numbered tid) +INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; + +SELECT create_distributed_table('citus_pa_target', 'tid'); +SELECT create_distributed_table('citus_pa_source', 'sid'); + +CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_pa_target + FULL OUTER JOIN citus_pa_target + USING (tid, balance, val) + WHERE pg_pa_target.tid IS NULL + OR citus_pa_target.tid IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; + +-- try simple MERGE +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' 
updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +SELECT pa_compare_tables(); +ROLLBACK; + +-- same with a constant qual +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +SELECT pa_compare_tables(); +ROLLBACK; + +CREATE TABLE source_json( id integer, z int, d jsonb); +CREATE TABLE target_json( id integer, z int, d jsonb); + +INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i; + +SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id'); + +-- single shard query: given source_json is filtered, Postgres is smart enough to push down +-- the filter to target_json as well +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING (SELECT * FROM source_json WHERE id = 1) sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +$Q$); +SELECT * FROM target_json ORDER BY 1; + +-- zero shard query as filters do not match +--SELECT public.coordinator_plan($Q$ +--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +--USING (SELECT * FROM source_json WHERE id = 1) sdn +--ON sda.id = sdn.id AND sda.id = 2 +--WHEN NOT matched THEN +-- INSERT (id, z) VALUES (sdn.id, 5); +--$Q$); +--SELECT * FROM target_json ORDER BY 1; + +-- join for source_json is happening at a different place +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z) +ON sda.id = s1.id AND s1.id = s2.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (s2.id, 5); +$Q$); +SELECT * FROM target_json ORDER BY 1; + +-- update JSON column +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET d = '{"a" : 5}'; +$Q$); +SELECT * FROM target_json ORDER BY 1; + +CREATE FUNCTION immutable_hash(int) RETURNS int +AS 'SELECT hashtext( ($1 + $1)::text);' +LANGUAGE SQL +IMMUTABLE +RETURNS NULL ON NULL INPUT; + +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET z = immutable_hash(sdn.z); + +-- Test bigserial +CREATE TABLE source_serial (id integer, z int, d bigserial); +CREATE TABLE target_serial (id integer, z int, d bigserial); +INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i; +SELECT create_distributed_table('source_serial', 'id'), + create_distributed_table('target_serial', 'id'); + +MERGE INTO target_serial sda +USING source_serial sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (id, z); + +SELECT count(*) from source_serial; +SELECT count(*) from target_serial; + +SELECT count(distinct d) from source_serial; +SELECT count(distinct d) from target_serial; + +-- Test set
operations +CREATE TABLE target_set(t1 int, t2 int); +CREATE TABLE source_set(s1 int, s2 int); + +SELECT create_distributed_table('target_set', 't1'), + create_distributed_table('source_set', 's1'); + +INSERT INTO target_set VALUES(1, 0); +INSERT INTO source_set VALUES(1, 1); +INSERT INTO source_set VALUES(2, 2); + +MERGE INTO target_set +USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 100 +WHEN NOT MATCHED THEN + INSERT VALUES(foo.s1); +SELECT * FROM target_set ORDER BY 1, 2; + -- -- Error and Unsupported scenarios -- +MERGE INTO target_set +USING (SELECT s1,s2 FROM source_set UNION SELECT s2,s1 FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +MERGE INTO target_set +USING (SELECT 2 as s3, source_set.* FROM (SELECT * FROM source_set LIMIT 1) as foo LEFT JOIN source_set USING( s1)) AS foo +ON target_set.t1 = foo.s1 +WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 +WHEN NOT MATCHED THEN INSERT VALUES(s1, s3); + + +-- modifying CTE not supported +EXPLAIN +WITH cte_1 AS (DELETE FROM target_json) +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); + +-- Grouping sets not supported +MERGE INTO citus_target t +USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +WITH subq AS +( +SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val) +) +MERGE INTO citus_target t +USING subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +-- try inserting unmatched distribution column value +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT DEFAULT VALUES; + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT VALUES(10000); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(1000); + +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(s.val); + +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (val) VALUES(s.val); + +-- try updating the distribution key column +BEGIN; +MERGE INTO target_cj t + USING source_cj1 s + ON t.tid = s.sid1 AND t.tid = 2 + WHEN MATCHED THEN + UPDATE SET tid = tid + 9, src = src || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid1, 'inserted by merge', val1); +ROLLBACK; + -- Foreign table as target MERGE INTO foreign_table USING ft_target ON (foreign_table.id = ft_target.id) @@ -854,6 +1654,57 @@ MERGE INTO t1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); +-- Now both s1 and t1 are distributed tables +SELECT undistribute_table('t1'); +SELECT create_distributed_table('t1', 'id'); + +-- We have a potential pitfall where a function can be invoked in +-- the MERGE conditions which can insert/update to a random shard +CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN +LANGUAGE PLPGSQL AS +$$ +BEGIN + INSERT INTO t1 VALUES (100, 100); + RETURN TRUE; +END; +$$; + +-- Test functions executing in MERGE statement. 
This is to prevent the functions from +-- running arbitrary SQL that might execute on a remote node or modify the target +-- relation, which would have unexpected/surprising results. +MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON + t1.id = s1.id AND s1.id = 2 + WHEN matched THEN + UPDATE SET id = s1.id, val = random(); + +-- Test STABLE function +CREATE FUNCTION add_s(integer, integer) RETURNS integer +AS 'select $1 + $2;' +LANGUAGE SQL +STABLE RETURNS NULL ON NULL INPUT; + +MERGE INTO t1 +USING s1 ON t1.id = s1.id +WHEN NOT MATCHED THEN + INSERT VALUES(s1.id, add_s(s1.val, 2)); + +-- Test preventing "ON" join condition from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET val = t1.val + s1.val; +ROLLBACK; + +-- Test preventing WHEN clause(s) from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 +WHEN MATCHED AND (merge_when_and_write()) THEN + UPDATE SET val = t1.val + s1.val; +ROLLBACK; + + -- Joining on partition columns with sub-query MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.val = t1.id) -- sub.val is not a distribution column @@ -997,6 +1848,104 @@ WHEN MATCHED THEN WHEN NOT MATCHED THEN INSERT VALUES(mv_source.id, mv_source.val); +-- Distributed tables *must* be colocated +CREATE TABLE dist_target(id int, val varchar); +SELECT create_distributed_table('dist_target', 'id'); +CREATE TABLE dist_source(id int, val varchar); +SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none'); + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Distributed tables *must* be joined on the distribution column +CREATE TABLE dist_colocated(id int, val int); +SELECT create_distributed_table('dist_colocated', 'id', colocate_with => 'dist_target'); + +MERGE INTO dist_target +USING dist_colocated +ON dist_target.id = dist_colocated.val -- val is not the distribution column +WHEN MATCHED THEN +UPDATE SET val = dist_colocated.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_colocated.id, dist_colocated.val); + + +-- Both the source and target must be distributed +MERGE INTO dist_target +USING (SELECT 100 id) AS source +ON dist_target.id = source.id AND dist_target.val = 'const' +WHEN MATCHED THEN +UPDATE SET val = 'source' +WHEN NOT MATCHED THEN +INSERT VALUES(source.id, 'source'); + +-- Non-hash distributed tables (append/range).
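+-- (For context, a minimal sketch of how such tables would be created; 'some_table'
+--  is a placeholder rather than part of this test:
+--    SELECT create_distributed_table('some_table', 'dist_col', 'append');
+--    SELECT create_distributed_table('some_table', 'dist_col', 'range');
+--  MERGE is presumably rejected for both distribution methods, as the
+--  statements below exercise.)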
+CREATE VIEW show_tables AS +SELECT logicalrelid, partmethod +FROM pg_dist_partition +WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass) +ORDER BY 1; + +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_source', 'id', 'append'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_source', 'id', 'range'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Both are append tables +SELECT undistribute_table('dist_target'); +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_target', 'id', 'append'); +SELECT create_distributed_table('dist_source', 'id', 'append'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Both are range tables +SELECT undistribute_table('dist_target'); +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_target', 'id', 'range'); +SELECT create_distributed_table('dist_source', 'id', 'range'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + DROP SERVER foreign_server CASCADE; +DROP FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; SELECT 1 FROM master_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql/merge_arbitrary.sql b/src/test/regress/sql/merge_arbitrary.sql new file mode 100644 index 000000000..17b7d4f90 --- /dev/null +++ b/src/test/regress/sql/merge_arbitrary.sql @@ -0,0 +1,133 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif + +SET search_path TO merge_arbitrary_schema; +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); + +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); + +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test PREPARE +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + 
INSERT VALUES(s1, s2); + +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +INSERT INTO prept VALUES(100, 0); + +INSERT INTO preps VALUES(100, 0); +INSERT INTO preps VALUES(200, 0); + +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); + +-- sixth time +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); + +-- Should have the counter as 14 (7 * 2) +SELECT * FROM prept; + +-- Test local tables +INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause +INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause +INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause + +INSERT INTO t1 VALUES(1, 0); -- Will be deleted +INSERT INTO t1 VALUES(2, 0); -- Will be updated +INSERT INTO t1 VALUES(5, 0); -- Will be intact + +PREPARE local(int, int) AS +WITH s1_res AS ( + SELECT * FROM s1 +) +MERGE INTO t1 + USING s1_res ON (s1_res.id = t1.id) + + WHEN MATCHED AND s1_res.val = $1 THEN + DELETE + WHEN MATCHED THEN + UPDATE SET val = t1.val + $2 + WHEN NOT MATCHED THEN + INSERT (id, val) VALUES (s1_res.id, s1_res.val); + +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +-- sixth time +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; +ROLLBACK; diff --git a/src/test/regress/sql/merge_arbitrary_create.sql b/src/test/regress/sql/merge_arbitrary_create.sql new file mode 100644 index 000000000..edf9b0d9d --- /dev/null +++ b/src/test/regress/sql/merge_arbitrary_create.sql @@ -0,0 +1,50 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif + +DROP SCHEMA IF EXISTS merge_arbitrary_schema CASCADE; +CREATE SCHEMA merge_arbitrary_schema; +SET search_path TO merge_arbitrary_schema; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 6000000; +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); + +SELECT create_distributed_table('target_cj', 'tid'); +SELECT create_distributed_table('source_cj1', 'sid1'); +SELECT create_distributed_table('source_cj2', 'sid2'); + +CREATE TABLE prept(t1 int, t2 int); +CREATE TABLE preps(s1 int, s2 int); + +SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); + +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +-- Citus local tables +CREATE TABLE t1(id int, val int); +CREATE TABLE s1(id int, val int); + +SELECT citus_add_local_table_to_metadata('t1'); +SELECT citus_add_local_table_to_metadata('s1'); diff 
--git a/src/test/regress/sql/multi_data_types.sql b/src/test/regress/sql/multi_data_types.sql index 7601bb319..d307c4c6f 100644 --- a/src/test/regress/sql/multi_data_types.sql +++ b/src/test/regress/sql/multi_data_types.sql @@ -6,6 +6,15 @@ SET citus.next_shard_id TO 530000; +-- Given that other test files depend on the existence of types created in this file, +-- we cannot drop them at the end. Instead, we drop them at the beginning of the test +-- to make this file runnable multiple times via run_test.py. +BEGIN; + SET LOCAL client_min_messages TO WARNING; + DROP TYPE IF EXISTS test_composite_type, other_composite_type, bug_status CASCADE; + DROP OPERATOR FAMILY IF EXISTS cats_op_fam USING hash; +COMMIT; + -- create a custom type... CREATE TYPE test_composite_type AS ( i integer, diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 8c8ade9d8..d202227ae 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -556,6 +556,39 @@ ALTER EXTENSION citus UPDATE TO '11.2-1'; SELECT * FROM pg_dist_placement ORDER BY shardid; SELECT * FROM pg_dist_cleanup; +ALTER EXTENSION citus_columnar UPDATE TO '11.2-1'; + +-- Make sure that we defined dependencies from all rel objects (tables, +-- indexes, sequences ..) to columnar table access method ... +SELECT pg_class.oid INTO columnar_schema_members +FROM pg_class, pg_namespace +WHERE pg_namespace.oid=pg_class.relnamespace AND + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); +SELECT refobjid INTO columnar_schema_members_pg_depend +FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid AND + objid = (select oid from pg_am where amname = 'columnar') AND + objsubid = 0 AND + refclassid = 'pg_class'::regclass::oid AND + refobjsubid = 0 AND + deptype = 'n'; + +-- ... , so this should be empty, +(TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) +UNION +(TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); + +-- ... , and both columnar_schema_members_pg_depend & columnar_schema_members +-- should have 5 entries. 
+SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; + +DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; + -- error out as cleanup records remain ALTER EXTENSION citus UPDATE TO '11.0-4'; diff --git a/src/test/regress/sql/multi_modifying_xacts.sql b/src/test/regress/sql/multi_modifying_xacts.sql index 2be3a0911..506480093 100644 --- a/src/test/regress/sql/multi_modifying_xacts.sql +++ b/src/test/regress/sql/multi_modifying_xacts.sql @@ -1,6 +1,9 @@ SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; +CREATE SCHEMA multi_modifying_xacts; +SET search_path TO multi_modifying_xacts; + -- =================================================================== -- test end-to-end modification functionality -- =================================================================== @@ -169,7 +172,7 @@ INSERT INTO labs VALUES (6, 'Bell Labs'); ABORT; -- but the DDL should correctly roll back -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.labs'::regclass; +SELECT "Column", "Type", "Modifiers" FROM public.table_desc WHERE relid='multi_modifying_xacts.labs'::regclass; SELECT * FROM labs WHERE id = 6; -- COPY can happen after single row INSERT @@ -294,7 +297,7 @@ CREATE FUNCTION reject_large_id() RETURNS trigger AS $rli$ $rli$ LANGUAGE plpgsql; -- register after insert trigger -SELECT * FROM run_command_on_placements('researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_large_id()') +SELECT * FROM run_command_on_placements('multi_modifying_xacts.researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE multi_modifying_xacts.reject_large_id()') ORDER BY nodeport, shardid; -- hide postgresql version dependend messages for next test only @@ -418,6 +421,7 @@ AND s.logicalrelid = 'objects'::regclass; -- create trigger on one worker to reject certain values \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ @@ -437,6 +441,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- test partial failure; worker_1 succeeds, 2 fails -- in this case, we expect the transaction to abort @@ -465,6 +470,7 @@ DELETE FROM objects; -- there cannot be errors on different shards at different times -- because the first failure will fail the whole transaction \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -483,6 +489,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -506,6 +513,7 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if the failures happen at COMMIT time? 
\c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; @@ -515,6 +523,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- should be the same story as before, just at COMMIT time -- as we use 2PC, the transaction is rollbacked @@ -547,6 +556,7 @@ AND s.logicalrelid = 'objects'::regclass; -- what if all nodes have failures at COMMIT time? \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON labs_1200002; @@ -556,6 +566,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- reduce the log level for differences between PG14 and PG15 -- in PGconn->errorMessage @@ -586,10 +597,12 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if one shard (objects) succeeds but another (labs) completely fails? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; \c - - - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200004; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -682,6 +695,7 @@ SELECT * FROM reference_modifying_xacts; -- lets fail on of the workers at before the commit time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_reference() RETURNS trigger AS $rb$ BEGIN @@ -700,6 +714,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -711,6 +726,7 @@ COMMIT; -- lets fail one of the workers at COMMIT time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; CREATE CONSTRAINT TRIGGER reject_bad_reference @@ -719,6 +735,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction @@ -740,10 +757,12 @@ ORDER BY s.logicalrelid, sp.shardstate; -- for the time-being drop the constraint \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- now create a hash distributed table and run tests -- including both the reference table and the hash @@ -777,6 +796,7 @@ ABORT; -- lets fail one of the workers before COMMIT time for the hash table \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_hash() RETURNS trigger AS $rb$ BEGIN @@ -795,6 +815,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail @@ -809,6 +830,7 @@ SELECT * FROM reference_modifying_xacts WHERE key = 55; -- now lets fail on of the workers for the hash distributed table table -- when there is a reference table involved \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_hash ON 
hash_modifying_xacts_1200007; -- the trigger is on execution time @@ -818,6 +840,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail @@ -844,6 +867,7 @@ ORDER BY s.logicalrelid, sp.shardstate; -- change is rollbacked as well \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 @@ -851,6 +875,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse BEGIN; @@ -920,9 +945,11 @@ SELECT count(*) FROM pg_dist_transaction; -- first create the new user on all nodes CREATE USER test_user; +GRANT ALL ON SCHEMA multi_modifying_xacts TO test_user; -- now connect back to the master with the new user \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200015; CREATE TABLE reference_failure_test (key int, value int); SELECT create_reference_table('reference_failure_test'); @@ -934,16 +961,19 @@ SELECT create_distributed_table('numbers_hash_failure_test', 'key'); -- ensure that the shard is created for this user \c - test_user - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.override_table_visibility TO false; \dt reference_failure_test_1200015 -- now connect with the default user, -- and rename the existing user \c - :default_user - :worker_1_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; -- connect back to master and query the reference table \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- should fail since the worker doesn't have test_user anymore INSERT INTO reference_failure_test VALUES (1, '1'); @@ -1007,15 +1037,18 @@ SELECT count(*) FROM numbers_hash_failure_test; -- break the other node as well \c - :default_user - :worker_2_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- fails on all shard placements INSERT INTO numbers_hash_failure_test VALUES (2,2); -- connect back to the master with the proper user to continue the tests \c - :default_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200020; SET citus.next_placement_id TO 1200033; -- unbreak both nodes by renaming the user back to the original name @@ -1024,6 +1057,7 @@ SELECT * FROM run_command_on_workers('ALTER USER test_user_new RENAME TO test_us DROP TABLE reference_modifying_xacts, hash_modifying_xacts, hash_modifying_xacts_second, reference_failure_test, numbers_hash_failure_test; +REVOKE ALL ON SCHEMA multi_modifying_xacts FROM test_user; DROP USER test_user; -- set up foreign keys to test transactions with co-located and reference tables @@ -1043,7 +1077,10 @@ CREATE TABLE itemgroups ( ); SELECT create_reference_table('itemgroups'); +SET client_min_messages TO WARNING; DROP TABLE IF EXISTS users ; +RESET client_min_messages; + CREATE TABLE users ( id int PRIMARY KEY, name text, @@ -1199,5 +1236,5 @@ SELECT insert_abort(); SELECT name FROM labs WHERE id = 1001; RESET citus.function_opens_transaction_block; -DROP FUNCTION insert_abort(); -DROP TABLE items, users, itemgroups, usergroups, researchers, labs; +SET client_min_messages TO WARNING; +DROP 
SCHEMA multi_modifying_xacts CASCADE; diff --git a/src/test/regress/sql/multi_move_mx.sql b/src/test/regress/sql/multi_move_mx.sql index 166069a6e..9cfa8a3db 100644 --- a/src/test/regress/sql/multi_move_mx.sql +++ b/src/test/regress/sql/multi_move_mx.sql @@ -151,8 +151,34 @@ ORDER BY shardid LIMIT 1 OFFSET 1; +-- Check that shards of a table with GENERATED columns can be moved. +\c - - - :master_port +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; + +CREATE TABLE mx_table_with_generated_column (a int, b int GENERATED ALWAYS AS ( a + 3 ) STORED, c int); +SELECT create_distributed_table('mx_table_with_generated_column', 'a'); + +-- Check that dropped columns are handled properly in a move. +ALTER TABLE mx_table_with_generated_column DROP COLUMN c; + +-- Move a shard from worker 1 to worker 2 +SELECT + citus_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_with_generated_column'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + -- Cleanup \c - - - :master_port +SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); +DROP TABLE mx_table_with_generated_column; DROP TABLE mx_table_1; DROP TABLE mx_table_2; DROP TABLE mx_table_3; diff --git a/src/test/regress/sql/multi_mx_copy_data.sql b/src/test/regress/sql/multi_mx_copy_data.sql index 26d4d3c42..b4598ae61 100644 --- a/src/test/regress/sql/multi_mx_copy_data.sql +++ b/src/test/regress/sql/multi_mx_copy_data.sql @@ -2,6 +2,11 @@ -- MULTI_MX_COPY_DATA -- +-- We truncate them to make this test runnable multiple times. +-- Note that we cannot do that at the end of the test because +-- we need to keep the data for the other tests. 
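+-- (Without the TRUNCATE below, a rerun would double the row counts that the
+--  later checks compare against their expected output; conceptually:
+--    SELECT count(*) FROM lineitem_mx;  -- twice the expected value on a second run
+--  so the cleanup has to happen up front instead.)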
+TRUNCATE lineitem_mx, orders_mx; + \set nation_data_file :abs_srcdir '/data/nation.data' \set client_side_copy_command '\\copy nation_hash FROM ' :'nation_data_file' ' with delimiter '''|''';' :client_side_copy_command @@ -96,3 +101,5 @@ SET search_path TO public; :client_side_copy_command \set client_side_copy_command '\\copy supplier_mx FROM ' :'supplier_data_file' ' with delimiter '''|''';' :client_side_copy_command + +DROP TABLE citus_mx_test_schema.nation_hash_replicated; diff --git a/src/test/regress/sql/multi_mx_modifying_xacts.sql b/src/test/regress/sql/multi_mx_modifying_xacts.sql index cf60f023d..15335f579 100644 --- a/src/test/regress/sql/multi_mx_modifying_xacts.sql +++ b/src/test/regress/sql/multi_mx_modifying_xacts.sql @@ -331,3 +331,7 @@ COMMIT; -- no data should persists SELECT * FROM objects_mx WHERE id = 1; SELECT * FROM labs_mx WHERE id = 8; + +TRUNCATE objects_mx, labs_mx, researchers_mx; +DROP TRIGGER reject_bad_mx ON labs_mx_1220102; +DROP FUNCTION reject_bad_mx; diff --git a/src/test/regress/sql/multi_mx_router_planner.sql b/src/test/regress/sql/multi_mx_router_planner.sql index fdfd81b07..6a1271720 100644 --- a/src/test/regress/sql/multi_mx_router_planner.sql +++ b/src/test/regress/sql/multi_mx_router_planner.sql @@ -657,3 +657,8 @@ INSERT INTO articles_hash_mx VALUES (51, 1, 'amateus', 1814); SELECT id FROM articles_hash_mx WHERE author_id = 1; + +SET client_min_messages to WARNING; +TRUNCATE articles_hash_mx, company_employees_mx, articles_single_shard_hash_mx; +DROP MATERIALIZED VIEW mv_articles_hash_mx_error; +DROP TABLE authors_hash_mx; diff --git a/src/test/regress/sql/multi_router_planner.sql b/src/test/regress/sql/multi_router_planner.sql index 87104599c..142568d5d 100644 --- a/src/test/regress/sql/multi_router_planner.sql +++ b/src/test/regress/sql/multi_router_planner.sql @@ -10,6 +10,9 @@ SET citus.next_shard_id TO 840000; -- other tests that triggers fast-path-router planner SET citus.enable_fast_path_router_planner TO false; +CREATE SCHEMA multi_router_planner; +SET search_path TO multi_router_planner; + CREATE TABLE articles_hash ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -1182,12 +1185,15 @@ SELECT create_distributed_table('failure_test', 'a', 'hash'); SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - router_user - :master_port +SET search_path TO multi_router_planner; -- we will fail to connect to worker 2, since the user does not exist -- still, we never mark placements inactive. 
Instead, fail the transaction BEGIN; @@ -1199,29 +1205,13 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'failure_test'::regclass ) - ORDER BY placementid; + ORDER BY shardid, nodeport; \c - postgres - :worker_1_port DROP OWNED BY router_user; DROP USER router_user; \c - - - :master_port DROP OWNED BY router_user; DROP USER router_user; -DROP TABLE failure_test; -DROP FUNCTION author_articles_max_id(); -DROP FUNCTION author_articles_id_word_count(); - -DROP MATERIALIZED VIEW mv_articles_hash_empty; -DROP MATERIALIZED VIEW mv_articles_hash_data; - -DROP VIEW num_db; -DROP FUNCTION number1(); - -DROP TABLE articles_hash; -DROP TABLE articles_single_shard_hash; -DROP TABLE authors_hash; -DROP TABLE authors_range; -DROP TABLE authors_reference; -DROP TABLE company_employees; -DROP TABLE articles_range; -DROP TABLE articles_append; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_router_planner CASCADE; diff --git a/src/test/regress/sql/multi_simple_queries.sql b/src/test/regress/sql/multi_simple_queries.sql index 8d7e45255..7fcf45b1c 100644 --- a/src/test/regress/sql/multi_simple_queries.sql +++ b/src/test/regress/sql/multi_simple_queries.sql @@ -11,6 +11,9 @@ SET citus.coordinator_aggregation_strategy TO 'disabled'; -- test end-to-end query functionality -- =================================================================== +CREATE SCHEMA simple_queries_test; +SET search_path TO simple_queries_test; + CREATE TABLE articles ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -203,12 +206,12 @@ SELECT author_id FROM articles HAVING author_id <= 2 OR author_id = 8 ORDER BY author_id; -SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders +SELECT o_orderstatus, count(*), avg(o_totalprice) FROM public.orders GROUP BY o_orderstatus HAVING count(*) > 1450 OR avg(o_totalprice) > 150000 ORDER BY o_orderstatus; -SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders +SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM public.lineitem, public.orders WHERE l_orderkey = o_orderkey AND l_orderkey > 9030 GROUP BY o_orderstatus HAVING sum(l_linenumber) > 1000 @@ -277,7 +280,7 @@ SELECT avg(word_count) -- error out on unsupported aggregate SET client_min_messages to 'NOTICE'; -CREATE AGGREGATE public.invalid(int) ( +CREATE AGGREGATE invalid(int) ( sfunc = int4pl, stype = int ); @@ -355,7 +358,8 @@ SELECT nextval('query_seq')*2 FROM articles LIMIT 3; SELECT * FROM (SELECT nextval('query_seq') FROM articles LIMIT 3) vals; -- but not elsewhere -SELECT sum(nextval('query_seq')) FROM articles; -SELECT n FROM (SELECT nextval('query_seq') n, random() FROM articles) vals; +SELECT sum(nextval('simple_queries_test.query_seq')) FROM articles; +SELECT n FROM (SELECT nextval('simple_queries_test.query_seq') n, random() FROM articles) vals; -DROP SEQUENCE query_seq; +SET client_min_messages TO WARNING; +DROP SCHEMA simple_queries_test CASCADE; diff --git a/src/test/regress/sql/multi_upsert.sql b/src/test/regress/sql/multi_upsert.sql index 24503b7a4..6ef72d576 100644 --- a/src/test/regress/sql/multi_upsert.sql +++ b/src/test/regress/sql/multi_upsert.sql @@ -3,6 +3,8 @@ SET citus.next_shard_id TO 980000; +CREATE SCHEMA upsert_test; +SET search_path TO upsert_test; CREATE TABLE upsert_test ( @@ -207,3 +209,6 @@ INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_ke -- error out on attempt to update the partition key INSERT INTO upsert_test (part_key, other_col) 
VALUES (1, 1) ON CONFLICT (part_key) DO UPDATE SET part_key = 15; + +SET client_min_messages TO WARNING; +DROP SCHEMA upsert_test CASCADE; diff --git a/src/test/regress/sql/pg15.sql b/src/test/regress/sql/pg15.sql index 121b41f86..ac8062c65 100644 --- a/src/test/regress/sql/pg15.sql +++ b/src/test/regress/sql/pg15.sql @@ -269,16 +269,21 @@ WITH targq AS ( MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; --- crashes on beta3, fixed on 15 stable ---WITH foo AS ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) SELECT * FROM foo; +WITH foo AS ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) SELECT * FROM foo; ---COPY ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) TO stdout; +COPY ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) TO stdout; + +MERGE INTO tbl1 t +USING tbl2 +ON (true) +WHEN MATCHED THEN + DO NOTHING; MERGE INTO tbl1 t USING tbl2 diff --git a/src/test/regress/sql/pgmerge.sql b/src/test/regress/sql/pgmerge.sql index 6842f516a..9b828f27e 100644 --- a/src/test/regress/sql/pgmerge.sql +++ b/src/test/regress/sql/pgmerge.sql @@ -608,6 +608,14 @@ USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; ROLLBACK; + +-- Test preventing the ON condition from writing to the database +BEGIN; +MERGE INTO wq_target t +USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET balance = t.balance + s.balance; +ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; @@ -1164,12 +1172,14 @@ INSERT INTO pa_target SELECT '2017-02-28', id, id * 100, 'initial' FROM generate SET client_min_messages TO DEBUG1; BEGIN; MERGE INTO pa_target t - USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s + USING (SELECT * FROM pa_source WHERE sid < 10) s + --USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s ON t.tid = s.sid WHEN MATCHED THEN UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT MATCHED THEN - INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); + INSERT VALUES ('2017-01-15', sid, delta, 'inserted by merge'); + --INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); SELECT * FROM pa_target ORDER BY tid; ROLLBACK; RESET client_min_messages; diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql index dbbc94732..da4259f5b 100644 --- a/src/test/regress/sql/shard_rebalancer.sql +++ b/src/test/regress/sql/shard_rebalancer.sql @@ -1497,6 +1497,21 @@ SELECT sh.logicalrelid, pl.nodeport DROP TABLE single_shard_colocation_1a, single_shard_colocation_1b, single_shard_colocation_1c, single_shard_colocation_2a, single_shard_colocation_2b CASCADE; +-- verify that we detect when one of the tables does not have a replica identity or primary key, +-- and error out in case of shard transfer mode = auto +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); + +create table table_with_primary_key (a int primary key); +select create_distributed_table('table_with_primary_key','a'); +create table table_without_primary_key (a bigint); +select create_distributed_table('table_without_primary_key','a'); + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); +select rebalance_table_shards(); + +DROP TABLE table_with_primary_key, 
table_without_primary_key; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO OFF; REVOKE ALL ON SCHEMA public FROM testrole; diff --git a/src/test/regress/sql/upgrade_columnar_after.sql b/src/test/regress/sql/upgrade_columnar_after.sql index f2839645c..133fcfde0 100644 --- a/src/test/regress/sql/upgrade_columnar_after.sql +++ b/src/test/regress/sql/upgrade_columnar_after.sql @@ -101,10 +101,12 @@ BEGIN; INSERT INTO text_data (value) SELECT generate_random_string(1024 * 10) FROM generate_series(0,10); SELECT count(DISTINCT value) FROM text_data; - -- make sure that serial is preserved - -- since we run "after schedule" twice and "rollback" wouldn't undo - -- sequence changes, it can be 22 or 33, not a different value - SELECT max(id) in (22, 33) FROM text_data; + -- Make sure that serial is preserved. + -- + -- Since we might run "after schedule" several times for flaky test + -- detection and "rollback" wouldn't undo sequence changes, "id" should + -- look like below: + SELECT max(id) >= 11 AND max(id) % 11 = 0 FROM text_data; -- since we run "after schedule" twice, rollback the transaction -- to avoid getting "table already exists" errors @@ -137,7 +139,12 @@ ROLLBACK; SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -153,19 +160,24 @@ UNION (TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); -- ... , and both columnar_schema_members_pg_depend & columnar_schema_members --- should have 10 entries. -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- Check the same for workers too. 
-SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -177,7 +189,7 @@ WHERE classid = 'pg_am'::regclass::oid AND $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ (TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) UNION $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; $$ ); -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; $$ diff --git a/src/test/regress/sql/upgrade_columnar_before.sql b/src/test/regress/sql/upgrade_columnar_before.sql index ea71dba02..6f39f4234 100644 --- a/src/test/regress/sql/upgrade_columnar_before.sql +++ b/src/test/regress/sql/upgrade_columnar_before.sql @@ -1,5 +1,29 @@ -- Test if relying on topological sort of the objects, not their names, works -- fine when re-creating objects during pg_upgrade. + +DO +$$ +BEGIN +IF EXISTS (SELECT * FROM pg_namespace WHERE nspname = 'upgrade_columnar') +THEN + -- Drop the table left over from the earlier run of + -- upgrade_columnar_before.sql. Similarly, drop the fake public schema + -- created before and rename the original one (renamed to citus_schema) + -- back to public. + -- + -- This can only happen if upgrade_columnar_before.sql is run multiple + -- times for flaky test detection. + DROP TABLE citus_schema.new_columnar_table; + DROP SCHEMA public CASCADE; + ALTER SCHEMA citus_schema RENAME TO public; + + SET LOCAL client_min_messages TO WARNING; + DROP SCHEMA upgrade_columnar CASCADE; +END IF; +END +$$ +LANGUAGE plpgsql; + ALTER SCHEMA public RENAME TO citus_schema; SET search_path TO citus_schema; diff --git a/src/test/regress/sql/worker_split_copy_test.sql b/src/test/regress/sql/worker_split_copy_test.sql index 2fac91c69..e2f4f9a23 100644 --- a/src/test/regress/sql/worker_split_copy_test.sql +++ b/src/test/regress/sql/worker_split_copy_test.sql @@ -110,8 +110,66 @@ SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_810700 SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_81070016"; -- END: List updated row count for local targets shard. +-- Check that GENERATED columns are handled properly in a shard split operation. +\c - - - :master_port +SET search_path TO worker_split_copy_test; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 81080000; + +-- BEGIN: Create distributed table and insert data. +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char, col_todrop int); +SELECT create_distributed_table('dist_table_with_generated_col', 'id'); + +-- Check that dropped columns are filtered out in the COPY command.
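+-- (For context: a stored GENERATED column can never be written to directly, so
+--  the split copy is expected to build an explicit column list along the lines of
+--    COPY <destination shard> (id, value) FROM ...
+--  skipping both "new_id" and the dropped column; a sketch, not the exact
+--  internal command.)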
+ALTER TABLE dist_table_with_generated_col DROP COLUMN col_todrop; + +INSERT INTO dist_table_with_generated_col (id, value) (SELECT g.id, 'N' FROM generate_series(1, 1000) AS g(id)); + +-- END: Create distributed table and insert data. + +-- BEGIN: Create target shards in Worker1 and Worker2 for a 2-way split copy. +\c - - - :worker_1_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080015(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); +\c - - - :worker_2_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080016(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); + +-- BEGIN: List row count for source shard and target shard in Worker1. +\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080000; +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + +-- BEGIN: List row count for target shard in Worker2. +\c - - - :worker_2_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016; + +\c - - - :worker_1_port +SELECT * from worker_split_copy( + 81080000, -- source shard id to copy + 'id', + ARRAY[ + -- split copy info for split child 1 + ROW(81080015, -- destination shard id + -2147483648, -- split range begin + -1073741824, --split range end + :worker_1_node)::pg_catalog.split_copy_info, + -- split copy info for split child 2 + ROW(81080016, --destination shard id + -1073741823, --split range begin + -1, --split range end + :worker_2_node)::pg_catalog.split_copy_info + ] + ); + +\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + +\c - - - :worker_2_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016; + -- BEGIN: CLEANUP. \c - - - :master_port SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); DROP SCHEMA worker_split_copy_test CASCADE; -- END: CLEANUP. diff --git a/src/test/regress/sql_schedule b/src/test/regress/sql_schedule index f07f7af9a..9538f1482 100644 --- a/src/test/regress/sql_schedule +++ b/src/test/regress/sql_schedule @@ -14,3 +14,5 @@ test: arbitrary_configs_truncate test: arbitrary_configs_truncate_cascade test: arbitrary_configs_truncate_partition test: arbitrary_configs_alter_table_add_constraint_without_name +test: merge_arbitrary +test: arbitrary_configs_router
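
For reference, the core PG15 MERGE shape that the newly scheduled merge_arbitrary tests exercise (generic table and column names; a sketch rather than any specific test case):

MERGE INTO target t
USING source s ON (t.id = s.id)
WHEN MATCHED THEN
    UPDATE SET val = s.val
WHEN NOT MATCHED THEN
    INSERT VALUES (s.id, s.val);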