Merge branch 'main' into niupre-reuse-connections-for-logical-ref-fkeys

Nitish Upreti 2022-09-14 15:21:30 -07:00
commit 33a2578342
119 changed files with 3390 additions and 7326 deletions

View File

@ -6,7 +6,7 @@ orbs:
parameters: parameters:
image_suffix: image_suffix:
type: string type: string
default: '-v643b0b7' default: '-v0eef34d'
pg13_version: pg13_version:
type: string type: string
default: '13.8' default: '13.8'
@ -15,10 +15,10 @@ parameters:
default: '14.5' default: '14.5'
pg15_version: pg15_version:
type: string type: string
default: '15beta3' default: '15beta4'
upgrade_pg_versions: upgrade_pg_versions:
type: string type: string
default: '13.8-14.5-15beta3' default: '13.8-14.5-15beta4'
style_checker_tools_version: style_checker_tools_version:
type: string type: string
default: '0.8.18' default: '0.8.18'

View File

@ -19,6 +19,7 @@
#include "distributed/commands/utility_hook.h" #include "distributed/commands/utility_hook.h"
#include "distributed/listutils.h" #include "distributed/listutils.h"
#include "distributed/metadata_cache.h" #include "distributed/metadata_cache.h"
#include "distributed/multi_partitioning_utils.h"
static bool IsClusterStmtVerbose_compat(ClusterStmt *clusterStmt); static bool IsClusterStmtVerbose_compat(ClusterStmt *clusterStmt);
@ -69,6 +70,27 @@ PreprocessClusterStmt(Node *node, const char *clusterCommand,
return NIL; return NIL;
} }
/*
* We do not propagate CLUSTER commands for partitioned tables, because PostgreSQL
* does not allow CLUSTER on a partitioned table inside a transaction block. Although
* Citus can execute some commands outside of a transaction block -- such as VACUUM --
* we cannot do that here, since CLUSTER is also disallowed from a function call and,
* by default, Citus propagates DDL via `worker_apply_shard_ddl_command()`, which we
* must avoid in this case.
*/
if (PartitionedTable(relationId))
{
if (EnableUnsupportedFeatureMessages)
{
ereport(WARNING, (errmsg("not propagating CLUSTER command for partitioned "
"table to worker nodes"),
errhint("Provide a child partition table names in order to "
"CLUSTER distributed partitioned tables.")));
}
return NIL;
}
if (IsClusterStmtVerbose_compat(clusterStmt)) if (IsClusterStmtVerbose_compat(clusterStmt))
{ {
ereport(ERROR, (errmsg("cannot run CLUSTER command"), ereport(ERROR, (errmsg("cannot run CLUSTER command"),

View File

@ -60,10 +60,10 @@
#include "distributed/reference_table_utils.h" #include "distributed/reference_table_utils.h"
#include "distributed/relation_access_tracking.h" #include "distributed/relation_access_tracking.h"
#include "distributed/remote_commands.h" #include "distributed/remote_commands.h"
#include "distributed/repair_shards.h"
#include "distributed/resource_lock.h" #include "distributed/resource_lock.h"
#include "distributed/shard_rebalancer.h" #include "distributed/shard_rebalancer.h"
#include "distributed/shard_split.h" #include "distributed/shard_split.h"
#include "distributed/shard_transfer.h"
#include "distributed/shared_library_init.h" #include "distributed/shared_library_init.h"
#include "distributed/shard_rebalancer.h" #include "distributed/shard_rebalancer.h"
#include "distributed/worker_protocol.h" #include "distributed/worker_protocol.h"
@ -564,8 +564,10 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName,
* such that we can create foreign keys and joins work immediately after creation. * such that we can create foreign keys and joins work immediately after creation.
* We do this after applying all essential checks to error out early in case of * We do this after applying all essential checks to error out early in case of
* user error. * user error.
*
* Use force_logical since this function is meant to not block writes.
*/ */
EnsureReferenceTablesExistOnAllNodes(); EnsureReferenceTablesExistOnAllNodesExtended(TRANSFER_MODE_FORCE_LOGICAL);
/* /*
* At this point, the table is a Citus local table, which means it does * At this point, the table is a Citus local table, which means it does
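A hedged usage sketch of the caller this hunk changes (table and column names are hypothetical, and the UDF signature is assumed from the surrounding function name): because reference tables are now ensured on all nodes with TRANSFER_MODE_FORCE_LOGICAL, the call below is expected not to block writes to existing reference tables while they are copied.

SELECT create_distributed_table_concurrently('events', 'tenant_id');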

View File

@ -734,6 +734,17 @@ static DistributeObjectOps Sequence_AlterOwner = {
.address = AlterSequenceOwnerStmtObjectAddress, .address = AlterSequenceOwnerStmtObjectAddress,
.markDistributed = false, .markDistributed = false,
}; };
#if (PG_VERSION_NUM >= PG_VERSION_15)
static DistributeObjectOps Sequence_AlterPersistence = {
.deparse = DeparseAlterSequencePersistenceStmt,
.qualify = QualifyAlterSequencePersistenceStmt,
.preprocess = PreprocessAlterSequencePersistenceStmt,
.postprocess = NULL,
.operationType = DIST_OPS_ALTER,
.address = AlterSequencePersistenceStmtObjectAddress,
.markDistributed = false,
};
#endif
static DistributeObjectOps Sequence_Drop = { static DistributeObjectOps Sequence_Drop = {
.deparse = DeparseDropSequenceStmt, .deparse = DeparseDropSequenceStmt,
.qualify = QualifyDropSequenceStmt, .qualify = QualifyDropSequenceStmt,
@ -1463,6 +1474,41 @@ GetDistributeObjectOps(Node *node)
case OBJECT_SEQUENCE: case OBJECT_SEQUENCE:
{ {
#if (PG_VERSION_NUM >= PG_VERSION_15)
ListCell *cmdCell = NULL;
foreach(cmdCell, stmt->cmds)
{
AlterTableCmd *cmd = castNode(AlterTableCmd, lfirst(cmdCell));
switch (cmd->subtype)
{
case AT_ChangeOwner:
{
return &Sequence_AlterOwner;
}
case AT_SetLogged:
{
return &Sequence_AlterPersistence;
}
case AT_SetUnLogged:
{
return &Sequence_AlterPersistence;
}
default:
{
return &NoDistributeOps;
}
}
}
#endif
/*
* Prior to PG15, the only Alter Table statement
* with Sequence as its object was an
* Alter Owner statement
*/
return &Sequence_AlterOwner; return &Sequence_AlterOwner;
} }
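In SQL terms, the dispatch added above routes the following ALTER SEQUENCE forms, which PostgreSQL delivers as an AlterTableStmt with OBJECT_SEQUENCE (sequence and role names are hypothetical):

ALTER SEQUENCE public.user_id_seq OWNER TO app_owner;  -- Sequence_AlterOwner
ALTER SEQUENCE public.user_id_seq SET LOGGED;           -- Sequence_AlterPersistence (PG15+)
ALTER SEQUENCE public.user_id_seq SET UNLOGGED;         -- Sequence_AlterPersistence (PG15+)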

View File

@ -712,6 +712,121 @@ PostprocessAlterSequenceOwnerStmt(Node *node, const char *queryString)
} }
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
* PreprocessAlterSequencePersistenceStmt is called for a change of persistence
* of a sequence, before the persistence is changed on the local instance.
*
* If the sequence whose persistence is changed is distributed, we execute
* the change on all the workers to keep the sequence in sync across the cluster.
*/
List *
PreprocessAlterSequencePersistenceStmt(Node *node, const char *queryString,
ProcessUtilityContext processUtilityContext)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
List *sequenceAddresses = GetObjectAddressListFromParseTree((Node *) stmt, false,
false);
/* the code-path only supports a single object */
Assert(list_length(sequenceAddresses) == 1);
if (!ShouldPropagateAnyObject(sequenceAddresses))
{
return NIL;
}
EnsureCoordinator();
QualifyTreeNode((Node *) stmt);
const char *sql = DeparseTreeNode((Node *) stmt);
List *commands = list_make3(DISABLE_DDL_PROPAGATION, (void *) sql,
ENABLE_DDL_PROPAGATION);
return NodeDDLTaskList(NON_COORDINATOR_METADATA_NODES, commands);
}
/*
* AlterSequencePersistenceStmtObjectAddress returns the ObjectAddress of the
* sequence that is the subject of the AlterPersistenceStmt.
*/
List *
AlterSequencePersistenceStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
RangeVar *sequence = stmt->relation;
Oid seqOid = RangeVarGetRelid(sequence, NoLock, missing_ok);
ObjectAddress *sequenceAddress = palloc0(sizeof(ObjectAddress));
ObjectAddressSet(*sequenceAddress, RelationRelationId, seqOid);
return list_make1(sequenceAddress);
}
/*
* PreprocessSequenceAlterTableStmt is called for a change of persistence or owner
* of a sequence, before the persistence/owner is changed on the local instance.
*
* Altering the persistence or the owner are the only ALTER commands of a sequence
* that may pass through an AlterTableStmt.
*/
List *
PreprocessSequenceAlterTableStmt(Node *node, const char *queryString,
ProcessUtilityContext processUtilityContext)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
ListCell *cmdCell = NULL;
foreach(cmdCell, stmt->cmds)
{
AlterTableCmd *cmd = castNode(AlterTableCmd, lfirst(cmdCell));
switch (cmd->subtype)
{
case AT_ChangeOwner:
{
return PreprocessAlterSequenceOwnerStmt(node,
queryString,
processUtilityContext);
}
case AT_SetLogged:
{
return PreprocessAlterSequencePersistenceStmt(node,
queryString,
processUtilityContext);
}
case AT_SetUnLogged:
{
return PreprocessAlterSequencePersistenceStmt(node,
queryString,
processUtilityContext);
}
default:
{
/* normally we shouldn't ever reach this */
ereport(ERROR, (errmsg("unsupported subtype for alter sequence command"),
errdetail("sub command type: %d",
cmd->subtype)));
}
}
}
return NIL;
}
#endif
/* /*
* PreprocessGrantOnSequenceStmt is executed before the statement is applied to the local * PreprocessGrantOnSequenceStmt is executed before the statement is applied to the local
* postgres instance. * postgres instance.
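As a sketch, the command list built by PreprocessAlterSequencePersistenceStmt roughly amounts to the following being sent to each non-coordinator metadata node (the GUC wrapper corresponds to the DISABLE/ENABLE_DDL_PROPAGATION constants, and the sequence name is hypothetical):

SET citus.enable_ddl_propagation TO 'off';
ALTER SEQUENCE public.user_id_seq SET UNLOGGED;
SET citus.enable_ddl_propagation TO 'on';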

View File

@ -733,20 +733,40 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
/* /*
* check whether we are dealing with a sequence or view here * check whether we are dealing with a sequence or view here
* if yes, it must be ALTER TABLE .. OWNER TO .. command
* since this is the only ALTER command of a sequence or view that
* passes through an AlterTableStmt
*/ */
char relKind = get_rel_relkind(leftRelationId); char relKind = get_rel_relkind(leftRelationId);
if (relKind == RELKIND_SEQUENCE) if (relKind == RELKIND_SEQUENCE)
{ {
AlterTableStmt *stmtCopy = copyObject(alterTableStatement); AlterTableStmt *stmtCopy = copyObject(alterTableStatement);
AlterTableStmtObjType_compat(stmtCopy) = OBJECT_SEQUENCE; AlterTableStmtObjType_compat(stmtCopy) = OBJECT_SEQUENCE;
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
* it must be ALTER TABLE .. OWNER TO ..
* or ALTER TABLE .. SET LOGGED/UNLOGGED command
* since these are the only ALTER commands of a sequence that
* pass through an AlterTableStmt
*/
return PreprocessSequenceAlterTableStmt((Node *) stmtCopy, alterTableCommand,
processUtilityContext);
#else
/*
* it must be ALTER TABLE .. OWNER TO .. command
* since this is the only ALTER command of a sequence that
* passes through an AlterTableStmt
*/
return PreprocessAlterSequenceOwnerStmt((Node *) stmtCopy, alterTableCommand, return PreprocessAlterSequenceOwnerStmt((Node *) stmtCopy, alterTableCommand,
processUtilityContext); processUtilityContext);
#endif
} }
else if (relKind == RELKIND_VIEW) else if (relKind == RELKIND_VIEW)
{ {
/*
* it must be ALTER TABLE .. OWNER TO .. command
* since this is the only ALTER command of a view that
* passes through an AlterTableStmt
*/
AlterTableStmt *stmtCopy = copyObject(alterTableStatement); AlterTableStmt *stmtCopy = copyObject(alterTableStatement);
AlterTableStmtObjType_compat(stmtCopy) = OBJECT_VIEW; AlterTableStmtObjType_compat(stmtCopy) = OBJECT_VIEW;
return PreprocessAlterViewStmt((Node *) stmtCopy, alterTableCommand, return PreprocessAlterViewStmt((Node *) stmtCopy, alterTableCommand,

View File

@ -256,7 +256,12 @@ pg_get_sequencedef_string(Oid sequenceRelationId)
char *qualifiedSequenceName = generate_qualified_relation_name(sequenceRelationId); char *qualifiedSequenceName = generate_qualified_relation_name(sequenceRelationId);
char *typeName = format_type_be(pgSequenceForm->seqtypid); char *typeName = format_type_be(pgSequenceForm->seqtypid);
char *sequenceDef = psprintf(CREATE_SEQUENCE_COMMAND, qualifiedSequenceName, char *sequenceDef = psprintf(CREATE_SEQUENCE_COMMAND,
#if (PG_VERSION_NUM >= PG_VERSION_15)
get_rel_persistence(sequenceRelationId) ==
RELPERSISTENCE_UNLOGGED ? "UNLOGGED " : "",
#endif
qualifiedSequenceName,
typeName, typeName,
pgSequenceForm->seqincrement, pgSequenceForm->seqmin, pgSequenceForm->seqincrement, pgSequenceForm->seqmin,
pgSequenceForm->seqmax, pgSequenceForm->seqstart, pgSequenceForm->seqmax, pgSequenceForm->seqstart,
@ -800,6 +805,13 @@ deparse_shard_index_statement(IndexStmt *origStmt, Oid distrelid, int64 shardid,
appendStringInfoString(buffer, ") "); appendStringInfoString(buffer, ") ");
} }
#if PG_VERSION_NUM >= PG_VERSION_15
if (indexStmt->nulls_not_distinct)
{
appendStringInfoString(buffer, "NULLS NOT DISTINCT ");
}
#endif /* PG_VERSION_15 */
if (indexStmt->options != NIL) if (indexStmt->options != NIL)
{ {
appendStringInfoString(buffer, "WITH ("); appendStringInfoString(buffer, "WITH (");

View File

@ -27,6 +27,9 @@ static void AppendSequenceNameList(StringInfo buf, List *objects, ObjectType obj
static void AppendRenameSequenceStmt(StringInfo buf, RenameStmt *stmt); static void AppendRenameSequenceStmt(StringInfo buf, RenameStmt *stmt);
static void AppendAlterSequenceSchemaStmt(StringInfo buf, AlterObjectSchemaStmt *stmt); static void AppendAlterSequenceSchemaStmt(StringInfo buf, AlterObjectSchemaStmt *stmt);
static void AppendAlterSequenceOwnerStmt(StringInfo buf, AlterTableStmt *stmt); static void AppendAlterSequenceOwnerStmt(StringInfo buf, AlterTableStmt *stmt);
#if (PG_VERSION_NUM >= PG_VERSION_15)
static void AppendAlterSequencePersistenceStmt(StringInfo buf, AlterTableStmt *stmt);
#endif
static void AppendGrantOnSequenceStmt(StringInfo buf, GrantStmt *stmt); static void AppendGrantOnSequenceStmt(StringInfo buf, GrantStmt *stmt);
static void AppendGrantOnSequenceSequences(StringInfo buf, GrantStmt *stmt); static void AppendGrantOnSequenceSequences(StringInfo buf, GrantStmt *stmt);
@ -258,6 +261,96 @@ AppendAlterSequenceOwnerStmt(StringInfo buf, AlterTableStmt *stmt)
} }
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
* DeparseAlterSequencePersistenceStmt builds and returns a string representing
* the AlterTableStmt consisting of changing the persistence of a sequence
*/
char *
DeparseAlterSequencePersistenceStmt(Node *node)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
StringInfoData str = { 0 };
initStringInfo(&str);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
AppendAlterSequencePersistenceStmt(&str, stmt);
return str.data;
}
/*
* AppendAlterSequencePersistenceStmt appends a string representing the
* AlterTableStmt to a buffer consisting of changing the persistence of a sequence
*/
static void
AppendAlterSequencePersistenceStmt(StringInfo buf, AlterTableStmt *stmt)
{
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
RangeVar *seq = stmt->relation;
char *qualifiedSequenceName = quote_qualified_identifier(seq->schemaname,
seq->relname);
appendStringInfoString(buf, "ALTER SEQUENCE ");
if (stmt->missing_ok)
{
appendStringInfoString(buf, "IF EXISTS ");
}
appendStringInfoString(buf, qualifiedSequenceName);
ListCell *cmdCell = NULL;
foreach(cmdCell, stmt->cmds)
{
if (cmdCell != list_head(stmt->cmds))
{
/*
* As of PG15, we cannot reach this code because ALTER SEQUENCE
* is only supported for a single sequence. Still, let's be
* defensive against future PG changes.
*/
ereport(ERROR, (errmsg("More than one subcommand is not supported "
"for ALTER SEQUENCE")));
}
AlterTableCmd *alterTableCmd = castNode(AlterTableCmd, lfirst(cmdCell));
switch (alterTableCmd->subtype)
{
case AT_SetLogged:
{
appendStringInfoString(buf, " SET LOGGED;");
break;
}
case AT_SetUnLogged:
{
appendStringInfoString(buf, " SET UNLOGGED;");
break;
}
default:
{
/*
* normally we shouldn't ever reach this
* because we enter this function after making sure this stmt is of the form
* ALTER SEQUENCE .. SET LOGGED/UNLOGGED
*/
ereport(ERROR, (errmsg("unsupported subtype for alter sequence command"),
errdetail("sub command type: %d",
alterTableCmd->subtype)));
}
}
}
}
#endif
/* /*
* DeparseGrantOnSequenceStmt builds and returns a string representing the GrantOnSequenceStmt * DeparseGrantOnSequenceStmt builds and returns a string representing the GrantOnSequenceStmt
*/ */

View File

@ -51,6 +51,37 @@ QualifyAlterSequenceOwnerStmt(Node *node)
} }
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
* QualifyAlterSequencePersistenceStmt transforms a
* ALTER SEQUENCE .. SET LOGGED/UNLOGGED
* statement in place and makes the sequence name fully qualified.
*/
void
QualifyAlterSequencePersistenceStmt(Node *node)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
RangeVar *seq = stmt->relation;
if (seq->schemaname == NULL)
{
Oid seqOid = RangeVarGetRelid(seq, NoLock, stmt->missing_ok);
if (OidIsValid(seqOid))
{
Oid schemaOid = get_rel_namespace(seqOid);
seq->schemaname = get_namespace_name(schemaOid);
}
}
}
#endif
/* /*
* QualifyAlterSequenceSchemaStmt transforms a * QualifyAlterSequenceSchemaStmt transforms a
* ALTER SEQUENCE .. SET SCHEMA .. * ALTER SEQUENCE .. SET SCHEMA ..
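A minimal sketch of the qualification step above (the schema is assumed to resolve to public):

-- before qualification:  ALTER SEQUENCE user_id_seq SET UNLOGGED;
-- after qualification:   ALTER SEQUENCE public.user_id_seq SET UNLOGGED;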

View File

@ -430,12 +430,6 @@ static void get_coercion_expr(Node *arg, deparse_context *context,
Node *parentNode); Node *parentNode);
static void get_const_expr(Const *constval, deparse_context *context, static void get_const_expr(Const *constval, deparse_context *context,
int showtype); int showtype);
static void get_json_constructor(JsonConstructorExpr *ctor,
deparse_context *context, bool showimplicit);
static void get_json_agg_constructor(JsonConstructorExpr *ctor,
deparse_context *context,
const char *funcname,
bool is_json_objectagg);
static void get_const_collation(Const *constval, deparse_context *context); static void get_const_collation(Const *constval, deparse_context *context);
static void simple_quote_literal(StringInfo buf, const char *val); static void simple_quote_literal(StringInfo buf, const char *val);
static void get_sublink_expr(SubLink *sublink, deparse_context *context); static void get_sublink_expr(SubLink *sublink, deparse_context *context);
@ -465,10 +459,6 @@ static char *generate_function_name(Oid funcid, int nargs,
List *argnames, Oid *argtypes, List *argnames, Oid *argtypes,
bool has_variadic, bool *use_variadic_p, bool has_variadic, bool *use_variadic_p,
ParseExprKind special_exprkind); ParseExprKind special_exprkind);
static void get_json_path_spec(Node *path_spec, deparse_context *context,
bool showimplicit);
static void get_json_table_columns(TableFunc *tf, JsonTableParent *node,
deparse_context *context, bool showimplicit);
#define only_marker(rte) ((rte)->inh ? "" : "ONLY ") #define only_marker(rte) ((rte)->inh ? "" : "ONLY ")
@ -661,7 +651,6 @@ pg_get_rule_expr(Node *expression)
return buffer->data; return buffer->data;
} }
/* /*
* set_rtable_names: select RTE aliases to be used in printing a query * set_rtable_names: select RTE aliases to be used in printing a query
* *
@ -1981,7 +1970,6 @@ pop_ancestor_plan(deparse_namespace *dpns, deparse_namespace *save_dpns)
*dpns = *save_dpns; *dpns = *save_dpns;
} }
/* ---------- /* ----------
* deparse_shard_query - Parse back a query for execution on a shard * deparse_shard_query - Parse back a query for execution on a shard
* *
@ -1998,7 +1986,6 @@ deparse_shard_query(Query *query, Oid distrelid, int64 shardid,
0, WRAP_COLUMN_DEFAULT, 0); 0, WRAP_COLUMN_DEFAULT, 0);
} }
/* ---------- /* ----------
* get_query_def - Parse back one query parsetree * get_query_def - Parse back one query parsetree
* *
@ -2026,7 +2013,6 @@ get_query_def(Query *query, StringInfo buf, List *parentnamespace,
prettyFlags, wrapColumn, startIndent); prettyFlags, wrapColumn, startIndent);
} }
/* ---------- /* ----------
* get_query_def_extended - Parse back one query parsetree, optionally * get_query_def_extended - Parse back one query parsetree, optionally
* with extension using a shard identifier. * with extension using a shard identifier.
@ -2954,8 +2940,7 @@ get_rule_sortgroupclause(Index ref, List *tlist, bool force_colno,
bool need_paren = (PRETTY_PAREN(context) bool need_paren = (PRETTY_PAREN(context)
|| IsA(expr, FuncExpr) || IsA(expr, FuncExpr)
|| IsA(expr, Aggref) || IsA(expr, Aggref)
|| IsA(expr, WindowFunc) || IsA(expr, WindowFunc));
|| IsA(expr, JsonConstructorExpr));
if (need_paren) if (need_paren)
appendStringInfoChar(context->buf, '('); appendStringInfoChar(context->buf, '(');
@ -3447,7 +3432,6 @@ get_insert_query_def(Query *query, deparse_context *context,
} }
} }
/* ---------- /* ----------
* get_update_query_def - Parse back an UPDATE parsetree * get_update_query_def - Parse back an UPDATE parsetree
* ---------- * ----------
@ -3528,7 +3512,6 @@ get_update_query_def(Query *query, deparse_context *context,
} }
} }
/* ---------- /* ----------
* get_update_query_targetlist_def - Parse back an UPDATE targetlist * get_update_query_targetlist_def - Parse back an UPDATE targetlist
* ---------- * ----------
@ -3680,7 +3663,6 @@ get_update_query_targetlist_def(Query *query, List *targetList,
} }
} }
/* ---------- /* ----------
* get_delete_query_def - Parse back a DELETE parsetree * get_delete_query_def - Parse back a DELETE parsetree
* ---------- * ----------
@ -3756,7 +3738,6 @@ get_delete_query_def(Query *query, deparse_context *context,
} }
} }
/* ---------- /* ----------
* get_utility_query_def - Parse back a UTILITY parsetree * get_utility_query_def - Parse back a UTILITY parsetree
* ---------- * ----------
@ -4890,7 +4871,6 @@ get_simple_binary_op_name(OpExpr *expr)
return NULL; return NULL;
} }
/* /*
* isSimpleNode - check if given node is simple (doesn't need parenthesizing) * isSimpleNode - check if given node is simple (doesn't need parenthesizing)
* *
@ -4927,8 +4907,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
case T_GroupingFunc: case T_GroupingFunc:
case T_WindowFunc: case T_WindowFunc:
case T_FuncExpr: case T_FuncExpr:
case T_JsonConstructorExpr:
case T_JsonExpr:
/* function-like: name(..) or name[..] */ /* function-like: name(..) or name[..] */
return true; return true;
@ -5022,7 +5000,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
case T_NullTest: case T_NullTest:
case T_BooleanTest: case T_BooleanTest:
case T_DistinctExpr: case T_DistinctExpr:
case T_JsonIsPredicate:
switch (nodeTag(parentNode)) switch (nodeTag(parentNode))
{ {
case T_FuncExpr: case T_FuncExpr:
@ -5047,7 +5024,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
case T_GroupingFunc: /* own parentheses */ case T_GroupingFunc: /* own parentheses */
case T_WindowFunc: /* own parentheses */ case T_WindowFunc: /* own parentheses */
case T_CaseExpr: /* other separators */ case T_CaseExpr: /* other separators */
case T_JsonExpr: /* own parentheses */
return true; return true;
default: default:
return false; return false;
@ -5104,11 +5080,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
return false; return false;
} }
case T_JsonValueExpr:
/* maybe simple, check args */
return isSimpleNode((Node *) ((JsonValueExpr *) node)->raw_expr,
node, prettyFlags);
default: default:
break; break;
} }
@ -5116,7 +5087,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
return false; return false;
} }
/* /*
* appendContextKeyword - append a keyword to buffer * appendContextKeyword - append a keyword to buffer
* *
@ -5184,7 +5154,6 @@ removeStringInfoSpaces(StringInfo str)
str->data[--(str->len)] = '\0'; str->data[--(str->len)] = '\0';
} }
/* /*
* get_rule_expr_paren - deparse expr using get_rule_expr, * get_rule_expr_paren - deparse expr using get_rule_expr,
* embracing the string with parentheses if necessary for prettyPrint. * embracing the string with parentheses if necessary for prettyPrint.
@ -5214,122 +5183,6 @@ get_rule_expr_paren(Node *node, deparse_context *context,
appendStringInfoChar(context->buf, ')'); appendStringInfoChar(context->buf, ')');
} }
/*
* get_json_path_spec - Parse back a JSON path specification
*/
static void
get_json_path_spec(Node *path_spec, deparse_context *context, bool showimplicit)
{
if (IsA(path_spec, Const))
get_const_expr((Const *) path_spec, context, -1);
else
get_rule_expr(path_spec, context, showimplicit);
}
/*
* get_json_format - Parse back a JsonFormat node
*/
static void
get_json_format(JsonFormat *format, StringInfo buf)
{
if (format->format_type == JS_FORMAT_DEFAULT)
return;
appendStringInfoString(buf,
format->format_type == JS_FORMAT_JSONB ?
" FORMAT JSONB" : " FORMAT JSON");
if (format->encoding != JS_ENC_DEFAULT)
{
const char *encoding =
format->encoding == JS_ENC_UTF16 ? "UTF16" :
format->encoding == JS_ENC_UTF32 ? "UTF32" : "UTF8";
appendStringInfo(buf, " ENCODING %s", encoding);
}
}
/*
* get_json_returning - Parse back a JsonReturning structure
*/
static void
get_json_returning(JsonReturning *returning, StringInfo buf,
bool json_format_by_default)
{
if (!OidIsValid(returning->typid))
return;
appendStringInfo(buf, " RETURNING %s",
format_type_with_typemod(returning->typid,
returning->typmod));
if (!json_format_by_default ||
returning->format->format_type !=
(returning->typid == JSONBOID ? JS_FORMAT_JSONB : JS_FORMAT_JSON))
get_json_format(returning->format, buf);
}
static void
get_json_behavior(JsonBehavior *behavior, deparse_context *context,
const char *on)
{
/*
* The order of array elements must correspond to the order of
* JsonBehaviorType members.
*/
const char *behavior_names[] =
{
" NULL",
" ERROR",
" EMPTY",
" TRUE",
" FALSE",
" UNKNOWN",
" EMPTY ARRAY",
" EMPTY OBJECT",
" DEFAULT "
};
if ((int) behavior->btype < 0 || behavior->btype >= lengthof(behavior_names))
elog(ERROR, "invalid json behavior type: %d", behavior->btype);
appendStringInfoString(context->buf, behavior_names[behavior->btype]);
if (behavior->btype == JSON_BEHAVIOR_DEFAULT)
get_rule_expr(behavior->default_expr, context, false);
appendStringInfo(context->buf, " ON %s", on);
}
/*
* get_json_expr_options
*
* Parse back common options for JSON_QUERY, JSON_VALUE, JSON_EXISTS and
* JSON_TABLE columns.
*/
static void
get_json_expr_options(JsonExpr *jsexpr, deparse_context *context,
JsonBehaviorType default_behavior)
{
if (jsexpr->op == JSON_QUERY_OP)
{
if (jsexpr->wrapper == JSW_CONDITIONAL)
appendStringInfo(context->buf, " WITH CONDITIONAL WRAPPER");
else if (jsexpr->wrapper == JSW_UNCONDITIONAL)
appendStringInfo(context->buf, " WITH UNCONDITIONAL WRAPPER");
if (jsexpr->omit_quotes)
appendStringInfo(context->buf, " OMIT QUOTES");
}
if (jsexpr->op != JSON_EXISTS_OP &&
jsexpr->on_empty->btype != default_behavior)
get_json_behavior(jsexpr->on_empty, context, "EMPTY");
if (jsexpr->on_error->btype != default_behavior)
get_json_behavior(jsexpr->on_error, context, "ERROR");
}
/* ---------- /* ----------
* get_rule_expr - Parse back an expression * get_rule_expr - Parse back an expression
* *
@ -6510,115 +6363,6 @@ get_rule_expr(Node *node, deparse_context *context,
} }
break; break;
case T_JsonValueExpr:
{
JsonValueExpr *jve = (JsonValueExpr *) node;
get_rule_expr((Node *) jve->raw_expr, context, false);
get_json_format(jve->format, context->buf);
}
break;
case T_JsonConstructorExpr:
get_json_constructor((JsonConstructorExpr *) node, context, false);
break;
case T_JsonIsPredicate:
{
JsonIsPredicate *pred = (JsonIsPredicate *) node;
if (!PRETTY_PAREN(context))
appendStringInfoChar(context->buf, '(');
get_rule_expr_paren(pred->expr, context, true, node);
appendStringInfoString(context->buf, " IS JSON");
/* TODO: handle FORMAT clause */
switch (pred->item_type)
{
case JS_TYPE_SCALAR:
appendStringInfoString(context->buf, " SCALAR");
break;
case JS_TYPE_ARRAY:
appendStringInfoString(context->buf, " ARRAY");
break;
case JS_TYPE_OBJECT:
appendStringInfoString(context->buf, " OBJECT");
break;
default:
break;
}
if (pred->unique_keys)
appendStringInfoString(context->buf, " WITH UNIQUE KEYS");
if (!PRETTY_PAREN(context))
appendStringInfoChar(context->buf, ')');
}
break;
case T_JsonExpr:
{
JsonExpr *jexpr = (JsonExpr *) node;
switch (jexpr->op)
{
case JSON_QUERY_OP:
appendStringInfoString(buf, "JSON_QUERY(");
break;
case JSON_VALUE_OP:
appendStringInfoString(buf, "JSON_VALUE(");
break;
case JSON_EXISTS_OP:
appendStringInfoString(buf, "JSON_EXISTS(");
break;
default:
elog(ERROR, "unexpected JsonExpr type: %d", jexpr->op);
break;
}
get_rule_expr(jexpr->formatted_expr, context, showimplicit);
appendStringInfoString(buf, ", ");
get_json_path_spec(jexpr->path_spec, context, showimplicit);
if (jexpr->passing_values)
{
ListCell *lc1,
*lc2;
bool needcomma = false;
appendStringInfoString(buf, " PASSING ");
forboth(lc1, jexpr->passing_names,
lc2, jexpr->passing_values)
{
if (needcomma)
appendStringInfoString(buf, ", ");
needcomma = true;
get_rule_expr((Node *) lfirst(lc2), context, showimplicit);
appendStringInfo(buf, " AS %s",
((String *) lfirst_node(String, lc1))->sval);
}
}
if (jexpr->op != JSON_EXISTS_OP ||
jexpr->returning->typid != BOOLOID)
get_json_returning(jexpr->returning, context->buf,
jexpr->op == JSON_QUERY_OP);
get_json_expr_options(jexpr, context,
jexpr->op == JSON_EXISTS_OP ?
JSON_BEHAVIOR_FALSE : JSON_BEHAVIOR_NULL);
appendStringInfoString(buf, ")");
}
break;
case T_List: case T_List:
{ {
char *sep; char *sep;
@ -6746,7 +6490,6 @@ looks_like_function(Node *node)
case T_MinMaxExpr: case T_MinMaxExpr:
case T_SQLValueFunction: case T_SQLValueFunction:
case T_XmlExpr: case T_XmlExpr:
case T_JsonExpr:
/* these are all accepted by func_expr_common_subexpr */ /* these are all accepted by func_expr_common_subexpr */
return true; return true;
default: default:
@ -6755,7 +6498,6 @@ looks_like_function(Node *node)
return false; return false;
} }
/* /*
* get_oper_expr - Parse back an OpExpr node * get_oper_expr - Parse back an OpExpr node
*/ */
@ -6894,90 +6636,6 @@ get_func_expr(FuncExpr *expr, deparse_context *context,
appendStringInfoChar(buf, ')'); appendStringInfoChar(buf, ')');
} }
static void
get_json_constructor_options(JsonConstructorExpr *ctor, StringInfo buf)
{
if (ctor->absent_on_null)
{
if (ctor->type == JSCTOR_JSON_OBJECT ||
ctor->type == JSCTOR_JSON_OBJECTAGG)
appendStringInfoString(buf, " ABSENT ON NULL");
}
else
{
if (ctor->type == JSCTOR_JSON_ARRAY ||
ctor->type == JSCTOR_JSON_ARRAYAGG)
appendStringInfoString(buf, " NULL ON NULL");
}
if (ctor->unique)
appendStringInfoString(buf, " WITH UNIQUE KEYS");
if (!((ctor->type == JSCTOR_JSON_PARSE ||
ctor->type == JSCTOR_JSON_SCALAR) &&
ctor->returning->typid == JSONOID))
get_json_returning(ctor->returning, buf, true);
}
static void
get_json_constructor(JsonConstructorExpr *ctor, deparse_context *context,
bool showimplicit)
{
StringInfo buf = context->buf;
const char *funcname;
int nargs;
ListCell *lc;
switch (ctor->type)
{
case JSCTOR_JSON_PARSE:
funcname = "JSON";
break;
case JSCTOR_JSON_SCALAR:
funcname = "JSON_SCALAR";
break;
case JSCTOR_JSON_SERIALIZE:
funcname = "JSON_SERIALIZE";
break;
case JSCTOR_JSON_OBJECT:
funcname = "JSON_OBJECT";
break;
case JSCTOR_JSON_ARRAY:
funcname = "JSON_ARRAY";
break;
case JSCTOR_JSON_OBJECTAGG:
get_json_agg_constructor(ctor, context, "JSON_OBJECTAGG", true);
return;
case JSCTOR_JSON_ARRAYAGG:
get_json_agg_constructor(ctor, context, "JSON_ARRAYAGG", false);
return;
default:
elog(ERROR, "invalid JsonConstructorExprType %d", ctor->type);
}
appendStringInfo(buf, "%s(", funcname);
nargs = 0;
foreach(lc, ctor->args)
{
if (nargs > 0)
{
const char *sep = ctor->type == JSCTOR_JSON_OBJECT &&
(nargs % 2) != 0 ? " : " : ", ";
appendStringInfoString(buf, sep);
}
get_rule_expr((Node *) lfirst(lc), context, true);
nargs++;
}
get_json_constructor_options(ctor, buf);
appendStringInfo(buf, ")");
}
/* /*
* get_proc_expr - Parse back a CallStmt node * get_proc_expr - Parse back a CallStmt node
*/ */
@ -7023,17 +6681,16 @@ get_proc_expr(CallStmt *stmt, deparse_context *context,
} }
/* /*
* get_agg_expr_helper - Parse back an Aggref node * get_agg_expr - Parse back an Aggref node
*/ */
static void static void
get_agg_expr_helper(Aggref *aggref, deparse_context *context, get_agg_expr(Aggref *aggref, deparse_context *context,
Aggref *original_aggref, const char *funcname, Aggref *original_aggref)
const char *options, bool is_json_objectagg)
{ {
StringInfo buf = context->buf; StringInfo buf = context->buf;
Oid argtypes[FUNC_MAX_ARGS]; Oid argtypes[FUNC_MAX_ARGS];
int nargs; int nargs;
bool use_variadic = false; bool use_variadic;
/* /*
* For a combining aggregate, we look up and deparse the corresponding * For a combining aggregate, we look up and deparse the corresponding
@ -7064,14 +6721,13 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context,
/* Extract the argument types as seen by the parser */ /* Extract the argument types as seen by the parser */
nargs = get_aggregate_argtypes(aggref, argtypes); nargs = get_aggregate_argtypes(aggref, argtypes);
if (!funcname)
funcname = generate_function_name(aggref->aggfnoid, nargs, NIL,
argtypes, aggref->aggvariadic,
&use_variadic,
context->special_exprkind);
/* Print the aggregate name, schema-qualified if needed */ /* Print the aggregate name, schema-qualified if needed */
appendStringInfo(buf, "%s(%s", funcname, appendStringInfo(buf, "%s(%s",
generate_function_name(aggref->aggfnoid, nargs,
NIL, argtypes,
aggref->aggvariadic,
&use_variadic,
context->special_exprkind),
(aggref->aggdistinct != NIL) ? "DISTINCT " : ""); (aggref->aggdistinct != NIL) ? "DISTINCT " : "");
if (AGGKIND_IS_ORDERED_SET(aggref->aggkind)) if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
@ -7107,18 +6763,7 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context,
if (tle->resjunk) if (tle->resjunk)
continue; continue;
if (i++ > 0) if (i++ > 0)
{ appendStringInfoString(buf, ", ");
if (is_json_objectagg)
{
if (i > 2)
break; /* skip ABSENT ON NULL and WITH UNIQUE
* args */
appendStringInfoString(buf, " : ");
}
else
appendStringInfoString(buf, ", ");
}
if (use_variadic && i == nargs) if (use_variadic && i == nargs)
appendStringInfoString(buf, "VARIADIC "); appendStringInfoString(buf, "VARIADIC ");
get_rule_expr(arg, context, true); get_rule_expr(arg, context, true);
@ -7132,9 +6777,6 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context,
} }
} }
if (options)
appendStringInfoString(buf, options);
if (aggref->aggfilter != NULL) if (aggref->aggfilter != NULL)
{ {
appendStringInfoString(buf, ") FILTER (WHERE "); appendStringInfoString(buf, ") FILTER (WHERE ");
@ -7144,16 +6786,6 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context,
appendStringInfoChar(buf, ')'); appendStringInfoChar(buf, ')');
} }
/*
* get_agg_expr - Parse back an Aggref node
*/
static void
get_agg_expr(Aggref *aggref, deparse_context *context, Aggref *original_aggref)
{
get_agg_expr_helper(aggref, context, original_aggref, NULL, NULL,
false);
}
/* /*
* This is a helper function for get_agg_expr(). It's used when we deparse * This is a helper function for get_agg_expr(). It's used when we deparse
* a combining Aggref; resolve_special_varno locates the corresponding partial * a combining Aggref; resolve_special_varno locates the corresponding partial
@ -7173,12 +6805,10 @@ get_agg_combine_expr(Node *node, deparse_context *context, void *callback_arg)
} }
/* /*
* get_windowfunc_expr_helper - Parse back a WindowFunc node * get_windowfunc_expr - Parse back a WindowFunc node
*/ */
static void static void
get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context, get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context)
const char *funcname, const char *options,
bool is_json_objectagg)
{ {
StringInfo buf = context->buf; StringInfo buf = context->buf;
Oid argtypes[FUNC_MAX_ARGS]; Oid argtypes[FUNC_MAX_ARGS];
@ -7202,30 +6832,17 @@ get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context,
nargs++; nargs++;
} }
if (!funcname) appendStringInfo(buf, "%s(",
funcname = generate_function_name(wfunc->winfnoid, nargs, argnames, generate_function_name(wfunc->winfnoid, nargs,
argtypes, false, NULL, argnames, argtypes,
context->special_exprkind); false, NULL,
context->special_exprkind));
appendStringInfo(buf, "%s(", funcname);
/* winstar can be set only in zero-argument aggregates */ /* winstar can be set only in zero-argument aggregates */
if (wfunc->winstar) if (wfunc->winstar)
appendStringInfoChar(buf, '*'); appendStringInfoChar(buf, '*');
else else
{ get_rule_expr((Node *) wfunc->args, context, true);
if (is_json_objectagg)
{
get_rule_expr((Node *) linitial(wfunc->args), context, false);
appendStringInfoString(buf, " : ");
get_rule_expr((Node *) lsecond(wfunc->args), context, false);
}
else
get_rule_expr((Node *) wfunc->args, context, true);
}
if (options)
appendStringInfoString(buf, options);
if (wfunc->aggfilter != NULL) if (wfunc->aggfilter != NULL)
{ {
@ -7262,15 +6879,6 @@ get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context,
} }
} }
/*
* get_windowfunc_expr - Parse back a WindowFunc node
*/
static void
get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context)
{
get_windowfunc_expr_helper(wfunc, context, NULL, NULL, false);
}
/* /*
* get_func_sql_syntax - Parse back a SQL-syntax function call * get_func_sql_syntax - Parse back a SQL-syntax function call
* *
@ -7511,31 +7119,6 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
return false; return false;
} }
/*
* get_json_agg_constructor - Parse back an aggregate JsonConstructorExpr node
*/
static void
get_json_agg_constructor(JsonConstructorExpr *ctor, deparse_context *context,
const char *funcname, bool is_json_objectagg)
{
StringInfoData options;
initStringInfo(&options);
get_json_constructor_options(ctor, &options);
if (IsA(ctor->func, Aggref))
get_agg_expr_helper((Aggref *) ctor->func, context,
(Aggref *) ctor->func,
funcname, options.data, is_json_objectagg);
else if (IsA(ctor->func, WindowFunc))
get_windowfunc_expr_helper((WindowFunc *) ctor->func, context,
funcname, options.data,
is_json_objectagg);
else
elog(ERROR, "invalid JsonConstructorExpr underlying node type: %d",
nodeTag(ctor->func));
}
/* ---------- /* ----------
* get_coercion_expr * get_coercion_expr
* *
@ -7775,7 +7358,6 @@ simple_quote_literal(StringInfo buf, const char *val)
appendStringInfoChar(buf, '\''); appendStringInfoChar(buf, '\'');
} }
/* ---------- /* ----------
* get_sublink_expr - Parse back a sublink * get_sublink_expr - Parse back a sublink
* ---------- * ----------
@ -7900,16 +7482,17 @@ get_sublink_expr(SubLink *sublink, deparse_context *context)
appendStringInfoChar(buf, ')'); appendStringInfoChar(buf, ')');
} }
/* ---------- /* ----------
* get_xmltable - Parse back a XMLTABLE function * get_tablefunc - Parse back a table function
* ---------- * ----------
*/ */
static void static void
get_xmltable(TableFunc *tf, deparse_context *context, bool showimplicit) get_tablefunc(TableFunc *tf, deparse_context *context, bool showimplicit)
{ {
StringInfo buf = context->buf; StringInfo buf = context->buf;
/* XMLTABLE is the only existing implementation. */
appendStringInfoString(buf, "XMLTABLE("); appendStringInfoString(buf, "XMLTABLE(");
if (tf->ns_uris != NIL) if (tf->ns_uris != NIL)
@ -8000,271 +7583,6 @@ get_xmltable(TableFunc *tf, deparse_context *context, bool showimplicit)
appendStringInfoChar(buf, ')'); appendStringInfoChar(buf, ')');
} }
/*
* get_json_nested_columns - Parse back nested JSON_TABLE columns
*/
static void
get_json_table_nested_columns(TableFunc *tf, Node *node,
deparse_context *context, bool showimplicit,
bool needcomma)
{
if (IsA(node, JsonTableSibling))
{
JsonTableSibling *n = (JsonTableSibling *) node;
get_json_table_nested_columns(tf, n->larg, context, showimplicit,
needcomma);
get_json_table_nested_columns(tf, n->rarg, context, showimplicit, true);
}
else
{
JsonTableParent *n = castNode(JsonTableParent, node);
if (needcomma)
appendStringInfoChar(context->buf, ',');
appendStringInfoChar(context->buf, ' ');
appendContextKeyword(context, "NESTED PATH ", 0, 0, 0);
get_const_expr(n->path, context, -1);
appendStringInfo(context->buf, " AS %s", quote_identifier(n->name));
get_json_table_columns(tf, n, context, showimplicit);
}
}
/*
* get_json_table_plan - Parse back a JSON_TABLE plan
*/
static void
get_json_table_plan(TableFunc *tf, Node *node, deparse_context *context,
bool parenthesize)
{
if (parenthesize)
appendStringInfoChar(context->buf, '(');
if (IsA(node, JsonTableSibling))
{
JsonTableSibling *n = (JsonTableSibling *) node;
get_json_table_plan(tf, n->larg, context,
IsA(n->larg, JsonTableSibling) ||
castNode(JsonTableParent, n->larg)->child);
appendStringInfoString(context->buf, n->cross ? " CROSS " : " UNION ");
get_json_table_plan(tf, n->rarg, context,
IsA(n->rarg, JsonTableSibling) ||
castNode(JsonTableParent, n->rarg)->child);
}
else
{
JsonTableParent *n = castNode(JsonTableParent, node);
appendStringInfoString(context->buf, quote_identifier(n->name));
if (n->child)
{
appendStringInfoString(context->buf,
n->outerJoin ? " OUTER " : " INNER ");
get_json_table_plan(tf, n->child, context,
IsA(n->child, JsonTableSibling));
}
}
if (parenthesize)
appendStringInfoChar(context->buf, ')');
}
/*
* get_json_table_columns - Parse back JSON_TABLE columns
*/
static void
get_json_table_columns(TableFunc *tf, JsonTableParent *node,
deparse_context *context, bool showimplicit)
{
StringInfo buf = context->buf;
JsonExpr *jexpr = castNode(JsonExpr, tf->docexpr);
ListCell *lc_colname;
ListCell *lc_coltype;
ListCell *lc_coltypmod;
ListCell *lc_colvarexpr;
int colnum = 0;
appendStringInfoChar(buf, ' ');
appendContextKeyword(context, "COLUMNS (", 0, 0, 0);
if (PRETTY_INDENT(context))
context->indentLevel += PRETTYINDENT_VAR;
forfour(lc_colname, tf->colnames,
lc_coltype, tf->coltypes,
lc_coltypmod, tf->coltypmods,
lc_colvarexpr, tf->colvalexprs)
{
char *colname = strVal(lfirst(lc_colname));
JsonExpr *colexpr;
Oid typid;
int32 typmod;
bool ordinality;
JsonBehaviorType default_behavior;
typid = lfirst_oid(lc_coltype);
typmod = lfirst_int(lc_coltypmod);
colexpr = castNode(JsonExpr, lfirst(lc_colvarexpr));
if (colnum < node->colMin)
{
colnum++;
continue;
}
if (colnum > node->colMax)
break;
if (colnum > node->colMin)
appendStringInfoString(buf, ", ");
colnum++;
ordinality = !colexpr;
appendContextKeyword(context, "", 0, 0, 0);
appendStringInfo(buf, "%s %s", quote_identifier(colname),
ordinality ? "FOR ORDINALITY" :
format_type_with_typemod(typid, typmod));
if (ordinality)
continue;
if (colexpr->op == JSON_EXISTS_OP)
{
appendStringInfoString(buf, " EXISTS");
default_behavior = JSON_BEHAVIOR_FALSE;
}
else
{
if (colexpr->op == JSON_QUERY_OP)
{
char typcategory;
bool typispreferred;
get_type_category_preferred(typid, &typcategory, &typispreferred);
if (typcategory == TYPCATEGORY_STRING)
appendStringInfoString(buf,
colexpr->format->format_type == JS_FORMAT_JSONB ?
" FORMAT JSONB" : " FORMAT JSON");
}
default_behavior = JSON_BEHAVIOR_NULL;
}
if (jexpr->on_error->btype == JSON_BEHAVIOR_ERROR)
default_behavior = JSON_BEHAVIOR_ERROR;
appendStringInfoString(buf, " PATH ");
get_json_path_spec(colexpr->path_spec, context, showimplicit);
get_json_expr_options(colexpr, context, default_behavior);
}
if (node->child)
get_json_table_nested_columns(tf, node->child, context, showimplicit,
node->colMax >= node->colMin);
if (PRETTY_INDENT(context))
context->indentLevel -= PRETTYINDENT_VAR;
appendContextKeyword(context, ")", 0, 0, 0);
}
/* ----------
* get_json_table - Parse back a JSON_TABLE function
* ----------
*/
static void
get_json_table(TableFunc *tf, deparse_context *context, bool showimplicit)
{
StringInfo buf = context->buf;
JsonExpr *jexpr = castNode(JsonExpr, tf->docexpr);
JsonTableParent *root = castNode(JsonTableParent, tf->plan);
appendStringInfoString(buf, "JSON_TABLE(");
if (PRETTY_INDENT(context))
context->indentLevel += PRETTYINDENT_VAR;
appendContextKeyword(context, "", 0, 0, 0);
get_rule_expr(jexpr->formatted_expr, context, showimplicit);
appendStringInfoString(buf, ", ");
get_const_expr(root->path, context, -1);
appendStringInfo(buf, " AS %s", quote_identifier(root->name));
if (jexpr->passing_values)
{
ListCell *lc1,
*lc2;
bool needcomma = false;
appendStringInfoChar(buf, ' ');
appendContextKeyword(context, "PASSING ", 0, 0, 0);
if (PRETTY_INDENT(context))
context->indentLevel += PRETTYINDENT_VAR;
forboth(lc1, jexpr->passing_names,
lc2, jexpr->passing_values)
{
if (needcomma)
appendStringInfoString(buf, ", ");
needcomma = true;
appendContextKeyword(context, "", 0, 0, 0);
get_rule_expr((Node *) lfirst(lc2), context, false);
appendStringInfo(buf, " AS %s",
quote_identifier((lfirst_node(String, lc1))->sval)
);
}
if (PRETTY_INDENT(context))
context->indentLevel -= PRETTYINDENT_VAR;
}
get_json_table_columns(tf, root, context, showimplicit);
appendStringInfoChar(buf, ' ');
appendContextKeyword(context, "PLAN ", 0, 0, 0);
get_json_table_plan(tf, (Node *) root, context, true);
if (jexpr->on_error->btype != JSON_BEHAVIOR_EMPTY)
get_json_behavior(jexpr->on_error, context, "ERROR");
if (PRETTY_INDENT(context))
context->indentLevel -= PRETTYINDENT_VAR;
appendContextKeyword(context, ")", 0, 0, 0);
}
/* ----------
* get_tablefunc - Parse back a table function
* ----------
*/
static void
get_tablefunc(TableFunc *tf, deparse_context *context, bool showimplicit)
{
/* XMLTABLE and JSON_TABLE are the only existing implementations. */
if (tf->functype == TFT_XMLTABLE)
get_xmltable(tf, context, showimplicit);
else if (tf->functype == TFT_JSON_TABLE)
get_json_table(tf, context, showimplicit);
}
/* ---------- /* ----------
* get_from_clause - Parse back a FROM clause * get_from_clause - Parse back a FROM clause
* *
@ -8866,7 +8184,6 @@ get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
} }
} }
/* /*
* get_opclass_name - fetch name of an index operator class * get_opclass_name - fetch name of an index operator class
* *
@ -9137,7 +8454,6 @@ generate_relation_name(Oid relid, List *namespaces)
return result; return result;
} }
/* /*
* generate_rte_shard_name returns the qualified name of the shard given a * generate_rte_shard_name returns the qualified name of the shard given a
* CITUS_RTE_SHARD range table entry. * CITUS_RTE_SHARD range table entry.
@ -9156,7 +8472,6 @@ generate_rte_shard_name(RangeTblEntry *rangeTableEntry)
return generate_fragment_name(shardSchemaName, shardTableName); return generate_fragment_name(shardSchemaName, shardTableName);
} }
/* /*
* generate_fragment_name * generate_fragment_name
* Compute the name to display for a shard or merged table * Compute the name to display for a shard or merged table

View File

@ -2724,6 +2724,79 @@ GetNextBackgroundTaskTaskId(void)
} }
/*
* HasNonTerminalJobOfType returns true if there is a job of the given type that is not
* in a terminal state.
*
* Some job types want only a single instance to run at a time. Before submitting a new
* job, callers can use this function to check whether a job of their type is already
* executing.
*
* If such a job is found, the optional jobIdOut argument is populated with its jobId.
*/
bool
HasNonTerminalJobOfType(const char *jobType, int64 *jobIdOut)
{
Relation pgDistBackgroundJob =
table_open(DistBackgroundJobRelationId(), AccessShareLock);
/* find any job in states listed here */
BackgroundJobStatus jobStatus[] = {
BACKGROUND_JOB_STATUS_RUNNING,
BACKGROUND_JOB_STATUS_CANCELLING,
BACKGROUND_JOB_STATUS_FAILING,
BACKGROUND_JOB_STATUS_SCHEDULED
};
NameData jobTypeName = { 0 };
namestrcpy(&jobTypeName, jobType);
bool foundJob = false;
for (int i = 0; !foundJob && i < lengthof(jobStatus); i++)
{
ScanKeyData scanKey[2] = { 0 };
const bool indexOK = true;
/* pg_dist_background_job.status == jobStatus[i] */
ScanKeyInit(&scanKey[0], Anum_pg_dist_background_job_state,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(BackgroundJobStatusOid(jobStatus[i])));
/* pg_dist_background_job.job_type == jobType */
ScanKeyInit(&scanKey[1], Anum_pg_dist_background_job_job_type,
BTEqualStrategyNumber, F_NAMEEQ,
NameGetDatum(&jobTypeName));
SysScanDesc scanDescriptor =
systable_beginscan(pgDistBackgroundJob,
InvalidOid, /* TODO use an actual index here */
indexOK, NULL, lengthof(scanKey), scanKey);
HeapTuple taskTuple = NULL;
if (HeapTupleIsValid(taskTuple = systable_getnext(scanDescriptor)))
{
foundJob = true;
if (jobIdOut)
{
Datum values[Natts_pg_dist_background_job] = { 0 };
bool isnull[Natts_pg_dist_background_job] = { 0 };
TupleDesc tupleDesc = RelationGetDescr(pgDistBackgroundJob);
heap_deform_tuple(taskTuple, tupleDesc, values, isnull);
*jobIdOut = DatumGetInt64(values[Anum_pg_dist_background_job_job_id - 1]);
}
}
systable_endscan(scanDescriptor);
}
table_close(pgDistBackgroundJob, NoLock);
return foundJob;
}
/* /*
* CreateBackgroundJob is a helper function to insert a new Background Job into Citus' * CreateBackgroundJob is a helper function to insert a new Background Job into Citus'
* catalog. After inserting the new job's metadata into the catalog it returns the job_id * catalog. After inserting the new job's metadata into the catalog it returns the job_id
@ -3949,7 +4022,7 @@ CancelTasksForJob(int64 jobid)
} }
/* make sure the current user has the rights to cancel this task */ /* make sure the current user has the rights to cancel this task */
Oid taskOwner = DatumGetObjectId(values[Anum_pg_dist_background_task_owner]); Oid taskOwner = DatumGetObjectId(values[Anum_pg_dist_background_task_owner - 1]);
if (superuser_arg(taskOwner) && !superuser()) if (superuser_arg(taskOwner) && !superuser())
{ {
/* must be a superuser to cancel tasks owned by superuser */ /* must be a superuser to cancel tasks owned by superuser */
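A rough SQL equivalent of the catalog scan performed by HasNonTerminalJobOfType (the column names follow the attribute constants used above; the status labels are an assumption):

SELECT job_id
FROM pg_dist_background_job
WHERE job_type = 'rebalance'
  AND state IN ('scheduled', 'running', 'cancelling', 'failing')
LIMIT 1;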

View File

@ -133,7 +133,9 @@ isolate_tenant_to_new_shard(PG_FUNCTION_ARGS)
FmgrInfo *hashFunction = cacheEntry->hashFunction; FmgrInfo *hashFunction = cacheEntry->hashFunction;
/* get hashed value of the distribution value */ /* get hashed value of the distribution value */
Datum hashedValueDatum = FunctionCall1(hashFunction, tenantIdDatum); Datum hashedValueDatum = FunctionCall1Coll(hashFunction,
cacheEntry->partitionColumn->varcollid,
tenantIdDatum);
int hashedValue = DatumGetInt32(hashedValueDatum); int hashedValue = DatumGetInt32(hashedValueDatum);
List *shardSplitPointsList = NIL; List *shardSplitPointsList = NIL;
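A hedged usage sketch (table, tenant value, and cascade option are hypothetical): with this fix, the tenant value below is hashed using the distribution column's collation, which matters for text distribution columns with a non-default collation.

SELECT isolate_tenant_to_new_shard('orders', 'acme', 'CASCADE');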

View File

@ -26,6 +26,7 @@
#include "commands/dbcommands.h" #include "commands/dbcommands.h"
#include "commands/sequence.h" #include "commands/sequence.h"
#include "distributed/argutils.h" #include "distributed/argutils.h"
#include "distributed/background_jobs.h"
#include "distributed/citus_safe_lib.h" #include "distributed/citus_safe_lib.h"
#include "distributed/citus_ruleutils.h" #include "distributed/citus_ruleutils.h"
#include "distributed/colocation_utils.h" #include "distributed/colocation_utils.h"
@ -43,10 +44,10 @@
#include "distributed/pg_dist_rebalance_strategy.h" #include "distributed/pg_dist_rebalance_strategy.h"
#include "distributed/reference_table_utils.h" #include "distributed/reference_table_utils.h"
#include "distributed/remote_commands.h" #include "distributed/remote_commands.h"
#include "distributed/repair_shards.h"
#include "distributed/resource_lock.h" #include "distributed/resource_lock.h"
#include "distributed/shard_rebalancer.h" #include "distributed/shard_rebalancer.h"
#include "distributed/shard_cleaner.h" #include "distributed/shard_cleaner.h"
#include "distributed/shard_transfer.h"
#include "distributed/tuplestore.h" #include "distributed/tuplestore.h"
#include "distributed/utils/array_type.h" #include "distributed/utils/array_type.h"
#include "distributed/worker_protocol.h" #include "distributed/worker_protocol.h"
@ -73,6 +74,7 @@ typedef struct RebalanceOptions
bool drainOnly; bool drainOnly;
float4 improvementThreshold; float4 improvementThreshold;
Form_pg_dist_rebalance_strategy rebalanceStrategy; Form_pg_dist_rebalance_strategy rebalanceStrategy;
const char *operationName;
} RebalanceOptions; } RebalanceOptions;
@ -227,6 +229,8 @@ static float4 NodeCapacity(WorkerNode *workerNode, void *context);
static ShardCost GetShardCost(uint64 shardId, void *context); static ShardCost GetShardCost(uint64 shardId, void *context);
static List * NonColocatedDistRelationIdList(void); static List * NonColocatedDistRelationIdList(void);
static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid); static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid);
static int64 RebalanceTableShardsBackground(RebalanceOptions *options, Oid
shardReplicationModeOid);
static void AcquireRebalanceColocationLock(Oid relationId, const char *operationName); static void AcquireRebalanceColocationLock(Oid relationId, const char *operationName);
static void ExecutePlacementUpdates(List *placementUpdateList, Oid static void ExecutePlacementUpdates(List *placementUpdateList, Oid
shardReplicationModeOid, char *noticeOperation); shardReplicationModeOid, char *noticeOperation);
@ -245,6 +249,8 @@ static uint64 WorkerShardSize(HTAB *workerShardStatistics,
static void AddToWorkerShardIdSet(HTAB *shardsByWorker, char *workerName, int workerPort, static void AddToWorkerShardIdSet(HTAB *shardsByWorker, char *workerName, int workerPort,
uint64 shardId); uint64 shardId);
static HTAB * BuildShardSizesHash(ProgressMonitorData *monitor, HTAB *shardStatistics); static HTAB * BuildShardSizesHash(ProgressMonitorData *monitor, HTAB *shardStatistics);
static void ErrorOnConcurrentRebalance(RebalanceOptions *);
/* declarations for dynamic loading */ /* declarations for dynamic loading */
PG_FUNCTION_INFO_V1(rebalance_table_shards); PG_FUNCTION_INFO_V1(rebalance_table_shards);
@ -256,10 +262,18 @@ PG_FUNCTION_INFO_V1(master_drain_node);
PG_FUNCTION_INFO_V1(citus_shard_cost_by_disk_size); PG_FUNCTION_INFO_V1(citus_shard_cost_by_disk_size);
PG_FUNCTION_INFO_V1(citus_validate_rebalance_strategy_functions); PG_FUNCTION_INFO_V1(citus_validate_rebalance_strategy_functions);
PG_FUNCTION_INFO_V1(pg_dist_rebalance_strategy_enterprise_check); PG_FUNCTION_INFO_V1(pg_dist_rebalance_strategy_enterprise_check);
PG_FUNCTION_INFO_V1(citus_rebalance_start);
PG_FUNCTION_INFO_V1(citus_rebalance_stop);
PG_FUNCTION_INFO_V1(citus_rebalance_wait);
bool RunningUnderIsolationTest = false; bool RunningUnderIsolationTest = false;
int MaxRebalancerLoggedIgnoredMoves = 5; int MaxRebalancerLoggedIgnoredMoves = 5;
static const char *PlacementUpdateTypeNames[] = {
[PLACEMENT_UPDATE_INVALID_FIRST] = "unknown",
[PLACEMENT_UPDATE_MOVE] = "move",
[PLACEMENT_UPDATE_COPY] = "copy",
};
#ifdef USE_ASSERT_CHECKING #ifdef USE_ASSERT_CHECKING
@ -792,6 +806,7 @@ SetupRebalanceMonitor(List *placementUpdateList,
event->shardId = colocatedUpdate->shardId; event->shardId = colocatedUpdate->shardId;
event->sourcePort = colocatedUpdate->sourceNode->workerPort; event->sourcePort = colocatedUpdate->sourceNode->workerPort;
event->targetPort = colocatedUpdate->targetNode->workerPort; event->targetPort = colocatedUpdate->targetNode->workerPort;
event->updateType = colocatedUpdate->updateType;
pg_atomic_init_u64(&event->progress, initialProgressState); pg_atomic_init_u64(&event->progress, initialProgressState);
eventIndex++; eventIndex++;
@ -858,6 +873,93 @@ rebalance_table_shards(PG_FUNCTION_ARGS)
} }
/*
* citus_rebalance_start rebalances the shards across the workers.
*
* SQL signature:
*
* citus_rebalance_start(
* rebalance_strategy name DEFAULT NULL,
* drain_only boolean DEFAULT false,
* shard_transfer_mode citus.shard_transfer_mode default 'auto'
* ) RETURNS bigint
*/
Datum
citus_rebalance_start(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
List *relationIdList = NonColocatedDistRelationIdList();
Form_pg_dist_rebalance_strategy strategy =
GetRebalanceStrategy(PG_GETARG_NAME_OR_NULL(0));
PG_ENSURE_ARGNOTNULL(1, "drain_only");
bool drainOnly = PG_GETARG_BOOL(1);
PG_ENSURE_ARGNOTNULL(2, "shard_transfer_mode");
Oid shardTransferModeOid = PG_GETARG_OID(2);
RebalanceOptions options = {
.relationIdList = relationIdList,
.threshold = strategy->defaultThreshold,
.maxShardMoves = 10000000,
.excludedShardArray = construct_empty_array(INT4OID),
.drainOnly = drainOnly,
.rebalanceStrategy = strategy,
.improvementThreshold = strategy->improvementThreshold,
};
int jobId = RebalanceTableShardsBackground(&options, shardTransferModeOid);
if (jobId == 0)
{
PG_RETURN_NULL();
}
PG_RETURN_INT64(jobId);
}
/*
* citus_rebalance_stop stops any ongoing background rebalance that is executing.
* Raises an error when there is no background rebalance ongoing at the moment.
*/
Datum
citus_rebalance_stop(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
int64 jobId = 0;
if (!HasNonTerminalJobOfType("rebalance", &jobId))
{
ereport(ERROR, (errmsg("no ongoing rebalance that can be stopped")));
}
DirectFunctionCall1(citus_job_cancel, Int64GetDatum(jobId));
PG_RETURN_VOID();
}
/*
* citus_rebalance_wait waits until an ongoing background rebalance has finished execution.
* A warning will be displayed if no rebalance is ongoing.
*/
Datum
citus_rebalance_wait(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
int64 jobId = 0;
if (!HasNonTerminalJobOfType("rebalance", &jobId))
{
ereport(WARNING, (errmsg("no ongoing rebalance that can be waited on")));
PG_RETURN_VOID();
}
citus_job_wait_internal(jobId, NULL);
PG_RETURN_VOID();
}
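A usage sketch of the three UDFs defined above:

SELECT citus_rebalance_start();   -- schedules a background rebalance job and returns its job id
SELECT citus_rebalance_wait();    -- blocks until that job reaches a terminal state
SELECT citus_rebalance_stop();    -- cancels the running rebalance job; errors if none is running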
/* /*
* GetRebalanceStrategy returns the rebalance strategy from * GetRebalanceStrategy returns the rebalance strategy from
* pg_dist_rebalance_strategy matching the given name. If name is NULL it * pg_dist_rebalance_strategy matching the given name. If name is NULL it
@ -1138,8 +1240,8 @@ get_rebalance_progress(PG_FUNCTION_ARGS)
shardSize = shardSizesStat->totalSize; shardSize = shardSizesStat->totalSize;
} }
Datum values[11]; Datum values[12];
bool nulls[11]; bool nulls[12];
memset(values, 0, sizeof(values)); memset(values, 0, sizeof(values));
memset(nulls, 0, sizeof(nulls)); memset(nulls, 0, sizeof(nulls));
@ -1155,6 +1257,8 @@ get_rebalance_progress(PG_FUNCTION_ARGS)
values[8] = UInt64GetDatum(pg_atomic_read_u64(&step->progress)); values[8] = UInt64GetDatum(pg_atomic_read_u64(&step->progress));
values[9] = UInt64GetDatum(sourceSize); values[9] = UInt64GetDatum(sourceSize);
values[10] = UInt64GetDatum(targetSize); values[10] = UInt64GetDatum(targetSize);
values[11] = PointerGetDatum(
cstring_to_text(PlacementUpdateTypeNames[step->updateType]));
tuplestore_putvalues(tupstore, tupdesc, values, nulls); tuplestore_putvalues(tupstore, tupdesc, values, nulls);
} }
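The progress tuple grows from 11 to 12 columns here; the extra text column carries the PlacementUpdateTypeNames value ('move' or 'copy') for each step. A hedged inspection sketch follows — the SQL-level column name is defined in the accompanying UDF definition, not in this hunk:

   SELECT * FROM get_rebalance_progress();
   -- the last column now reports whether each step is a 'move' or a 'copy'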
@ -1579,17 +1683,14 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid)
return; return;
} }
Oid relationId = InvalidOid;
char *operationName = "rebalance"; char *operationName = "rebalance";
if (options->drainOnly) if (options->drainOnly)
{ {
operationName = "move"; operationName = "move";
} }
foreach_oid(relationId, options->relationIdList) options->operationName = operationName;
{ ErrorOnConcurrentRebalance(options);
AcquireRebalanceColocationLock(relationId, operationName);
}
List *placementUpdateList = GetRebalanceSteps(options); List *placementUpdateList = GetRebalanceSteps(options);
@ -1609,6 +1710,168 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid)
} }
/*
* ErrorOnConcurrentRebalance raises an error with extra information when there is already
* a rebalance running.
*/
static void
ErrorOnConcurrentRebalance(RebalanceOptions *options)
{
Oid relationId = InvalidOid;
foreach_oid(relationId, options->relationIdList)
{
/* this provides the legacy error when the lock can't be acquired */
AcquireRebalanceColocationLock(relationId, options->operationName);
}
int64 jobId = 0;
if (HasNonTerminalJobOfType("rebalance", &jobId))
{
ereport(ERROR, (
errmsg("A rebalance is already running as job %ld", jobId),
errdetail("A rebalance was already scheduled as background job"),
errhint("To monitor progress, run: SELECT * FROM "
"pg_dist_background_task WHERE job_id = %ld ORDER BY task_id "
"ASC; or SELECT * FROM get_rebalance_progress();",
jobId)));
}
}
/*
* RebalanceTableShardsBackground rebalances the shards for the relations
* inside the relationIdList across the different workers. It does so using our
* background job+task infrastructure.
*/
static int64
RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationModeOid)
{
if (list_length(options->relationIdList) == 0)
{
ereport(NOTICE, (errmsg("No tables to rebalance")));
return 0;
}
char *operationName = "rebalance";
if (options->drainOnly)
{
operationName = "move";
}
options->operationName = operationName;
ErrorOnConcurrentRebalance(options);
const char shardTransferMode = LookupShardTransferMode(shardReplicationModeOid);
List *colocatedTableList = NIL;
Oid relationId = InvalidOid;
foreach_oid(relationId, options->relationIdList)
{
colocatedTableList = list_concat(colocatedTableList,
ColocatedTableList(relationId));
}
Oid colocatedTableId = InvalidOid;
foreach_oid(colocatedTableId, colocatedTableList)
{
EnsureTableOwner(colocatedTableId);
}
if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
{
/* make sure that all tables included in the rebalance have a replica identity*/
VerifyTablesHaveReplicaIdentity(colocatedTableList);
}
List *placementUpdateList = GetRebalanceSteps(options);
if (list_length(placementUpdateList) == 0)
{
ereport(NOTICE, (errmsg("No moves available for rebalancing")));
return 0;
}
DropOrphanedShardsInSeparateTransaction();
/* find the name of the shard transfer mode to interpolate in the scheduled command */
Datum shardTranferModeLabelDatum =
DirectFunctionCall1(enum_out, shardReplicationModeOid);
char *shardTranferModeLabel = DatumGetCString(shardTranferModeLabelDatum);
/* schedule planned moves */
int64 jobId = CreateBackgroundJob("rebalance", "Rebalance all colocation groups");
/* buffer used to construct the sql command for the tasks */
StringInfoData buf = { 0 };
initStringInfo(&buf);
/*
* Currently we only have two tasks that any move can depend on:
* - replicating reference tables
* - the previous move
*
* prevJobIdx tells what slot to write the id of the task into. We only use both slots
* if we are actually replicating reference tables.
*/
int64 prevJobId[2] = { 0 };
int prevJobIdx = 0;
List *referenceTableIdList = NIL;
if (HasNodesWithMissingReferenceTables(&referenceTableIdList))
{
VerifyTablesHaveReplicaIdentity(referenceTableIdList);
/*
 * Reference tables need to be copied to (newly added) nodes; this needs to be the
 * first task before we can move any other table.
*/
appendStringInfo(&buf,
"SELECT pg_catalog.replicate_reference_tables(%s)",
quote_literal_cstr(shardTranferModeLabel));
BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
prevJobIdx, prevJobId);
prevJobId[prevJobIdx] = task->taskid;
prevJobIdx++;
}
PlacementUpdateEvent *move = NULL;
bool first = true;
int prevMoveIndex = prevJobIdx;
foreach_ptr(move, placementUpdateList)
{
resetStringInfo(&buf);
appendStringInfo(&buf,
"SELECT pg_catalog.citus_move_shard_placement(%ld,%s,%u,%s,%u,%s)",
move->shardId,
quote_literal_cstr(move->sourceNode->workerName),
move->sourceNode->workerPort,
quote_literal_cstr(move->targetNode->workerName),
move->targetNode->workerPort,
quote_literal_cstr(shardTranferModeLabel));
BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
prevJobIdx, prevJobId);
prevJobId[prevMoveIndex] = task->taskid;
if (first)
{
first = false;
prevJobIdx++;
}
}
ereport(NOTICE,
(errmsg("Scheduled %d moves as job %ld",
list_length(placementUpdateList), jobId),
errdetail("Rebalance scheduled as background job"),
errhint("To monitor progress, run: "
"SELECT * FROM pg_dist_background_task WHERE job_id = %ld ORDER BY "
"task_id ASC; or SELECT * FROM get_rebalance_progress();",
jobId)));
return jobId;
}
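Based on the commands built above, the scheduled job roughly consists of task rows like the following (the shard id, host names, ports, and job id are placeholders; the replicate_reference_tables task is only added when some node is missing reference tables):

   SELECT pg_catalog.replicate_reference_tables('auto');
   SELECT pg_catalog.citus_move_shard_placement(102008,'10.0.0.1',5432,'10.0.0.2',5432,'auto');

   -- monitoring, as suggested by the NOTICE hint
   SELECT * FROM pg_dist_background_task WHERE job_id = :job_id ORDER BY task_id ASC;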
/* /*
* UpdateShardPlacement copies or moves a shard placement by calling * UpdateShardPlacement copies or moves a shard placement by calling
* the corresponding functions in Citus in a subtransaction. * the corresponding functions in Citus in a subtransaction.
@ -1621,7 +1884,6 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent,
uint64 shardId = placementUpdateEvent->shardId; uint64 shardId = placementUpdateEvent->shardId;
WorkerNode *sourceNode = placementUpdateEvent->sourceNode; WorkerNode *sourceNode = placementUpdateEvent->sourceNode;
WorkerNode *targetNode = placementUpdateEvent->targetNode; WorkerNode *targetNode = placementUpdateEvent->targetNode;
const char *doRepair = "false";
Datum shardTranferModeLabelDatum = Datum shardTranferModeLabelDatum =
DirectFunctionCall1(enum_out, shardReplicationModeOid); DirectFunctionCall1(enum_out, shardReplicationModeOid);
@ -1665,13 +1927,12 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent,
else if (updateType == PLACEMENT_UPDATE_COPY) else if (updateType == PLACEMENT_UPDATE_COPY)
{ {
appendStringInfo(placementUpdateCommand, appendStringInfo(placementUpdateCommand,
"SELECT citus_copy_shard_placement(%ld,%s,%u,%s,%u,%s,%s)", "SELECT citus_copy_shard_placement(%ld,%s,%u,%s,%u,%s)",
shardId, shardId,
quote_literal_cstr(sourceNode->workerName), quote_literal_cstr(sourceNode->workerName),
sourceNode->workerPort, sourceNode->workerPort,
quote_literal_cstr(targetNode->workerName), quote_literal_cstr(targetNode->workerName),
targetNode->workerPort, targetNode->workerPort,
doRepair,
quote_literal_cstr(shardTranferModeLabel)); quote_literal_cstr(shardTranferModeLabel));
} }
else else

View File

@ -29,7 +29,7 @@
#include "distributed/remote_commands.h" #include "distributed/remote_commands.h"
#include "distributed/shard_split.h" #include "distributed/shard_split.h"
#include "distributed/reference_table_utils.h" #include "distributed/reference_table_utils.h"
#include "distributed/repair_shards.h" #include "distributed/shard_transfer.h"
#include "distributed/resource_lock.h" #include "distributed/resource_lock.h"
#include "distributed/multi_partitioning_utils.h" #include "distributed/multi_partitioning_utils.h"
#include "distributed/worker_manager.h" #include "distributed/worker_manager.h"
@ -132,8 +132,9 @@ static void UpdateDistributionColumnsForShardGroup(List *colocatedShardList,
uint32 colocationId); uint32 colocationId);
static void InsertSplitChildrenShardMetadata(List *shardGroupSplitIntervalListList, static void InsertSplitChildrenShardMetadata(List *shardGroupSplitIntervalListList,
List *workersForPlacementList); List *workersForPlacementList);
static void CreatePartitioningHierarchy(List *shardGroupSplitIntervalListList, static void CreatePartitioningHierarchyForBlockingSplit(
List *workersForPlacementList); List *shardGroupSplitIntervalListList,
List *workersForPlacementList);
static void CreateForeignKeyConstraints(List *shardGroupSplitIntervalListList, static void CreateForeignKeyConstraints(List *shardGroupSplitIntervalListList,
List *workersForPlacementList); List *workersForPlacementList);
static Task * CreateTaskForDDLCommandList(List *ddlCommandList, WorkerNode *workerNode); static Task * CreateTaskForDDLCommandList(List *ddlCommandList, WorkerNode *workerNode);
@ -233,9 +234,7 @@ ErrorIfCannotSplitShard(SplitOperation splitOperation, ShardInterval *sourceShar
"for the shard %lu", "for the shard %lu",
SplitOperationName[splitOperation], SplitOperationName[splitOperation],
SplitTargetName[splitOperation], SplitTargetName[splitOperation],
relationName, shardId), relationName, shardId)));
errhint("Use master_copy_shard_placement UDF to "
"repair the inactive shard placement.")));
} }
} }
} }
@ -632,8 +631,9 @@ BlockingShardSplit(SplitOperation splitOperation,
workersForPlacementList); workersForPlacementList);
/* create partitioning hierarchy, if any */ /* create partitioning hierarchy, if any */
CreatePartitioningHierarchy(shardGroupSplitIntervalListList, CreatePartitioningHierarchyForBlockingSplit(
workersForPlacementList); shardGroupSplitIntervalListList,
workersForPlacementList);
/* /*
* Create foreign keys if exists after the metadata changes happening in * Create foreign keys if exists after the metadata changes happening in
@ -1220,8 +1220,8 @@ InsertSplitChildrenShardMetadata(List *shardGroupSplitIntervalListList,
* hierarchy between the shardList, if any. * hierarchy between the shardList, if any.
*/ */
static void static void
CreatePartitioningHierarchy(List *shardGroupSplitIntervalListList, CreatePartitioningHierarchyForBlockingSplit(List *shardGroupSplitIntervalListList,
List *workersForPlacementList) List *workersForPlacementList)
{ {
/* Create partition hierarchy between shards */ /* Create partition hierarchy between shards */
List *shardIntervalList = NIL; List *shardIntervalList = NIL;
@ -1612,51 +1612,18 @@ NonBlockingShardSplit(SplitOperation splitOperation,
snapshot, distributionColumnOverrides); snapshot, distributionColumnOverrides);
/* /*
* 9) Create replica identities, this needs to be done before enabling * 9) Logically replicate all the changes and do most of the table DDL,
* the subscriptions. * like index and foreign key creation.
*/ */
CreateReplicaIdentities(logicalRepTargetList); CompleteNonBlockingShardTransfer(sourceColocatedShardIntervalList,
sourceConnection,
publicationInfoHash,
logicalRepTargetList,
groupedLogicalRepTargetsHash,
SHARD_SPLIT);
/* /*
* 10) Enable the subscriptions: Start the catchup phase * 10) Delete old shards metadata and either mark the shards as
*/
EnableSubscriptions(logicalRepTargetList);
/* 11) Wait for subscriptions to be ready */
WaitForAllSubscriptionsToBecomeReady(groupedLogicalRepTargetsHash);
/* 12) Wait for subscribers to catchup till source LSN */
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
/* 13) Create Auxilary structures */
CreateAuxiliaryStructuresForShardGroup(shardGroupSplitIntervalListList,
workersForPlacementList,
false /* includeReplicaIdentity*/);
/* 14) Wait for subscribers to catchup till source LSN */
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
/* Used for testing */
ConflictOnlyWithIsolationTesting();
/* 15) Block writes on source shards */
BlockWritesToShardList(sourceColocatedShardIntervalList);
/* 16) Wait for subscribers to catchup till source LSN */
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
/* 17) Drop Subscribers */
DropSubscriptions(logicalRepTargetList);
/* 18) Drop replication slots
*/
DropReplicationSlots(sourceConnection, logicalRepTargetList);
/* 19) Drop Publications */
DropPublications(sourceConnection, publicationInfoHash);
/*
* 20) Delete old shards metadata and either mark the shards as
* to be deferred drop or physically delete them. * to be deferred drop or physically delete them.
* Have to do that before creating the new shard metadata, * Have to do that before creating the new shard metadata,
* because there's cross-checks preventing inconsistent metadata * because there's cross-checks preventing inconsistent metadata
@ -1674,7 +1641,7 @@ NonBlockingShardSplit(SplitOperation splitOperation,
DropShardListMetadata(sourceColocatedShardIntervalList); DropShardListMetadata(sourceColocatedShardIntervalList);
/* /*
* 21) In case of create_distributed_table_concurrently, which converts * 11) In case of create_distributed_table_concurrently, which converts
* a Citus local table to a distributed table, update the distributed * a Citus local table to a distributed table, update the distributed
* table metadata now. * table metadata now.
* *
@ -1706,34 +1673,36 @@ NonBlockingShardSplit(SplitOperation splitOperation,
targetColocationId); targetColocationId);
} }
/* 22) Insert new shard and placement metadata */ /* 12) Insert new shard and placement metadata */
InsertSplitChildrenShardMetadata(shardGroupSplitIntervalListList, InsertSplitChildrenShardMetadata(shardGroupSplitIntervalListList,
workersForPlacementList); workersForPlacementList);
CreatePartitioningHierarchy(shardGroupSplitIntervalListList, /* 13) create partitioning hierarchy, if any, this needs to be done
workersForPlacementList); * after the metadata is correct, because it fails for some
* uninvestigated reason otherwise.
*/
CreatePartitioningHierarchy(logicalRepTargetList);
/* /*
* 23) Create foreign keys if exists after the metadata changes happening in * 14) Create foreign keys if exists after the metadata changes happening in
* DropShardList() and InsertSplitChildrenShardMetadata() because the foreign * DropShardList() and InsertSplitChildrenShardMetadata() because the foreign
* key creation depends on the new metadata. * key creation depends on the new metadata.
*/ */
CreateForeignKeyConstraints(shardGroupSplitIntervalListList, CreateUncheckedForeignKeyConstraints(logicalRepTargetList);
workersForPlacementList);
/* /*
* 24) Release shared memory allocated by worker_split_shard_replication_setup udf * 15) Release shared memory allocated by worker_split_shard_replication_setup udf
* at source node. * at source node.
*/ */
ExecuteSplitShardReleaseSharedMemory(sourceShardToCopyNode); ExecuteSplitShardReleaseSharedMemory(sourceShardToCopyNode);
/* 25) Close source connection */ /* 16) Close source connection */
CloseConnection(sourceConnection); CloseConnection(sourceConnection);
/* 26) Close all subscriber connections */ /* 17) Close all subscriber connections */
CloseGroupedLogicalRepTargetsConnections(groupedLogicalRepTargetsHash); CloseGroupedLogicalRepTargetsConnections(groupedLogicalRepTargetsHash);
/* 27) Close connection of template replication slot */ /* 18) Close connection of template replication slot */
CloseConnection(sourceReplicationConnection); CloseConnection(sourceReplicationConnection);
} }
PG_CATCH(); PG_CATCH();

View File

@ -1,9 +1,8 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* repair_shards.c * shard_transfer.c
* *
* This file contains functions to repair unhealthy shard placements using data * This file contains functions to transfer shards between nodes.
* from healthy ones.
* *
* Copyright (c) Citus Data, Inc. * Copyright (c) Citus Data, Inc.
* *
@ -31,7 +30,6 @@
#include "distributed/listutils.h" #include "distributed/listutils.h"
#include "distributed/shard_cleaner.h" #include "distributed/shard_cleaner.h"
#include "distributed/coordinator_protocol.h" #include "distributed/coordinator_protocol.h"
#include "distributed/repair_shards.h"
#include "distributed/metadata_cache.h" #include "distributed/metadata_cache.h"
#include "distributed/metadata_sync.h" #include "distributed/metadata_sync.h"
#include "distributed/multi_join_order.h" #include "distributed/multi_join_order.h"
@ -43,6 +41,7 @@
#include "distributed/resource_lock.h" #include "distributed/resource_lock.h"
#include "distributed/shard_rebalancer.h" #include "distributed/shard_rebalancer.h"
#include "distributed/shard_split.h" #include "distributed/shard_split.h"
#include "distributed/shard_transfer.h"
#include "distributed/worker_manager.h" #include "distributed/worker_manager.h"
#include "distributed/worker_protocol.h" #include "distributed/worker_protocol.h"
#include "distributed/worker_transaction.h" #include "distributed/worker_transaction.h"
@ -76,9 +75,9 @@ static bool CanUseLogicalReplication(Oid relationId, char shardReplicationMode);
static void ErrorIfTableCannotBeReplicated(Oid relationId); static void ErrorIfTableCannotBeReplicated(Oid relationId);
static void ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName, static void ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName,
int targetNodePort); int targetNodePort);
static void RepairShardPlacement(int64 shardId, const char *sourceNodeName, static void ErrorIfSameNode(char *sourceNodeName, int sourceNodePort,
int32 sourceNodePort, const char *targetNodeName, char *targetNodeName, int targetNodePort,
int32 targetNodePort); const char *operationName);
static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
int32 sourceNodePort, char *targetNodeName, int32 sourceNodePort, char *targetNodeName,
int32 targetNodePort, int32 targetNodePort,
@ -95,12 +94,6 @@ static void CopyShardTablesViaLogicalReplication(List *shardIntervalList,
static void CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName, static void CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName,
int32 sourceNodePort, int32 sourceNodePort,
char *targetNodeName, int32 targetNodePort); char *targetNodeName, int32 targetNodePort);
static List * CopyPartitionShardsCommandList(ShardInterval *shardInterval,
const char *sourceNodeName,
int32 sourceNodePort);
static void EnsureShardCanBeRepaired(int64 shardId, const char *sourceNodeName,
int32 sourceNodePort, const char *targetNodeName,
int32 targetNodePort);
static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName,
int32 sourceNodePort, const char *targetNodeName, int32 sourceNodePort, const char *targetNodeName,
int32 targetNodePort); int32 targetNodePort);
@ -117,6 +110,8 @@ static void UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId,
int32 sourceNodePort, int32 sourceNodePort,
char *targetNodeName, char *targetNodeName,
int32 targetNodePort); int32 targetNodePort);
static bool IsShardListOnNode(List *colocatedShardList, char *targetNodeName,
uint32 targetPort);
static void CheckSpaceConstraints(MultiConnection *connection, static void CheckSpaceConstraints(MultiConnection *connection,
uint64 colocationSizeInBytes); uint64 colocationSizeInBytes);
static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList, static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
@ -150,14 +145,8 @@ bool CheckAvailableSpaceBeforeMove = true;
/* /*
* citus_copy_shard_placement implements a user-facing UDF to repair data from * citus_copy_shard_placement implements a user-facing UDF to copy a placement
* a healthy (source) node to an inactive (target) node. To accomplish this it * from a source node to a target node, including all co-located placements.
* entirely recreates the table structure before copying all data. During this
* time all modifications are paused to the shard. After successful repair, the
* inactive placement is marked healthy and modifications may continue. If the
* repair fails at any point, this function throws an error, leaving the node
* in an unhealthy state. Please note that citus_copy_shard_placement copies
* given shard along with its co-located shards.
*/ */
Datum Datum
citus_copy_shard_placement(PG_FUNCTION_ARGS) citus_copy_shard_placement(PG_FUNCTION_ARGS)
@ -165,6 +154,35 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS)
CheckCitusVersion(ERROR); CheckCitusVersion(ERROR);
EnsureCoordinator(); EnsureCoordinator();
int64 shardId = PG_GETARG_INT64(0);
text *sourceNodeNameText = PG_GETARG_TEXT_P(1);
int32 sourceNodePort = PG_GETARG_INT32(2);
text *targetNodeNameText = PG_GETARG_TEXT_P(3);
int32 targetNodePort = PG_GETARG_INT32(4);
Oid shardReplicationModeOid = PG_GETARG_OID(5);
char *sourceNodeName = text_to_cstring(sourceNodeNameText);
char *targetNodeName = text_to_cstring(targetNodeNameText);
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
shardReplicationMode);
PG_RETURN_VOID();
}
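A hedged usage sketch of the trimmed-down signature above (the do_repair argument is gone; the shard id, host names, and ports are placeholders, and 'block_writes' is one of the citus.shard_transfer_mode values):

   SELECT citus_copy_shard_placement(102008, 'source-host', 5432, 'target-host', 5432, 'block_writes');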
/*
 * master_copy_shard_placement is a wrapper function kept for the old UDF name.
*/
Datum
master_copy_shard_placement(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
EnsureCoordinator();
int64 shardId = PG_GETARG_INT64(0); int64 shardId = PG_GETARG_INT64(0);
text *sourceNodeNameText = PG_GETARG_TEXT_P(1); text *sourceNodeNameText = PG_GETARG_TEXT_P(1);
int32 sourceNodePort = PG_GETARG_INT32(2); int32 sourceNodePort = PG_GETARG_INT32(2);
@ -177,137 +195,21 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS)
char *targetNodeName = text_to_cstring(targetNodeNameText); char *targetNodeName = text_to_cstring(targetNodeNameText);
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
if (doRepair && shardReplicationMode == TRANSFER_MODE_FORCE_LOGICAL)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("logical replication cannot be used for repairs")));
}
ShardInterval *shardInterval = LoadShardInterval(shardId);
ErrorIfTableCannotBeReplicated(shardInterval->relationId);
ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort);
AcquirePlacementColocationLock(shardInterval->relationId, ExclusiveLock,
doRepair ? "repair" : "copy");
if (doRepair) if (doRepair)
{ {
RepairShardPlacement(shardId, sourceNodeName, sourceNodePort, targetNodeName, ereport(WARNING, (errmsg("do_repair argument is deprecated")));
targetNodePort);
}
else
{
ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
shardReplicationMode);
} }
ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
shardReplicationMode);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
/*
* master_copy_shard_placement is a wrapper function for old UDF name.
*/
Datum
master_copy_shard_placement(PG_FUNCTION_ARGS)
{
return citus_copy_shard_placement(fcinfo);
}
/*
* ShardListSizeInBytes returns the size in bytes of a set of shard tables.
*/
uint64
ShardListSizeInBytes(List *shardList, char *workerNodeName, uint32
workerNodePort)
{
uint32 connectionFlag = 0;
/* we skip child tables of a partitioned table if this boolean variable is true */
bool optimizePartitionCalculations = true;
StringInfo tableSizeQuery = GenerateSizeQueryOnMultiplePlacements(shardList,
TOTAL_RELATION_SIZE,
optimizePartitionCalculations);
MultiConnection *connection = GetNodeConnection(connectionFlag, workerNodeName,
workerNodePort);
PGresult *result = NULL;
int queryResult = ExecuteOptionalRemoteCommand(connection, tableSizeQuery->data,
&result);
if (queryResult != RESPONSE_OKAY)
{
ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("cannot get the size because of a connection error")));
}
List *sizeList = ReadFirstColumnAsText(result);
if (list_length(sizeList) != 1)
{
ereport(ERROR, (errmsg(
"received wrong number of rows from worker, expected 1 received %d",
list_length(sizeList))));
}
StringInfo totalSizeStringInfo = (StringInfo) linitial(sizeList);
char *totalSizeString = totalSizeStringInfo->data;
uint64 totalSize = SafeStringToUint64(totalSizeString);
PQclear(result);
ForgetResults(connection);
return totalSize;
}
/*
* CheckSpaceConstraints checks there is enough space to place the colocation
* on the node that the connection is connected to.
*/
static void
CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes)
{
uint64 diskAvailableInBytes = 0;
uint64 diskSizeInBytes = 0;
bool success =
GetNodeDiskSpaceStatsForConnection(connection, &diskAvailableInBytes,
&diskSizeInBytes);
if (!success)
{
ereport(ERROR, (errmsg("Could not fetch disk stats for node: %s-%d",
connection->hostname, connection->port)));
}
uint64 diskAvailableInBytesAfterShardMove = 0;
if (diskAvailableInBytes < colocationSizeInBytes)
{
/*
* even though the space will be less than "0", we set it to 0 for convenience.
*/
diskAvailableInBytes = 0;
}
else
{
diskAvailableInBytesAfterShardMove = diskAvailableInBytes - colocationSizeInBytes;
}
uint64 desiredNewDiskAvailableInBytes = diskSizeInBytes *
(DesiredPercentFreeAfterMove / 100);
if (diskAvailableInBytesAfterShardMove < desiredNewDiskAvailableInBytes)
{
ereport(ERROR, (errmsg("not enough empty space on node if the shard is moved, "
"actual available space after move will be %ld bytes, "
"desired available space after move is %ld bytes,"
"estimated size increase on node after move is %ld bytes.",
diskAvailableInBytesAfterShardMove,
desiredNewDiskAvailableInBytes, colocationSizeInBytes),
errhint(
"consider lowering citus.desired_percent_disk_available_after_move.")));
}
}
/* /*
* citus_move_shard_placement moves given shard (and its co-located shards) from one * citus_move_shard_placement moves given shard (and its co-located shards) from one
* node to the other node. To accomplish this it entirely recreates the table structure * node to the other node. To accomplish this it entirely recreates the table structure
@ -339,6 +241,10 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
ListCell *colocatedTableCell = NULL; ListCell *colocatedTableCell = NULL;
ListCell *colocatedShardCell = NULL; ListCell *colocatedShardCell = NULL;
ErrorIfSameNode(sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
"move");
Oid relationId = RelationIdForShard(shardId); Oid relationId = RelationIdForShard(shardId);
ErrorIfMoveUnsupportedTableType(relationId); ErrorIfMoveUnsupportedTableType(relationId);
ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort); ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort);
@ -370,8 +276,8 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
{ {
char *relationName = get_rel_name(colocatedTableId); char *relationName = get_rel_name(colocatedTableId);
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot repair shard"), errmsg("cannot move shard"),
errdetail("Table %s is a foreign table. Repairing " errdetail("Table %s is a foreign table. Moving "
"shards backed by foreign tables is " "shards backed by foreign tables is "
"not supported.", relationName))); "not supported.", relationName)));
} }
@ -379,6 +285,20 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
/* we sort colocatedShardList so that lock operations will not cause any deadlocks */ /* we sort colocatedShardList so that lock operations will not cause any deadlocks */
colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById); colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
/*
* If there are no active placements on the source and only active placements on
* the target node, we assume the copy to already be done.
*/
if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
!IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
{
ereport(WARNING, (errmsg("shard is already present on node %s:%d",
targetNodeName, targetNodePort),
errdetail("Move may have already completed.")));
PG_RETURN_VOID();
}
foreach(colocatedShardCell, colocatedShardList) foreach(colocatedShardCell, colocatedShardList)
{ {
ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell); ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell);
@ -493,6 +413,39 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
} }
/*
* IsShardListOnNode determines whether a co-located shard list has
* active placements on a given node.
*/
static bool
IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetNodePort)
{
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
if (workerNode == NULL)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Moving shards to a non-existing node is not supported")));
}
/*
* We exhaustively search all co-located shards
*/
ShardInterval *shardInterval = NULL;
foreach_ptr(shardInterval, colocatedShardList)
{
uint64 shardId = shardInterval->shardId;
List *placementList = ActiveShardPlacementListOnGroup(shardId,
workerNode->groupId);
if (placementList == NIL)
{
return false;
}
}
return true;
}
/* /*
* EnsureEnoughDiskSpaceForShardMove checks that there is enough space for * EnsureEnoughDiskSpaceForShardMove checks that there is enough space for
* shard moves of the given colocated shard list from source node to target node. * shard moves of the given colocated shard list from source node to target node.
@ -518,6 +471,98 @@ EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
} }
/*
* ShardListSizeInBytes returns the size in bytes of a set of shard tables.
*/
uint64
ShardListSizeInBytes(List *shardList, char *workerNodeName, uint32
workerNodePort)
{
uint32 connectionFlag = 0;
/* we skip child tables of a partitioned table if this boolean variable is true */
bool optimizePartitionCalculations = true;
StringInfo tableSizeQuery = GenerateSizeQueryOnMultiplePlacements(shardList,
TOTAL_RELATION_SIZE,
optimizePartitionCalculations);
MultiConnection *connection = GetNodeConnection(connectionFlag, workerNodeName,
workerNodePort);
PGresult *result = NULL;
int queryResult = ExecuteOptionalRemoteCommand(connection, tableSizeQuery->data,
&result);
if (queryResult != RESPONSE_OKAY)
{
ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("cannot get the size because of a connection error")));
}
List *sizeList = ReadFirstColumnAsText(result);
if (list_length(sizeList) != 1)
{
ereport(ERROR, (errmsg(
"received wrong number of rows from worker, expected 1 received %d",
list_length(sizeList))));
}
StringInfo totalSizeStringInfo = (StringInfo) linitial(sizeList);
char *totalSizeString = totalSizeStringInfo->data;
uint64 totalSize = SafeStringToUint64(totalSizeString);
PQclear(result);
ForgetResults(connection);
return totalSize;
}
/*
* CheckSpaceConstraints checks there is enough space to place the colocation
* on the node that the connection is connected to.
*/
static void
CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes)
{
uint64 diskAvailableInBytes = 0;
uint64 diskSizeInBytes = 0;
bool success =
GetNodeDiskSpaceStatsForConnection(connection, &diskAvailableInBytes,
&diskSizeInBytes);
if (!success)
{
ereport(ERROR, (errmsg("Could not fetch disk stats for node: %s-%d",
connection->hostname, connection->port)));
}
uint64 diskAvailableInBytesAfterShardMove = 0;
if (diskAvailableInBytes < colocationSizeInBytes)
{
/*
* even though the space will be less than "0", we set it to 0 for convenience.
*/
diskAvailableInBytes = 0;
}
else
{
diskAvailableInBytesAfterShardMove = diskAvailableInBytes - colocationSizeInBytes;
}
uint64 desiredNewDiskAvailableInBytes = diskSizeInBytes *
(DesiredPercentFreeAfterMove / 100);
if (diskAvailableInBytesAfterShardMove < desiredNewDiskAvailableInBytes)
{
ereport(ERROR, (errmsg("not enough empty space on node if the shard is moved, "
"actual available space after move will be %ld bytes, "
"desired available space after move is %ld bytes,"
"estimated size increase on node after move is %ld bytes.",
diskAvailableInBytesAfterShardMove,
desiredNewDiskAvailableInBytes, colocationSizeInBytes),
errhint(
"consider lowering citus.desired_percent_disk_available_after_move.")));
}
}
/* /*
* ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not * ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not
* eligible for moving shards. * eligible for moving shards.
@ -563,6 +608,25 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
} }
/*
* ErrorIfSameNode throws an error if the two host:port combinations
* are the same.
*/
static void
ErrorIfSameNode(char *sourceNodeName, int sourceNodePort,
char *targetNodeName, int targetNodePort,
const char *operationName)
{
if (strncmp(sourceNodeName, targetNodeName, MAX_NODE_LENGTH) == 0 &&
sourceNodePort == targetNodePort)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot %s shard to the same node",
operationName)));
}
}
/* /*
* master_move_shard_placement is a wrapper around citus_move_shard_placement. * master_move_shard_placement is a wrapper around citus_move_shard_placement.
*/ */
@ -885,122 +949,6 @@ LookupShardTransferMode(Oid shardReplicationModeOid)
} }
/*
* RepairShardPlacement repairs given shard from a source node to target node.
* This function is not co-location aware. It only repairs given shard.
*/
static void
RepairShardPlacement(int64 shardId, const char *sourceNodeName, int32 sourceNodePort,
const char *targetNodeName, int32 targetNodePort)
{
ShardInterval *shardInterval = LoadShardInterval(shardId);
Oid distributedTableId = shardInterval->relationId;
char *tableOwner = TableOwner(shardInterval->relationId);
/* prevent table from being dropped */
LockRelationOid(distributedTableId, AccessShareLock);
EnsureTableOwner(distributedTableId);
if (IsForeignTable(distributedTableId))
{
char *relationName = get_rel_name(distributedTableId);
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot repair shard"),
errdetail("Table %s is a foreign table. Repairing "
"shards backed by foreign tables is "
"not supported.", relationName)));
}
/*
* Let's not allow repairing partitions to prevent any edge cases.
* We're already not allowing any kind of modifications on the partitions
* so their placements are not likely to be marked as INVALID. The only
* possible case to mark placement of a partition as invalid is
* "ALTER TABLE parent_table DETACH PARTITION partition_table". But,
* given that the table would become a regular distributed table if the
* command succeeds, we're OK since the regular distributed tables can
* be repaired later on.
*/
EnsurePartitionTableNotReplicated(distributedTableId);
/*
* We take a lock on the referenced table if there is a foreign constraint
* during the copy procedure. If we do not block DMLs on the referenced
* table, we cannot avoid the inconsistency between the two copies of the
* data. Currently, we do not support replication factor > 1 on the tables
* with foreign constraints, so this command will fail for this case anyway.
* However, it is taken as a precaution in case we support it one day.
*/
LockReferencedReferenceShardDistributionMetadata(shardId, ExclusiveLock);
/*
* We plan to move the placement to the healthy state, so we need to grab a shard
* metadata lock (in exclusive mode).
*/
LockShardDistributionMetadata(shardId, ExclusiveLock);
/*
* For shard repair, there should be healthy placement in source node and unhealthy
* placement in the target node.
*/
EnsureShardCanBeRepaired(shardId, sourceNodeName, sourceNodePort, targetNodeName,
targetNodePort);
/*
* If the shard belongs to a partitioned table, we need to load the data after
* creating the partitions and the partitioning hierarcy.
*/
bool partitionedTable = PartitionedTableNoLock(distributedTableId);
bool includeData = !partitionedTable;
/* we generate necessary commands to recreate the shard in target node */
List *ddlCommandList =
CopyShardCommandList(shardInterval, sourceNodeName, sourceNodePort, includeData);
List *foreignConstraintCommandList = CopyShardForeignConstraintCommandList(
shardInterval);
ddlCommandList = list_concat(ddlCommandList, foreignConstraintCommandList);
/*
* CopyShardCommandList() drops the table which cascades to partitions if the
* table is a partitioned table. This means that we need to create both parent
* table and its partitions.
*
* We also skipped copying the data, so include it here.
*/
if (partitionedTable)
{
char *shardName = ConstructQualifiedShardName(shardInterval);
StringInfo copyShardDataCommand = makeStringInfo();
List *partitionCommandList =
CopyPartitionShardsCommandList(shardInterval, sourceNodeName, sourceNodePort);
ddlCommandList = list_concat(ddlCommandList, partitionCommandList);
/* finally copy the data as well */
appendStringInfo(copyShardDataCommand, WORKER_APPEND_TABLE_TO_SHARD,
quote_literal_cstr(shardName), /* table to append */
quote_literal_cstr(shardName), /* remote table name */
quote_literal_cstr(sourceNodeName), /* remote host */
sourceNodePort); /* remote port */
ddlCommandList = lappend(ddlCommandList, copyShardDataCommand->data);
}
EnsureNoModificationsHaveBeenDone();
SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort, tableOwner,
ddlCommandList);
/* after successful repair, we update shard state as healthy*/
List *placementList = ShardPlacementListWithoutOrphanedPlacements(shardId);
ShardPlacement *placement = SearchShardPlacementInListOrError(placementList,
targetNodeName,
targetNodePort);
UpdateShardPlacementState(placement->placementId, SHARD_STATE_ACTIVE);
}
/* /*
* ReplicateColocatedShardPlacement replicates the given shard and its * ReplicateColocatedShardPlacement replicates the given shard and its
* colocated shards from a source node to target node. * colocated shards from a source node to target node.
@ -1013,6 +961,16 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
ShardInterval *shardInterval = LoadShardInterval(shardId); ShardInterval *shardInterval = LoadShardInterval(shardId);
Oid distributedTableId = shardInterval->relationId; Oid distributedTableId = shardInterval->relationId;
ErrorIfSameNode(sourceNodeName, sourceNodePort,
targetNodeName, targetNodePort,
"copy");
ErrorIfTableCannotBeReplicated(shardInterval->relationId);
ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort);
EnsureNoModificationsHaveBeenDone();
AcquirePlacementColocationLock(shardInterval->relationId, ExclusiveLock, "copy");
List *colocatedTableList = ColocatedTableList(distributedTableId); List *colocatedTableList = ColocatedTableList(distributedTableId);
List *colocatedShardList = ColocatedShardIntervalList(shardInterval); List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
@ -1025,6 +983,33 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
*/ */
colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById); colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
/*
 * If there are active placements on both nodes, we assume the copy has already
 * been completed.
*/
if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
{
ereport(WARNING, (errmsg("shard is already present on node %s:%d",
targetNodeName, targetNodePort),
errdetail("Copy may have already completed.")));
return;
}
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
Oid relationId = RelationIdForShard(shardId);
PlacementUpdateEvent *placementUpdateEvent = palloc0(
sizeof(PlacementUpdateEvent));
placementUpdateEvent->updateType = PLACEMENT_UPDATE_COPY;
placementUpdateEvent->shardId = shardId;
placementUpdateEvent->sourceNode = sourceNode;
placementUpdateEvent->targetNode = targetNode;
SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId,
REBALANCE_PROGRESS_MOVING);
/* /*
* At this point of the shard replication, we don't need to block the writes to * At this point of the shard replication, we don't need to block the writes to
* shards when logical replication is used. * shards when logical replication is used.
@ -1093,6 +1078,7 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
SendCommandToWorkersWithMetadata(placementCommand); SendCommandToWorkersWithMetadata(placementCommand);
} }
} }
FinalizeCurrentProgressMonitor();
} }
@ -1439,88 +1425,6 @@ CreateShardCopyCommand(ShardInterval *shard,
} }
/*
* CopyPartitionShardsCommandList gets a shardInterval which is a shard that
* belongs to partitioned table (this is asserted).
*
* The function returns a list of commands which re-creates all the partitions
* of the input shardInterval.
*/
static List *
CopyPartitionShardsCommandList(ShardInterval *shardInterval, const char *sourceNodeName,
int32 sourceNodePort)
{
Oid distributedTableId = shardInterval->relationId;
List *ddlCommandList = NIL;
Assert(PartitionedTableNoLock(distributedTableId));
List *partitionList = PartitionList(distributedTableId);
Oid partitionOid = InvalidOid;
foreach_oid(partitionOid, partitionList)
{
uint64 partitionShardId =
ColocatedShardIdInRelation(partitionOid, shardInterval->shardIndex);
ShardInterval *partitionShardInterval = LoadShardInterval(partitionShardId);
bool includeData = false;
List *copyCommandList =
CopyShardCommandList(partitionShardInterval, sourceNodeName, sourceNodePort,
includeData);
ddlCommandList = list_concat(ddlCommandList, copyCommandList);
char *attachPartitionCommand =
GenerateAttachShardPartitionCommand(partitionShardInterval);
ddlCommandList = lappend(ddlCommandList, attachPartitionCommand);
}
return ddlCommandList;
}
/*
* EnsureShardCanBeRepaired checks if the given shard has a healthy placement in the source
* node and inactive node on the target node.
*/
static void
EnsureShardCanBeRepaired(int64 shardId, const char *sourceNodeName, int32 sourceNodePort,
const char *targetNodeName, int32 targetNodePort)
{
List *shardPlacementList =
ShardPlacementListIncludingOrphanedPlacements(shardId);
ShardPlacement *sourcePlacement = SearchShardPlacementInListOrError(
shardPlacementList,
sourceNodeName,
sourceNodePort);
if (sourcePlacement->shardState != SHARD_STATE_ACTIVE)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("source placement must be in active state")));
}
ShardPlacement *targetPlacement = SearchShardPlacementInListOrError(
shardPlacementList,
targetNodeName,
targetNodePort);
/*
* shardStateInactive is a legacy state for a placement. As of Citus 11,
* we never mark any placement as INACTIVE.
*
* Still, we prefer to keep this function/code here, as users may need
* to recover placements that are marked as inactive pre Citus 11.
*
*/
int shardStateInactive = 3;
if (targetPlacement->shardState != shardStateInactive)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("target placement must be in inactive state")));
}
}
/* /*
* EnsureShardCanBeCopied checks if the given shard has a healthy placement in the source * EnsureShardCanBeCopied checks if the given shard has a healthy placement in the source
* node and no placements in the target node. * node and no placements in the target node.
@ -1544,6 +1448,7 @@ EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 sourceNo
ShardPlacement *targetPlacement = SearchShardPlacementInList(shardPlacementList, ShardPlacement *targetPlacement = SearchShardPlacementInList(shardPlacementList,
targetNodeName, targetNodeName,
targetNodePort); targetNodePort);
if (targetPlacement != NULL) if (targetPlacement != NULL)
{ {
if (targetPlacement->shardState == SHARD_STATE_TO_DELETE) if (targetPlacement->shardState == SHARD_STATE_TO_DELETE)
@ -1901,7 +1806,7 @@ RecreateTableDDLCommandList(Oid relationId)
else else
{ {
ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("repair target is not a regular, foreign or partitioned " errmsg("target is not a regular, foreign or partitioned "
"table"))); "table")));
} }

View File

@ -1154,8 +1154,7 @@ HasComplexRangeTableType(Query *queryTree)
if (rangeTableEntry->rtekind != RTE_RELATION && if (rangeTableEntry->rtekind != RTE_RELATION &&
rangeTableEntry->rtekind != RTE_SUBQUERY && rangeTableEntry->rtekind != RTE_SUBQUERY &&
rangeTableEntry->rtekind != RTE_FUNCTION && rangeTableEntry->rtekind != RTE_FUNCTION &&
rangeTableEntry->rtekind != RTE_VALUES && rangeTableEntry->rtekind != RTE_VALUES)
!IsJsonTableRTE(rangeTableEntry))
{ {
hasComplexRangeTableType = true; hasComplexRangeTableType = true;
} }

View File

@ -60,8 +60,7 @@ typedef enum RecurringTuplesType
RECURRING_TUPLES_FUNCTION, RECURRING_TUPLES_FUNCTION,
RECURRING_TUPLES_EMPTY_JOIN_TREE, RECURRING_TUPLES_EMPTY_JOIN_TREE,
RECURRING_TUPLES_RESULT_FUNCTION, RECURRING_TUPLES_RESULT_FUNCTION,
RECURRING_TUPLES_VALUES, RECURRING_TUPLES_VALUES
RECURRING_TUPLES_JSON_TABLE
} RecurringTuplesType; } RecurringTuplesType;
/* /*
@ -346,8 +345,7 @@ IsFunctionOrValuesRTE(Node *node)
RangeTblEntry *rangeTblEntry = (RangeTblEntry *) node; RangeTblEntry *rangeTblEntry = (RangeTblEntry *) node;
if (rangeTblEntry->rtekind == RTE_FUNCTION || if (rangeTblEntry->rtekind == RTE_FUNCTION ||
rangeTblEntry->rtekind == RTE_VALUES || rangeTblEntry->rtekind == RTE_VALUES)
IsJsonTableRTE(rangeTblEntry))
{ {
return true; return true;
} }
@ -720,13 +718,6 @@ DeferErrorIfFromClauseRecurs(Query *queryTree)
"the FROM clause contains VALUES", NULL, "the FROM clause contains VALUES", NULL,
NULL); NULL);
} }
else if (recurType == RECURRING_TUPLES_JSON_TABLE)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"correlated subqueries are not supported when "
"the FROM clause contains JSON_TABLE", NULL,
NULL);
}
/* /*
@ -954,13 +945,6 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
"There exist a VALUES clause in the outer " "There exist a VALUES clause in the outer "
"part of the outer join", NULL); "part of the outer join", NULL);
} }
else if (recurType == RECURRING_TUPLES_JSON_TABLE)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot pushdown the subquery",
"There exist a JSON_TABLE clause in the outer "
"part of the outer join", NULL);
}
return NULL; return NULL;
} }
@ -1251,8 +1235,7 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
*/ */
if (rangeTableEntry->rtekind == RTE_RELATION || if (rangeTableEntry->rtekind == RTE_RELATION ||
rangeTableEntry->rtekind == RTE_SUBQUERY || rangeTableEntry->rtekind == RTE_SUBQUERY ||
rangeTableEntry->rtekind == RTE_RESULT || rangeTableEntry->rtekind == RTE_RESULT)
IsJsonTableRTE(rangeTableEntry)) /* TODO: can we have volatile???*/
{ {
/* accepted */ /* accepted */
} }
@ -1420,13 +1403,6 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree)
"VALUES is not supported within a " "VALUES is not supported within a "
"UNION", NULL); "UNION", NULL);
} }
else if (recurType == RECURRING_TUPLES_JSON_TABLE)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot push down this subquery",
"JSON_TABLE is not supported within a "
"UNION", NULL);
}
return NULL; return NULL;
} }
@ -1526,11 +1502,6 @@ RecurringTypeDescription(RecurringTuplesType recurType)
return "a VALUES clause"; return "a VALUES clause";
} }
case RECURRING_TUPLES_JSON_TABLE:
{
return "a JSON_TABLE";
}
case RECURRING_TUPLES_INVALID: case RECURRING_TUPLES_INVALID:
{ {
/* /*
@ -1727,8 +1698,7 @@ DeferredErrorIfUnsupportedLateralSubquery(PlannerInfo *plannerInfo,
* strings anyway. * strings anyway.
*/ */
if (recurType != RECURRING_TUPLES_VALUES && if (recurType != RECURRING_TUPLES_VALUES &&
recurType != RECURRING_TUPLES_RESULT_FUNCTION && recurType != RECURRING_TUPLES_RESULT_FUNCTION)
recurType != RECURRING_TUPLES_JSON_TABLE)
{ {
recurTypeDescription = psprintf("%s (%s)", recurTypeDescription, recurTypeDescription = psprintf("%s (%s)", recurTypeDescription,
recurringRangeTableEntry->eref-> recurringRangeTableEntry->eref->
@ -1805,26 +1775,6 @@ ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType)
} }
/*
* IsJsonTableRTE checks whether the RTE refers to a JSON_TABLE
* table function, which was introduced in PostgreSQL 15.
*/
bool
IsJsonTableRTE(RangeTblEntry *rte)
{
#if PG_VERSION_NUM >= PG_VERSION_15
if (rte == NULL)
{
return false;
}
return (rte->rtekind == RTE_TABLEFUNC &&
rte->tablefunc->functype == TFT_JSON_TABLE);
#endif
return false;
}
/* /*
* HasRecurringTuples returns whether any part of the expression will generate * HasRecurringTuples returns whether any part of the expression will generate
* the same set of tuples in every query on shards when executing a distributed * the same set of tuples in every query on shards when executing a distributed
@ -1886,11 +1836,6 @@ HasRecurringTuples(Node *node, RecurringTuplesType *recurType)
*recurType = RECURRING_TUPLES_VALUES; *recurType = RECURRING_TUPLES_VALUES;
return true; return true;
} }
else if (IsJsonTableRTE(rangeTableEntry))
{
*recurType = RECURRING_TUPLES_JSON_TABLE;
return true;
}
return false; return false;
} }

View File

@ -44,9 +44,9 @@
#include "distributed/priority.h" #include "distributed/priority.h"
#include "distributed/distributed_planner.h" #include "distributed/distributed_planner.h"
#include "distributed/remote_commands.h" #include "distributed/remote_commands.h"
#include "distributed/repair_shards.h"
#include "distributed/resource_lock.h" #include "distributed/resource_lock.h"
#include "distributed/shard_rebalancer.h" #include "distributed/shard_rebalancer.h"
#include "distributed/shard_transfer.h"
#include "distributed/version_compat.h" #include "distributed/version_compat.h"
#include "nodes/bitmapset.h" #include "nodes/bitmapset.h"
#include "parser/scansup.h" #include "parser/scansup.h"
@ -114,31 +114,20 @@ bool PlacementMovedUsingLogicalReplicationInTX = false;
static int logicalReplicationProgressReportTimeout = 10 * 1000; static int logicalReplicationProgressReportTimeout = 10 * 1000;
static void CreateForeignKeyConstraints(List *logicalRepTargetList);
static List * PrepareReplicationSubscriptionList(List *shardList); static List * PrepareReplicationSubscriptionList(List *shardList);
static List * GetReplicaIdentityCommandListForShard(Oid relationId, uint64 shardId); static List * GetReplicaIdentityCommandListForShard(Oid relationId, uint64 shardId);
static List * GetIndexCommandListForShardBackingReplicaIdentity(Oid relationId, static List * GetIndexCommandListForShardBackingReplicaIdentity(Oid relationId,
uint64 shardId); uint64 shardId);
static void CreatePostLogicalReplicationDataLoadObjects(List *shardList, static void CreatePostLogicalReplicationDataLoadObjects(List *logicalRepTargetList,
char *targetNodeName, LogicalRepType type);
int32 targetNodePort); static void ExecuteCreateIndexCommands(List *logicalRepTargetList);
static void ExecuteCreateIndexCommands(List *shardList, char *targetNodeName, static void ExecuteCreateConstraintsBackedByIndexCommands(List *logicalRepTargetList);
int targetNodePort);
static void ExecuteCreateConstraintsBackedByIndexCommands(List *shardList,
char *targetNodeName,
int targetNodePort);
static List * ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList, static List * ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList,
uint64 shardId,
char *targetNodeName, char *targetNodeName,
int targetNodePort); int targetNodePort);
static void ExecuteClusterOnCommands(List *shardList, char *targetNodeName, static void ExecuteClusterOnCommands(List *logicalRepTargetList);
int targetNodePort); static void ExecuteCreateIndexStatisticsCommands(List *logicalRepTargetList);
static void ExecuteCreateIndexStatisticsCommands(List *shardList, char *targetNodeName, static void ExecuteRemainingPostLoadTableCommands(List *logicalRepTargetList);
int targetNodePort);
static void ExecuteRemainingPostLoadTableCommands(List *shardList, char *targetNodeName,
int targetNodePort);
static void CreatePartitioningHierarchy(List *shardList, char *targetNodeName,
int targetNodePort);
static char * escape_param_str(const char *str); static char * escape_param_str(const char *str);
static XLogRecPtr GetRemoteLSN(MultiConnection *connection, char *command); static XLogRecPtr GetRemoteLSN(MultiConnection *connection, char *command);
static bool RelationSubscriptionsAreReady( static bool RelationSubscriptionsAreReady(
@ -208,10 +197,6 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
*/ */
ClaimConnectionExclusively(sourceConnection); ClaimConnectionExclusively(sourceConnection);
MultiConnection *sourceReplicationConnection =
GetReplicationConnection(sourceNodeName, sourceNodePort);
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort); WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort); WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
@ -229,6 +214,9 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
PG_TRY(); PG_TRY();
{ {
MultiConnection *sourceReplicationConnection =
GetReplicationConnection(sourceConnection->hostname, sourceConnection->port);
/* set up the publication on the source and subscription on the target */ /* set up the publication on the source and subscription on the target */
CreatePublications(sourceConnection, publicationInfoHash); CreatePublications(sourceConnection, publicationInfoHash);
char *snapshot = CreateReplicationSlots( char *snapshot = CreateReplicationSlots(
@ -239,7 +227,7 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
CreateSubscriptions( CreateSubscriptions(
sourceConnection, sourceConnection,
databaseName, sourceConnection->database,
logicalRepTargetList); logicalRepTargetList);
/* only useful for isolation testing, see the function comment for the details */ /* only useful for isolation testing, see the function comment for the details */
@ -256,77 +244,14 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
CloseConnection(sourceReplicationConnection); CloseConnection(sourceReplicationConnection);
/* /*
* We have to create the primary key (or any other replica identity) * Start the replication and copy all data
* before the update/delete operations that are queued will be
* replicated. Because if the replica identity does not exist on the
* target, the replication would fail.
*
* So we it right after the initial data COPY, but before enabling the
* susbcriptions. We do it at this latest possible moment, because its
* much cheaper to build an index at once than to create it
* incrementally. So this way we create the primary key index in one go
* for all data from the initial COPY.
*/ */
CreateReplicaIdentities(logicalRepTargetList); CompleteNonBlockingShardTransfer(shardList,
sourceConnection,
/* Start applying the changes from the replication slots to catch up. */ publicationInfoHash,
EnableSubscriptions(logicalRepTargetList); logicalRepTargetList,
groupedLogicalRepTargetsHash,
/* SHARD_MOVE);
* The following check is a leftover from when used subscriptions with
* copy_data=true. It's probably not really necessary anymore, but it
* seemed like a nice check to keep. At least for debugging issues it
* seems nice to report differences between the subscription never
* becoming ready and the subscriber not applying WAL. It's also not
* entirely clear if the catchup check handles the case correctly where
* the subscription is not in the ready state yet, because so far it
* never had to.
*/
WaitForAllSubscriptionsToBecomeReady(groupedLogicalRepTargetsHash);
/*
* Wait until all the subscriptions are caught up to changes that
* happened after the initial COPY on the shards.
*/
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
/*
* Now lets create the post-load objects, such as the indexes, constraints
* and partitioning hierarchy. Once they are done, wait until the replication
* catches up again. So we don't block writes too long.
*/
CreatePostLogicalReplicationDataLoadObjects(shardList, targetNodeName,
targetNodePort);
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
/*
* We're almost done, we'll block the writes to the shards that we're
* replicating and expect all the subscription to catch up quickly
* afterwards.
*
* Notice that although shards in partitioned relation are excluded from
* logical replication, they are still locked against modification, and
* foreign constraints are created on them too.
*/
BlockWritesToShardList(shardList);
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
/*
* We're creating the foreign constraints to reference tables after the
* data is already replicated and all the necessary locks are acquired.
*
* We prefer to do it here because the placements of reference tables
* are always valid, and any modification during the shard move would
* cascade to the hash distributed tables' shards if we had created
* the constraints earlier.
*/
CreateForeignKeyConstraints(logicalRepTargetList);
/* we're done, cleanup the publication and subscription */
DropSubscriptions(logicalRepTargetList);
DropReplicationSlots(sourceConnection, logicalRepTargetList);
DropPublications(sourceConnection, publicationInfoHash);
/* /*
* We use these connections exclusively for subscription management, * We use these connections exclusively for subscription management,
@ -405,6 +330,104 @@ CreateGroupedLogicalRepTargetsHash(List *logicalRepTargetList)
} }
/*
* CompleteNonBlockingShardTransfer uses logical replication to apply the changes
* made on the source to the target. It also runs all DDL on the target shards
* that need to be run after the data copy.
*
* For shard splits it skips the partition hierarchy and foreign key creation
* though, since those need to happen after the metadata is updated.
*/
void
CompleteNonBlockingShardTransfer(List *shardList,
MultiConnection *sourceConnection,
HTAB *publicationInfoHash,
List *logicalRepTargetList,
HTAB *groupedLogicalRepTargetsHash,
LogicalRepType type)
{
/*
* We have to create the primary key (or any other replica identity)
* before the update/delete operations that are queued will be
* replicated. Because if the replica identity does not exist on the
* target, the replication would fail.
*
* So we do it right after the initial data COPY, but before enabling the
* subscriptions. We do it at this latest possible moment, because it's
* much cheaper to build an index at once than to create it
* incrementally. So this way we create the primary key index in one go
* for all data from the initial COPY.
*/
CreateReplicaIdentities(logicalRepTargetList);
/* Start applying the changes from the replication slots to catch up. */
EnableSubscriptions(logicalRepTargetList);
/*
* The following check is a leftover from when we used subscriptions with
* copy_data=true. It's probably not really necessary anymore, but it
* seemed like a nice check to keep. At least for debugging issues it
* seems nice to report differences between the subscription never
* becoming ready and the subscriber not applying WAL. It's also not
* entirely clear if the catchup check handles the case correctly where
* the subscription is not in the ready state yet, because so far it
* never had to.
*/
WaitForAllSubscriptionsToBecomeReady(groupedLogicalRepTargetsHash);
/*
* Wait until all the subscriptions are caught up to changes that
* happened after the initial COPY on the shards.
*/
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
/*
* Now lets create the post-load objects, such as the indexes, constraints
* and partitioning hierarchy. Once they are done, wait until the replication
* catches up again. So we don't block writes too long.
*/
CreatePostLogicalReplicationDataLoadObjects(logicalRepTargetList, type);
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
/* only useful for isolation testing, see the function comment for the details */
ConflictOnlyWithIsolationTesting();
/*
* We're almost done, we'll block the writes to the shards that we're
* replicating and expect all the subscription to catch up quickly
* afterwards.
*
* Notice that although shards in partitioned relation are excluded from
* logical replication, they are still locked against modification, and
* foreign constraints are created on them too.
*/
BlockWritesToShardList(shardList);
WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash);
if (type != SHARD_SPLIT)
{
/*
* We're creating the foreign constraints to reference tables after the
* data is already replicated and all the necessary locks are acquired.
*
* We prefer to do it here because the placements of reference tables
* are always valid, and any modification during the shard move would
* cascade to the hash distributed tables' shards if we had created
* the constraints earlier. The same is true for foreign keys between
* tables owned by different users.
*/
CreateUncheckedForeignKeyConstraints(logicalRepTargetList);
}
/* we're done, cleanup the publication and subscription */
DropSubscriptions(logicalRepTargetList);
DropReplicationSlots(sourceConnection, logicalRepTargetList);
DropPublications(sourceConnection, publicationInfoHash);
}
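For orientation, this non-blocking path is what a logical-replication shard move ends up running. A minimal SQL-level sketch, assuming a hypothetical shard 102008 and workers worker-1:9701 and worker-2:9702 (all placeholder values):

SELECT citus_move_shard_placement(102008, 'worker-1', 9701, 'worker-2', 9702, 'force_logical');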
/* /*
* CreateShardMovePublicationInfoHash creates hashmap of PublicationInfos for a * CreateShardMovePublicationInfoHash creates hashmap of PublicationInfos for a
* shard move. Even though we only support moving a shard to a single target * shard move. Even though we only support moving a shard to a single target
@ -742,8 +765,8 @@ GetReplicaIdentityCommandListForShard(Oid relationId, uint64 shardId)
* the objects that can be created after the data is moved with logical replication. * the objects that can be created after the data is moved with logical replication.
*/ */
static void static void
CreatePostLogicalReplicationDataLoadObjects(List *shardList, char *targetNodeName, CreatePostLogicalReplicationDataLoadObjects(List *logicalRepTargetList,
int32 targetNodePort) LogicalRepType type)
{ {
/* /*
* We create indexes in 4 steps. * We create indexes in 4 steps.
@ -759,20 +782,25 @@ CreatePostLogicalReplicationDataLoadObjects(List *shardList, char *targetNodeNam
* table and setting the statistics of indexes, depends on the indexes being * table and setting the statistics of indexes, depends on the indexes being
* created. That's why the execution is divided into four distinct stages. * created. That's why the execution is divided into four distinct stages.
*/ */
ExecuteCreateIndexCommands(shardList, targetNodeName, targetNodePort); ExecuteCreateIndexCommands(logicalRepTargetList);
ExecuteCreateConstraintsBackedByIndexCommands(shardList, targetNodeName, ExecuteCreateConstraintsBackedByIndexCommands(logicalRepTargetList);
targetNodePort); ExecuteClusterOnCommands(logicalRepTargetList);
ExecuteClusterOnCommands(shardList, targetNodeName, targetNodePort); ExecuteCreateIndexStatisticsCommands(logicalRepTargetList);
ExecuteCreateIndexStatisticsCommands(shardList, targetNodeName, targetNodePort);
/* /*
* Once the indexes are created, there are few more objects like triggers and table * Once the indexes are created, there are few more objects like triggers and table
* statistics that should be created after the data move. * statistics that should be created after the data move.
*/ */
ExecuteRemainingPostLoadTableCommands(shardList, targetNodeName, targetNodePort); ExecuteRemainingPostLoadTableCommands(logicalRepTargetList);
/* create partitioning hierarchy, if any */ /*
CreatePartitioningHierarchy(shardList, targetNodeName, targetNodePort); * Creating the partitioning hierarchy errors out in shard splits when
* it is done before the metadata is updated, so it is skipped here for splits.
*/
if (type != SHARD_SPLIT)
{
/* create partitioning hierarchy, if any */
CreatePartitioningHierarchy(logicalRepTargetList);
}
} }
@ -784,27 +812,31 @@ CreatePostLogicalReplicationDataLoadObjects(List *shardList, char *targetNodeNam
* commands fail. * commands fail.
*/ */
static void static void
ExecuteCreateIndexCommands(List *shardList, char *targetNodeName, int targetNodePort) ExecuteCreateIndexCommands(List *logicalRepTargetList)
{ {
List *taskList = NIL; List *taskList = NIL;
ListCell *shardCell = NULL; LogicalRepTarget *target = NULL;
foreach(shardCell, shardList) foreach_ptr(target, logicalRepTargetList)
{ {
ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); ShardInterval *shardInterval = NULL;
Oid relationId = shardInterval->relationId; foreach_ptr(shardInterval, target->newShards)
{
Oid relationId = shardInterval->relationId;
List *tableCreateIndexCommandList = List *tableCreateIndexCommandList =
GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId,
INCLUDE_CREATE_INDEX_STATEMENTS); INCLUDE_CREATE_INDEX_STATEMENTS);
List *shardCreateIndexCommandList = List *shardCreateIndexCommandList =
WorkerApplyShardDDLCommandList(tableCreateIndexCommandList, WorkerApplyShardDDLCommandList(tableCreateIndexCommandList,
shardInterval->shardId); shardInterval->shardId);
List *taskListForShard = List *taskListForShard =
ConvertNonExistingPlacementDDLCommandsToTasks(shardCreateIndexCommandList, ConvertNonExistingPlacementDDLCommandsToTasks(
shardInterval->shardId, shardCreateIndexCommandList,
targetNodeName, targetNodePort); target->superuserConnection->hostname,
taskList = list_concat(taskList, taskListForShard); target->superuserConnection->port);
taskList = list_concat(taskList, taskListForShard);
}
} }
/* /*
@ -819,8 +851,7 @@ ExecuteCreateIndexCommands(List *shardList, char *targetNodeName, int targetNode
*/ */
ereport(DEBUG1, (errmsg("Creating post logical replication objects " ereport(DEBUG1, (errmsg("Creating post logical replication objects "
"(indexes) on node %s:%d", targetNodeName, "(indexes)")));
targetNodePort)));
ExecuteTaskListOutsideTransaction(ROW_MODIFY_NONE, taskList, ExecuteTaskListOutsideTransaction(ROW_MODIFY_NONE, taskList,
MaxAdaptiveExecutorPoolSize, MaxAdaptiveExecutorPoolSize,
@ -836,45 +867,47 @@ ExecuteCreateIndexCommands(List *shardList, char *targetNodeName, int targetNode
* commands fail. * commands fail.
*/ */
static void static void
ExecuteCreateConstraintsBackedByIndexCommands(List *shardList, char *targetNodeName, ExecuteCreateConstraintsBackedByIndexCommands(List *logicalRepTargetList)
int targetNodePort)
{ {
ereport(DEBUG1, (errmsg("Creating post logical replication objects " ereport(DEBUG1, (errmsg("Creating post logical replication objects "
"(constraints backed by indexes) on node %s:%d", "(constraints backed by indexes)")));
targetNodeName,
targetNodePort)));
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
"CreateConstraintsBackedByIndexContext", "CreateConstraintsBackedByIndexContext",
ALLOCSET_DEFAULT_SIZES); ALLOCSET_DEFAULT_SIZES);
MemoryContext oldContext = MemoryContextSwitchTo(localContext); MemoryContext oldContext = MemoryContextSwitchTo(localContext);
ListCell *shardCell = NULL; LogicalRepTarget *target = NULL;
foreach(shardCell, shardList) foreach_ptr(target, logicalRepTargetList)
{ {
ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); ShardInterval *shardInterval = NULL;
Oid relationId = shardInterval->relationId; foreach_ptr(shardInterval, target->newShards)
List *tableCreateConstraintCommandList =
GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId,
INCLUDE_CREATE_CONSTRAINT_STATEMENTS);
if (tableCreateConstraintCommandList == NIL)
{ {
/* no constraints backed by indexes, skip */ Oid relationId = shardInterval->relationId;
List *tableCreateConstraintCommandList =
GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId,
INCLUDE_CREATE_CONSTRAINT_STATEMENTS);
if (tableCreateConstraintCommandList == NIL)
{
/* no constraints backed by indexes, skip */
MemoryContextReset(localContext);
continue;
}
List *shardCreateConstraintCommandList =
WorkerApplyShardDDLCommandList(tableCreateConstraintCommandList,
shardInterval->shardId);
char *tableOwner = TableOwner(shardInterval->relationId);
SendCommandListToWorkerOutsideTransaction(
target->superuserConnection->hostname,
target->superuserConnection->port,
tableOwner,
shardCreateConstraintCommandList);
MemoryContextReset(localContext); MemoryContextReset(localContext);
continue;
} }
List *shardCreateConstraintCommandList =
WorkerApplyShardDDLCommandList(tableCreateConstraintCommandList,
shardInterval->shardId);
char *tableOwner = TableOwner(shardInterval->relationId);
SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
tableOwner,
shardCreateConstraintCommandList);
MemoryContextReset(localContext);
} }
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
@ -890,7 +923,6 @@ ExecuteCreateConstraintsBackedByIndexCommands(List *shardList, char *targetNodeN
*/ */
static List * static List *
ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList, ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList,
uint64 shardId,
char *targetNodeName, char *targetNodeName,
int targetNodePort) int targetNodePort)
{ {
@ -911,7 +943,6 @@ ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList,
SetPlacementNodeMetadata(taskPlacement, workerNode); SetPlacementNodeMetadata(taskPlacement, workerNode);
task->taskPlacementList = list_make1(taskPlacement); task->taskPlacementList = list_make1(taskPlacement);
task->anchorShardId = shardId;
taskList = lappend(taskList, task); taskList = lappend(taskList, task);
taskId++; taskId++;
@ -929,34 +960,36 @@ ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList,
* is aborted. * is aborted.
*/ */
static void static void
ExecuteClusterOnCommands(List *shardList, char *targetNodeName, int targetNodePort) ExecuteClusterOnCommands(List *logicalRepTargetList)
{ {
List *taskList = NIL; List *taskList = NIL;
ListCell *shardCell; LogicalRepTarget *target = NULL;
foreach(shardCell, shardList) foreach_ptr(target, logicalRepTargetList)
{ {
ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); ShardInterval *shardInterval = NULL;
Oid relationId = shardInterval->relationId; foreach_ptr(shardInterval, target->newShards)
{
Oid relationId = shardInterval->relationId;
List *tableAlterTableClusterOnCommandList = List *tableAlterTableClusterOnCommandList =
GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId,
INCLUDE_INDEX_CLUSTERED_STATEMENTS); INCLUDE_INDEX_CLUSTERED_STATEMENTS);
List *shardAlterTableClusterOnCommandList = List *shardAlterTableClusterOnCommandList =
WorkerApplyShardDDLCommandList(tableAlterTableClusterOnCommandList, WorkerApplyShardDDLCommandList(tableAlterTableClusterOnCommandList,
shardInterval->shardId); shardInterval->shardId);
List *taskListForShard = List *taskListForShard =
ConvertNonExistingPlacementDDLCommandsToTasks( ConvertNonExistingPlacementDDLCommandsToTasks(
shardAlterTableClusterOnCommandList, shardAlterTableClusterOnCommandList,
shardInterval->shardId, target->superuserConnection->hostname,
targetNodeName, targetNodePort); target->superuserConnection->port);
taskList = list_concat(taskList, taskListForShard); taskList = list_concat(taskList, taskListForShard);
}
} }
ereport(DEBUG1, (errmsg("Creating post logical replication objects " ereport(DEBUG1, (errmsg("Creating post logical replication objects "
"(CLUSTER ON) on node %s:%d", targetNodeName, "(CLUSTER ON)")));
targetNodePort)));
ExecuteTaskListOutsideTransaction(ROW_MODIFY_NONE, taskList, ExecuteTaskListOutsideTransaction(ROW_MODIFY_NONE, taskList,
MaxAdaptiveExecutorPoolSize, MaxAdaptiveExecutorPoolSize,
@ -972,48 +1005,51 @@ ExecuteClusterOnCommands(List *shardList, char *targetNodeName, int targetNodePo
* is aborted. * is aborted.
*/ */
static void static void
ExecuteCreateIndexStatisticsCommands(List *shardList, char *targetNodeName, int ExecuteCreateIndexStatisticsCommands(List *logicalRepTargetList)
targetNodePort)
{ {
ereport(DEBUG1, (errmsg("Creating post logical replication objects " ereport(DEBUG1, (errmsg("Creating post logical replication objects "
"(index statistics) on node %s:%d", targetNodeName, "(index statistics)")));
targetNodePort)));
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
"CreateIndexStatisticsContext", "CreateIndexStatisticsContext",
ALLOCSET_DEFAULT_SIZES); ALLOCSET_DEFAULT_SIZES);
MemoryContext oldContext = MemoryContextSwitchTo(localContext); MemoryContext oldContext = MemoryContextSwitchTo(localContext);
ListCell *shardCell; LogicalRepTarget *target = NULL;
foreach(shardCell, shardList) foreach_ptr(target, logicalRepTargetList)
{ {
ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); ShardInterval *shardInterval = NULL;
Oid relationId = shardInterval->relationId; foreach_ptr(shardInterval, target->newShards)
List *tableAlterIndexSetStatisticsCommandList =
GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId,
INCLUDE_INDEX_STATISTICS_STATEMENTTS);
List *shardAlterIndexSetStatisticsCommandList =
WorkerApplyShardDDLCommandList(tableAlterIndexSetStatisticsCommandList,
shardInterval->shardId);
if (shardAlterIndexSetStatisticsCommandList == NIL)
{ {
/* no index statistics exists, skip */ Oid relationId = shardInterval->relationId;
List *tableAlterIndexSetStatisticsCommandList =
GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId,
INCLUDE_INDEX_STATISTICS_STATEMENTTS);
List *shardAlterIndexSetStatisticsCommandList =
WorkerApplyShardDDLCommandList(tableAlterIndexSetStatisticsCommandList,
shardInterval->shardId);
if (shardAlterIndexSetStatisticsCommandList == NIL)
{
/* no index statistics exists, skip */
MemoryContextReset(localContext);
continue;
}
/*
* These remaining operations do not require significant resources, so no
* need to create them in parallel.
*/
char *tableOwner = TableOwner(shardInterval->relationId);
SendCommandListToWorkerOutsideTransaction(
target->superuserConnection->hostname,
target->superuserConnection->port,
tableOwner,
shardAlterIndexSetStatisticsCommandList);
MemoryContextReset(localContext); MemoryContextReset(localContext);
continue;
} }
/*
* These remaining operations do not require significant resources, so no
* need to create them in parallel.
*/
char *tableOwner = TableOwner(shardInterval->relationId);
SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
tableOwner,
shardAlterIndexSetStatisticsCommandList);
MemoryContextReset(localContext);
} }
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
@ -1026,52 +1062,55 @@ ExecuteCreateIndexStatisticsCommands(List *shardList, char *targetNodeName, int
* in the given target node. * in the given target node.
*/ */
static void static void
ExecuteRemainingPostLoadTableCommands(List *shardList, char *targetNodeName, int ExecuteRemainingPostLoadTableCommands(List *logicalRepTargetList)
targetNodePort)
{ {
ereport(DEBUG1, (errmsg("Creating post logical replication objects " ereport(DEBUG1, (errmsg("Creating post logical replication objects "
"(triggers and table statistics) on node %s:%d", "(triggers and table statistics)"
targetNodeName, )));
targetNodePort)));
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
"CreateTableStatisticsContext", "CreateTableStatisticsContext",
ALLOCSET_DEFAULT_SIZES); ALLOCSET_DEFAULT_SIZES);
MemoryContext oldContext = MemoryContextSwitchTo(localContext); MemoryContext oldContext = MemoryContextSwitchTo(localContext);
ListCell *shardCell = NULL; LogicalRepTarget *target = NULL;
foreach(shardCell, shardList) foreach_ptr(target, logicalRepTargetList)
{ {
ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); ShardInterval *shardInterval = NULL;
Oid relationId = shardInterval->relationId; foreach_ptr(shardInterval, target->newShards)
bool includeIndexes = false;
bool includeReplicaIdentity = false;
List *tablePostLoadTableCommandList =
GetPostLoadTableCreationCommands(relationId, includeIndexes,
includeReplicaIdentity);
List *shardPostLoadTableCommandList =
WorkerApplyShardDDLCommandList(tablePostLoadTableCommandList,
shardInterval->shardId);
if (shardPostLoadTableCommandList == NIL)
{ {
/* no index statistics exists, skip */ Oid relationId = shardInterval->relationId;
continue;
bool includeIndexes = false;
bool includeReplicaIdentity = false;
List *tablePostLoadTableCommandList =
GetPostLoadTableCreationCommands(relationId, includeIndexes,
includeReplicaIdentity);
List *shardPostLoadTableCommandList =
WorkerApplyShardDDLCommandList(tablePostLoadTableCommandList,
shardInterval->shardId);
if (shardPostLoadTableCommandList == NIL)
{
/* no index statistics exists, skip */
continue;
}
/*
* These remaining operations do not require significant resources, so no
* need to create them in parallel.
*/
char *tableOwner = TableOwner(shardInterval->relationId);
SendCommandListToWorkerOutsideTransaction(
target->superuserConnection->hostname,
target->superuserConnection->port,
tableOwner,
shardPostLoadTableCommandList);
MemoryContextReset(localContext);
} }
/*
* These remaining operations do not require significant resources, so no
* need to create them in parallel.
*/
char *tableOwner = TableOwner(shardInterval->relationId);
SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort,
tableOwner,
shardPostLoadTableCommandList);
MemoryContextReset(localContext);
} }
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
@ -1082,40 +1121,42 @@ ExecuteRemainingPostLoadTableCommands(List *shardList, char *targetNodeName, int
* CreatePartitioningHierarchy gets a shardList and creates the partitioning * CreatePartitioningHierarchy gets a shardList and creates the partitioning
* hierarchy between the shards in the list, if any. * hierarchy between the shards in the list, if any.
*/ */
static void void
CreatePartitioningHierarchy(List *shardList, char *targetNodeName, int targetNodePort) CreatePartitioningHierarchy(List *logicalRepTargetList)
{ {
ereport(DEBUG1, (errmsg("Creating post logical replication objects " ereport(DEBUG1, (errmsg("Creating post logical replication objects "
"(partitioning hierarchy) on node %s:%d", targetNodeName, "(partitioning hierarchy)")));
targetNodePort)));
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
"CreatePartitioningHierarchy", "CreatePartitioningHierarchy",
ALLOCSET_DEFAULT_SIZES); ALLOCSET_DEFAULT_SIZES);
MemoryContext oldContext = MemoryContextSwitchTo(localContext); MemoryContext oldContext = MemoryContextSwitchTo(localContext);
ListCell *shardCell = NULL; LogicalRepTarget *target = NULL;
foreach(shardCell, shardList) foreach_ptr(target, logicalRepTargetList)
{ {
ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); ShardInterval *shardInterval = NULL;
foreach_ptr(shardInterval, target->newShards)
if (PartitionTable(shardInterval->relationId))
{ {
char *attachPartitionCommand = if (PartitionTable(shardInterval->relationId))
GenerateAttachShardPartitionCommand(shardInterval); {
char *attachPartitionCommand =
GenerateAttachShardPartitionCommand(shardInterval);
char *tableOwner = TableOwner(shardInterval->relationId); char *tableOwner = TableOwner(shardInterval->relationId);
/* /*
* Attaching partition may acquire conflicting locks when created in * Attaching partition may acquire conflicting locks when created in
* parallel, so create them sequentially. Also attaching partition * parallel, so create them sequentially. Also attaching partition
* is a quick operation, so it is fine to execute sequentially. * is a quick operation, so it is fine to execute sequentially.
*/ */
SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort, SendCommandListToWorkerOutsideTransaction(
tableOwner, target->superuserConnection->hostname,
list_make1( target->superuserConnection->port,
attachPartitionCommand)); tableOwner,
MemoryContextReset(localContext); list_make1(attachPartitionCommand));
MemoryContextReset(localContext);
}
} }
} }
@ -1124,17 +1165,17 @@ CreatePartitioningHierarchy(List *shardList, char *targetNodeName, int targetNod
/* /*
* CreateForeignKeyConstraints is used to create the foreign constraints * CreateUncheckedForeignKeyConstraints is used to create the foreign
* on the logical replication target without checking that they are actually * constraints on the logical replication target without checking that they are
* valid. * actually valid.
* *
* We skip the validation phase of foreign keys to after a shard * We skip the validation phase of foreign keys to after a shard
* move/copy/split because the validation is pretty costly and given that the * move/copy/split because the validation is pretty costly and given that the
* source placements are already valid, the validation in the target nodes is * source placements are already valid, the validation in the target nodes is
* useless. * useless.
*/ */
static void void
CreateForeignKeyConstraints(List *logicalRepTargetList) CreateUncheckedForeignKeyConstraints(List *logicalRepTargetList)
{ {
MemoryContext localContext = MemoryContext localContext =
AllocSetContextCreate(CurrentMemoryContext, AllocSetContextCreate(CurrentMemoryContext,


@ -73,9 +73,9 @@
#include "distributed/recursive_planning.h" #include "distributed/recursive_planning.h"
#include "distributed/reference_table_utils.h" #include "distributed/reference_table_utils.h"
#include "distributed/relation_access_tracking.h" #include "distributed/relation_access_tracking.h"
#include "distributed/repair_shards.h"
#include "distributed/run_from_same_connection.h" #include "distributed/run_from_same_connection.h"
#include "distributed/shard_cleaner.h" #include "distributed/shard_cleaner.h"
#include "distributed/shard_transfer.h"
#include "distributed/shared_connection_stats.h" #include "distributed/shared_connection_stats.h"
#include "distributed/shardsplit_shared_memory.h" #include "distributed/shardsplit_shared_memory.h"
#include "distributed/query_pushdown_planning.h" #include "distributed/query_pushdown_planning.h"


@ -1,6 +1,9 @@
#include "udfs/citus_locks/11.1-1.sql" #include "udfs/citus_locks/11.1-1.sql"
#include "udfs/citus_tables/11.1-1.sql"
#include "udfs/citus_shards/11.1-1.sql"
#include "udfs/create_distributed_table_concurrently/11.1-1.sql" #include "udfs/create_distributed_table_concurrently/11.1-1.sql"
#include "udfs/citus_internal_delete_partition_metadata/11.1-1.sql" #include "udfs/citus_internal_delete_partition_metadata/11.1-1.sql"
#include "udfs/citus_copy_shard_placement/11.1-1.sql"
DROP FUNCTION pg_catalog.worker_create_schema(bigint,text); DROP FUNCTION pg_catalog.worker_create_schema(bigint,text);
DROP FUNCTION pg_catalog.worker_cleanup_job_schema_cache(); DROP FUNCTION pg_catalog.worker_cleanup_job_schema_cache();
@ -167,3 +170,7 @@ GRANT SELECT ON pg_catalog.pg_dist_background_task_depend TO PUBLIC;
#include "udfs/citus_job_wait/11.1-1.sql" #include "udfs/citus_job_wait/11.1-1.sql"
#include "udfs/citus_job_cancel/11.1-1.sql" #include "udfs/citus_job_cancel/11.1-1.sql"
#include "udfs/citus_rebalance_start/11.1-1.sql"
#include "udfs/citus_rebalance_stop/11.1-1.sql"
#include "udfs/citus_rebalance_wait/11.1-1.sql"
#include "udfs/get_rebalance_progress/11.1-1.sql"


@ -93,6 +93,9 @@ DROP FUNCTION pg_catalog.get_all_active_transactions(OUT datid oid, OUT process_
DROP VIEW pg_catalog.citus_locks; DROP VIEW pg_catalog.citus_locks;
DROP FUNCTION pg_catalog.citus_locks(); DROP FUNCTION pg_catalog.citus_locks();
#include "../udfs/citus_tables/10.0-4.sql"
#include "../udfs/citus_shards/10.1-1.sql"
DROP FUNCTION pg_catalog.replicate_reference_tables(citus.shard_transfer_mode); DROP FUNCTION pg_catalog.replicate_reference_tables(citus.shard_transfer_mode);
#include "../udfs/replicate_reference_tables/9.3-2.sql" #include "../udfs/replicate_reference_tables/9.3-2.sql"
@ -106,6 +109,9 @@ DROP SEQUENCE pg_catalog.pg_dist_operationid_seq;
DROP SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq; DROP SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq;
DROP PROCEDURE pg_catalog.citus_cleanup_orphaned_resources(); DROP PROCEDURE pg_catalog.citus_cleanup_orphaned_resources();
DROP FUNCTION pg_catalog.citus_rebalance_start(name, bool, citus.shard_transfer_mode);
DROP FUNCTION pg_catalog.citus_rebalance_stop();
DROP FUNCTION pg_catalog.citus_rebalance_wait();
DROP FUNCTION pg_catalog.citus_job_cancel(bigint); DROP FUNCTION pg_catalog.citus_job_cancel(bigint);
DROP FUNCTION pg_catalog.citus_job_wait(bigint, pg_catalog.citus_job_status); DROP FUNCTION pg_catalog.citus_job_wait(bigint, pg_catalog.citus_job_status);
DROP TABLE pg_catalog.pg_dist_background_task_depend; DROP TABLE pg_catalog.pg_dist_background_task_depend;
@ -113,3 +119,6 @@ DROP TABLE pg_catalog.pg_dist_background_task;
DROP TYPE pg_catalog.citus_task_status; DROP TYPE pg_catalog.citus_task_status;
DROP TABLE pg_catalog.pg_dist_background_job; DROP TABLE pg_catalog.pg_dist_background_job;
DROP TYPE pg_catalog.citus_job_status; DROP TYPE pg_catalog.citus_job_status;
DROP FUNCTION pg_catalog.citus_copy_shard_placement;
#include "../udfs/citus_copy_shard_placement/10.0-1.sql"
#include "../udfs/get_rebalance_progress/10.1-1.sql"


@ -0,0 +1,19 @@
DROP FUNCTION pg_catalog.citus_copy_shard_placement;
CREATE FUNCTION pg_catalog.citus_copy_shard_placement(
shard_id bigint,
source_node_name text,
source_node_port integer,
target_node_name text,
target_node_port integer,
transfer_mode citus.shard_transfer_mode default 'auto')
RETURNS void
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_copy_shard_placement$$;
COMMENT ON FUNCTION pg_catalog.citus_copy_shard_placement(shard_id bigint,
source_node_name text,
source_node_port integer,
target_node_name text,
target_node_port integer,
shard_transfer_mode citus.shard_transfer_mode)
IS 'copy a shard from the source node to the destination node';
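An illustrative call with the new signature, now that do_repair is gone (shard id, host names and ports below are placeholders):

SELECT citus_copy_shard_placement(102008, 'worker-1', 9701, 'worker-2', 9702,
    transfer_mode := 'block_writes');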


@ -1,10 +1,10 @@
DROP FUNCTION pg_catalog.citus_copy_shard_placement;
CREATE FUNCTION pg_catalog.citus_copy_shard_placement( CREATE FUNCTION pg_catalog.citus_copy_shard_placement(
shard_id bigint, shard_id bigint,
source_node_name text, source_node_name text,
source_node_port integer, source_node_port integer,
target_node_name text, target_node_name text,
target_node_port integer, target_node_port integer,
do_repair bool DEFAULT true,
transfer_mode citus.shard_transfer_mode default 'auto') transfer_mode citus.shard_transfer_mode default 'auto')
RETURNS void RETURNS void
LANGUAGE C STRICT LANGUAGE C STRICT
@ -15,6 +15,5 @@ COMMENT ON FUNCTION pg_catalog.citus_copy_shard_placement(shard_id bigint,
source_node_port integer, source_node_port integer,
target_node_name text, target_node_name text,
target_node_port integer, target_node_port integer,
do_repair bool,
shard_transfer_mode citus.shard_transfer_mode) shard_transfer_mode citus.shard_transfer_mode)
IS 'copy a shard from the source node to the destination node'; IS 'copy a shard from the source node to the destination node';


@ -0,0 +1,11 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_start(
rebalance_strategy name DEFAULT NULL,
drain_only boolean DEFAULT false,
shard_transfer_mode citus.shard_transfer_mode default 'auto'
)
RETURNS bigint
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE;
COMMENT ON FUNCTION pg_catalog.citus_rebalance_start(name, boolean, citus.shard_transfer_mode)
IS 'rebalance the shards in the cluster in the background';
GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_start(name, boolean, citus.shard_transfer_mode) TO PUBLIC;
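As a usage sketch, scheduling a background rebalance looks like the following; the returned bigint is the job id that the background job machinery reports (the transfer mode shown is just one valid choice):

SELECT citus_rebalance_start(shard_transfer_mode := 'force_logical');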


@ -0,0 +1,11 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_start(
rebalance_strategy name DEFAULT NULL,
drain_only boolean DEFAULT false,
shard_transfer_mode citus.shard_transfer_mode default 'auto'
)
RETURNS bigint
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE;
COMMENT ON FUNCTION pg_catalog.citus_rebalance_start(name, boolean, citus.shard_transfer_mode)
IS 'rebalance the shards in the cluster in the background';
GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_start(name, boolean, citus.shard_transfer_mode) TO PUBLIC;


@ -0,0 +1,7 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_stop()
RETURNS VOID
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE;
COMMENT ON FUNCTION pg_catalog.citus_rebalance_stop()
IS 'stop a rebalance that is running in the background';
GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_stop() TO PUBLIC;


@ -0,0 +1,7 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_stop()
RETURNS VOID
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE;
COMMENT ON FUNCTION pg_catalog.citus_rebalance_stop()
IS 'stop a rebalance that is running in the background';
GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_stop() TO PUBLIC;


@ -0,0 +1,7 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_wait()
RETURNS VOID
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE;
COMMENT ON FUNCTION pg_catalog.citus_rebalance_wait()
IS 'wait on a running rebalance in the background';
GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_wait() TO PUBLIC;
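A follow-up sketch to the citus_rebalance_start example above: either block until the scheduled moves are done, or cancel them:

SELECT citus_rebalance_wait();
SELECT citus_rebalance_stop();  -- alternatively, cancel the running rebalance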


@ -0,0 +1,7 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_wait()
RETURNS VOID
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE;
COMMENT ON FUNCTION pg_catalog.citus_rebalance_wait()
IS 'wait on a running rebalance in the background';
GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_wait() TO PUBLIC;


@ -0,0 +1,45 @@
CREATE OR REPLACE VIEW pg_catalog.citus_shards AS
SELECT
pg_dist_shard.logicalrelid AS table_name,
pg_dist_shard.shardid,
shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) as shard_name,
CASE WHEN partkey IS NOT NULL THEN 'distributed' WHEN repmodel = 't' THEN 'reference' ELSE 'local' END AS citus_table_type,
colocationid AS colocation_id,
pg_dist_node.nodename,
pg_dist_node.nodeport,
size as shard_size
FROM
pg_dist_shard
JOIN
pg_dist_placement
ON
pg_dist_shard.shardid = pg_dist_placement.shardid
JOIN
pg_dist_node
ON
pg_dist_placement.groupid = pg_dist_node.groupid
JOIN
pg_dist_partition
ON
pg_dist_partition.logicalrelid = pg_dist_shard.logicalrelid
LEFT JOIN
(SELECT (regexp_matches(table_name,'_(\d+)$'))[1]::int as shard_id, max(size) as size from citus_shard_sizes() GROUP BY shard_id) as shard_sizes
ON
pg_dist_shard.shardid = shard_sizes.shard_id
WHERE
pg_dist_placement.shardstate = 1
AND
-- filter out tables owned by extensions
pg_dist_partition.logicalrelid NOT IN (
SELECT
objid
FROM
pg_depend
WHERE
classid = 'pg_class'::regclass AND refclassid = 'pg_extension'::regclass AND deptype = 'e'
)
ORDER BY
pg_dist_shard.logicalrelid::text, shardid
;
GRANT SELECT ON pg_catalog.citus_shards TO public;
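With extension-owned tables filtered out, an illustrative query against the view could be:

SELECT table_name, shardid, nodename, nodeport, pg_size_pretty(shard_size)
FROM citus_shards
WHERE citus_table_type = 'distributed';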


@ -28,6 +28,16 @@ ON
pg_dist_shard.shardid = shard_sizes.shard_id pg_dist_shard.shardid = shard_sizes.shard_id
WHERE WHERE
pg_dist_placement.shardstate = 1 pg_dist_placement.shardstate = 1
AND
-- filter out tables owned by extensions
pg_dist_partition.logicalrelid NOT IN (
SELECT
objid
FROM
pg_depend
WHERE
classid = 'pg_class'::regclass AND refclassid = 'pg_extension'::regclass AND deptype = 'e'
)
ORDER BY ORDER BY
pg_dist_shard.logicalrelid::text, shardid pg_dist_shard.logicalrelid::text, shardid
; ;


@ -0,0 +1,48 @@
DO $$
declare
citus_tables_create_query text;
BEGIN
citus_tables_create_query=$CTCQ$
CREATE OR REPLACE VIEW %I.citus_tables AS
SELECT
logicalrelid AS table_name,
CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE
CASE when repmodel = 't' THEN 'reference' ELSE 'local' END
END AS citus_table_type,
coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column,
colocationid AS colocation_id,
pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size,
(select count(*) from pg_dist_shard where logicalrelid = p.logicalrelid) AS shard_count,
pg_get_userbyid(relowner) AS table_owner,
amname AS access_method
FROM
pg_dist_partition p
JOIN
pg_class c ON (p.logicalrelid = c.oid)
LEFT JOIN
pg_am a ON (a.oid = c.relam)
WHERE
-- filter out tables owned by extensions
logicalrelid NOT IN (
SELECT
objid
FROM
pg_depend
WHERE
classid = 'pg_class'::regclass AND refclassid = 'pg_extension'::regclass AND deptype = 'e'
)
ORDER BY
logicalrelid::text;
$CTCQ$;
IF EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = 'public') THEN
EXECUTE format(citus_tables_create_query, 'public');
GRANT SELECT ON public.citus_tables TO public;
ELSE
EXECUTE format(citus_tables_create_query, 'citus');
ALTER VIEW citus.citus_tables SET SCHEMA pg_catalog;
GRANT SELECT ON pg_catalog.citus_tables TO public;
END IF;
END;
$$;
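Since local tables are now listed as well, a quick sanity check (mirroring the regression test output further below) could be:

SELECT table_name, citus_table_type, distribution_column, shard_count
FROM citus_tables
ORDER BY 1;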


@ -6,7 +6,9 @@ citus_tables_create_query=$CTCQ$
CREATE OR REPLACE VIEW %I.citus_tables AS CREATE OR REPLACE VIEW %I.citus_tables AS
SELECT SELECT
logicalrelid AS table_name, logicalrelid AS table_name,
CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE 'reference' END AS citus_table_type, CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE
CASE when repmodel = 't' THEN 'reference' ELSE 'local' END
END AS citus_table_type,
coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column, coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column,
colocationid AS colocation_id, colocationid AS colocation_id,
pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size, pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size,
@ -20,7 +22,15 @@ citus_tables_create_query=$CTCQ$
LEFT JOIN LEFT JOIN
pg_am a ON (a.oid = c.relam) pg_am a ON (a.oid = c.relam)
WHERE WHERE
partkey IS NOT NULL OR repmodel = 't' -- filter out tables owned by extensions
logicalrelid NOT IN (
SELECT
objid
FROM
pg_depend
WHERE
classid = 'pg_class'::regclass AND refclassid = 'pg_extension'::regclass AND deptype = 'e'
)
ORDER BY ORDER BY
logicalrelid::text; logicalrelid::text;
$CTCQ$; $CTCQ$;


@ -0,0 +1,20 @@
DROP FUNCTION pg_catalog.get_rebalance_progress();
CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress()
RETURNS TABLE(sessionid integer,
table_name regclass,
shardid bigint,
shard_size bigint,
sourcename text,
sourceport int,
targetname text,
targetport int,
progress bigint,
source_shard_size bigint,
target_shard_size bigint,
operation_type text
)
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
COMMENT ON FUNCTION pg_catalog.get_rebalance_progress()
IS 'provides progress information about the ongoing rebalance operations';
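The new operation_type column tells moves and copies apart; an illustrative progress query:

SELECT sessionid, table_name, shardid, operation_type, progress
FROM get_rebalance_progress();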


@ -11,7 +11,9 @@ CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress()
targetport int, targetport int,
progress bigint, progress bigint,
source_shard_size bigint, source_shard_size bigint,
target_shard_size bigint) target_shard_size bigint,
operation_type text
)
AS 'MODULE_PATHNAME' AS 'MODULE_PATHNAME'
LANGUAGE C STRICT; LANGUAGE C STRICT;
COMMENT ON FUNCTION pg_catalog.get_rebalance_progress() COMMENT ON FUNCTION pg_catalog.get_rebalance_progress()


@ -157,6 +157,22 @@ citus_job_wait(PG_FUNCTION_ARGS)
desiredStatus = BackgroundJobStatusByOid(PG_GETARG_OID(1)); desiredStatus = BackgroundJobStatusByOid(PG_GETARG_OID(1));
} }
citus_job_wait_internal(jobid, hasDesiredStatus ? &desiredStatus : NULL);
PG_RETURN_VOID();
}
/*
* citus_job_wait_internal implements waiting on a job, for reuse in other areas where
* we want to wait on jobs, e.g. the background rebalancer.
*
* When a desiredStatus is provided, it raises an error once a different terminal state
* is reached and the desired state can no longer be attained.
*/
void
citus_job_wait_internal(int64 jobid, BackgroundJobStatus *desiredStatus)
{
/* /*
* Since we are wait polling we will actually allocate memory on every poll. To make * Since we are wait polling we will actually allocate memory on every poll. To make
* sure we don't put unneeded pressure on the memory we create a context that we clear * sure we don't put unneeded pressure on the memory we create a context that we clear
@ -177,10 +193,9 @@ citus_job_wait(PG_FUNCTION_ARGS)
if (!job) if (!job)
{ {
ereport(ERROR, (errmsg("no job found for job with jobid: %ld", jobid))); ereport(ERROR, (errmsg("no job found for job with jobid: %ld", jobid)));
PG_RETURN_VOID();
} }
if (hasDesiredStatus && job->state == desiredStatus) if (desiredStatus && job->state == *desiredStatus)
{ {
/* job has reached its desired status, done waiting */ /* job has reached its desired status, done waiting */
break; break;
@ -188,7 +203,7 @@ citus_job_wait(PG_FUNCTION_ARGS)
if (IsBackgroundJobStatusTerminal(job->state)) if (IsBackgroundJobStatusTerminal(job->state))
{ {
if (hasDesiredStatus) if (desiredStatus)
{ {
/* /*
* We have reached a terminal state, which is not the desired state we * We have reached a terminal state, which is not the desired state we
@ -201,7 +216,7 @@ citus_job_wait(PG_FUNCTION_ARGS)
reachedStatusOid); reachedStatusOid);
char *reachedStatusName = DatumGetCString(reachedStatusNameDatum); char *reachedStatusName = DatumGetCString(reachedStatusNameDatum);
Oid desiredStatusOid = BackgroundJobStatusOid(desiredStatus); Oid desiredStatusOid = BackgroundJobStatusOid(*desiredStatus);
Datum desiredStatusNameDatum = DirectFunctionCall1(enum_out, Datum desiredStatusNameDatum = DirectFunctionCall1(enum_out,
desiredStatusOid); desiredStatusOid);
char *desiredStatusName = DatumGetCString(desiredStatusNameDatum); char *desiredStatusName = DatumGetCString(desiredStatusNameDatum);
@ -228,8 +243,6 @@ citus_job_wait(PG_FUNCTION_ARGS)
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
MemoryContextDelete(waitContext); MemoryContextDelete(waitContext);
PG_RETURN_VOID();
} }
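At the SQL level the same wait logic is reachable through citus_job_wait; a sketch with a made-up job id, assuming 'finished' is the desired terminal status:

SELECT citus_job_wait(42);               -- wait for any terminal state
SELECT citus_job_wait(42, 'finished');   -- error out if the job can no longer finish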
@ -886,9 +899,10 @@ ConsumeTaskWorkerOutput(shm_mq_handle *responseq, StringInfo message, bool *hadE
{ {
*hadError = true; *hadError = true;
} }
__attribute__((fallthrough));
} }
/* FALLTHROUGH */
case 'N': /* NoticeResponse */ case 'N': /* NoticeResponse */
{ {
ErrorData edata = { 0 }; ErrorData edata = { 0 };


@ -83,7 +83,7 @@ replicate_reference_tables(PG_FUNCTION_ARGS)
/* /*
* EnsureReferenceTablesExistOnAllNodes ensures that a shard placement for every * EnsureReferenceTablesExistOnAllNodes ensures that a shard placement for every
* reference table exists on all nodes. If a node does not have a set of shard * reference table exists on all nodes. If a node does not have a set of shard
* placements, then master_copy_shard_placement is called in a subtransaction * placements, then citus_copy_shard_placement is called in a subtransaction
* to pull the data to the new node. * to pull the data to the new node.
*/ */
void void
@ -96,7 +96,7 @@ EnsureReferenceTablesExistOnAllNodes(void)
/* /*
* EnsureReferenceTablesExistOnAllNodesExtended ensures that a shard placement for every * EnsureReferenceTablesExistOnAllNodesExtended ensures that a shard placement for every
* reference table exists on all nodes. If a node does not have a set of shard placements, * reference table exists on all nodes. If a node does not have a set of shard placements,
* then master_copy_shard_placement is called in a subtransaction to pull the data to the * then citus_copy_shard_placement is called in a subtransaction to pull the data to the
* new node. * new node.
* *
* The transferMode is passed on to the implementation of the copy to control the locks * The transferMode is passed on to the implementation of the copy to control the locks
@ -193,7 +193,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
} }
/* /*
* master_copy_shard_placement triggers metadata sync-up, which tries to * citus_copy_shard_placement triggers metadata sync-up, which tries to
* acquire a ShareLock on pg_dist_node. We do master_copy_shad_placement * acquire a ShareLock on pg_dist_node. We do citus_copy_shard_placement
* in a separate connection. If we have modified pg_dist_node in the * in a separate connection. If we have modified pg_dist_node in the
* current backend, this will cause a deadlock. * current backend, this will cause a deadlock.
@ -207,7 +207,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
/* /*
* Modifications to reference tables in current transaction are not visible * Modifications to reference tables in current transaction are not visible
* to master_copy_shard_placement, since it is done in a separate backend. * to citus_copy_shard_placement, since it is done in a separate backend.
*/ */
if (AnyRelationsModifiedInTransaction(referenceTableIdList)) if (AnyRelationsModifiedInTransaction(referenceTableIdList))
{ {
@ -235,7 +235,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
newWorkerNode->workerPort))); newWorkerNode->workerPort)));
/* /*
* Call master_copy_shard_placement using citus extension owner. Current * Call citus_copy_shard_placement using citus extension owner. Current
* user might not have permissions to do the copy. * user might not have permissions to do the copy.
*/ */
const char *userName = CitusExtensionOwnerName(); const char *userName = CitusExtensionOwnerName();
@ -293,6 +293,63 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
} }
/*
* HasNodesWithMissingReferenceTables checks if all reference tables are already copied to
* all nodes. When a node doesn't have a copy of the reference tables, we call them missing,
* and this function returns true.
*
* The caller might be interested in the list of all reference tables after this check,
* so the list of tables is written to *referenceTableList if a non-null pointer is
* passed.
*/
bool
HasNodesWithMissingReferenceTables(List **referenceTableList)
{
int colocationId = GetReferenceTableColocationId();
if (colocationId == INVALID_COLOCATION_ID)
{
/* we have no reference table yet. */
return false;
}
LockColocationId(colocationId, AccessShareLock);
List *referenceTableIdList = CitusTableTypeIdList(REFERENCE_TABLE);
if (referenceTableList)
{
*referenceTableList = referenceTableIdList;
}
if (list_length(referenceTableIdList) <= 0)
{
return false;
}
Oid referenceTableId = linitial_oid(referenceTableIdList);
List *shardIntervalList = LoadShardIntervalList(referenceTableId);
if (list_length(shardIntervalList) == 0)
{
const char *referenceTableName = get_rel_name(referenceTableId);
/* check for corrupt metadata */
ereport(ERROR, (errmsg("reference table \"%s\" does not have a shard",
referenceTableName)));
}
ShardInterval *shardInterval = (ShardInterval *) linitial(shardIntervalList);
uint64 shardId = shardInterval->shardId;
List *newWorkersList = WorkersWithoutReferenceTablePlacement(shardId,
AccessShareLock);
if (list_length(newWorkersList) <= 0)
{
return false;
}
return true;
}
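When this check reports missing placements, the SQL-level remedy is to replicate the reference tables explicitly; a sketch using one of the valid transfer modes:

SELECT replicate_reference_tables('block_writes');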
/* /*
* AnyRelationsModifiedInTransaction returns true if any of the given relations * AnyRelationsModifiedInTransaction returns true if any of the given relations
* were modified in the current transaction. * were modified in the current transaction.
@ -348,7 +405,7 @@ WorkersWithoutReferenceTablePlacement(uint64 shardId, LOCKMODE lockMode)
/* /*
* CopyShardPlacementToWorkerNodeQuery returns the master_copy_shard_placement * CopyShardPlacementToWorkerNodeQuery returns the citus_copy_shard_placement
* command to copy the given shard placement to given node. * command to copy the given shard placement to given node.
*/ */
static StringInfo static StringInfo
@ -364,8 +421,8 @@ CopyShardPlacementToWorkerNodeQuery(ShardPlacement *sourceShardPlacement,
"auto"; "auto";
appendStringInfo(queryString, appendStringInfo(queryString,
"SELECT master_copy_shard_placement(" "SELECT citus_copy_shard_placement("
UINT64_FORMAT ", %s, %d, %s, %d, do_repair := false, " UINT64_FORMAT ", %s, %d, %s, %d, "
"transfer_mode := %s)", "transfer_mode := %s)",
sourceShardPlacement->shardId, sourceShardPlacement->shardId,
quote_literal_cstr(sourceShardPlacement->nodeName), quote_literal_cstr(sourceShardPlacement->nodeName),


@ -15,9 +15,15 @@
#include "postmaster/bgworker.h" #include "postmaster/bgworker.h"
#include "distributed/metadata_utility.h"
extern BackgroundWorkerHandle * StartCitusBackgroundTaskQueueMonitor(Oid database, extern BackgroundWorkerHandle * StartCitusBackgroundTaskQueueMonitor(Oid database,
Oid extensionOwner); Oid extensionOwner);
extern void CitusBackgroundTaskQueueMonitorMain(Datum arg); extern void CitusBackgroundTaskQueueMonitorMain(Datum arg);
extern void CitusBackgroundTaskExecuter(Datum main_arg); extern void CitusBackgroundTaskExecuter(Datum main_arg);
extern Datum citus_job_cancel(PG_FUNCTION_ARGS);
extern Datum citus_job_wait(PG_FUNCTION_ARGS);
extern void citus_job_wait_internal(int64 jobid, BackgroundJobStatus *desiredStatus);
#endif /*CITUS_BACKGROUND_JOBS_H */ #endif /*CITUS_BACKGROUND_JOBS_H */


@ -20,12 +20,6 @@
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
#define CREATE_SEQUENCE_COMMAND \
"CREATE SEQUENCE IF NOT EXISTS %s AS %s INCREMENT BY " INT64_FORMAT \
" MINVALUE " INT64_FORMAT " MAXVALUE " INT64_FORMAT \
" START WITH " INT64_FORMAT " CACHE " INT64_FORMAT " %sCYCLE"
/* Function declarations for version independent Citus ruleutils wrapper functions */ /* Function declarations for version independent Citus ruleutils wrapper functions */
extern char * pg_get_extensiondef_string(Oid tableRelationId); extern char * pg_get_extensiondef_string(Oid tableRelationId);
extern Oid get_extension_schema(Oid ext_oid); extern Oid get_extension_schema(Oid ext_oid);


@ -459,6 +459,13 @@ extern List * PostprocessAlterSequenceSchemaStmt(Node *node, const char *querySt
extern List * PreprocessAlterSequenceOwnerStmt(Node *node, const char *queryString, extern List * PreprocessAlterSequenceOwnerStmt(Node *node, const char *queryString,
ProcessUtilityContext processUtilityContext); ProcessUtilityContext processUtilityContext);
extern List * PostprocessAlterSequenceOwnerStmt(Node *node, const char *queryString); extern List * PostprocessAlterSequenceOwnerStmt(Node *node, const char *queryString);
#if (PG_VERSION_NUM >= PG_VERSION_15)
extern List * PreprocessAlterSequencePersistenceStmt(Node *node, const char *queryString,
ProcessUtilityContext
processUtilityContext);
extern List * PreprocessSequenceAlterTableStmt(Node *node, const char *queryString,
ProcessUtilityContext processUtilityContext);
#endif
extern List * PreprocessDropSequenceStmt(Node *node, const char *queryString, extern List * PreprocessDropSequenceStmt(Node *node, const char *queryString,
ProcessUtilityContext processUtilityContext); ProcessUtilityContext processUtilityContext);
extern List * SequenceDropStmtObjectAddress(Node *stmt, bool missing_ok, bool extern List * SequenceDropStmtObjectAddress(Node *stmt, bool missing_ok, bool
@ -474,6 +481,10 @@ extern List * AlterSequenceSchemaStmtObjectAddress(Node *node, bool missing_ok,
isPostprocess); isPostprocess);
extern List * AlterSequenceOwnerStmtObjectAddress(Node *node, bool missing_ok, bool extern List * AlterSequenceOwnerStmtObjectAddress(Node *node, bool missing_ok, bool
isPostprocess); isPostprocess);
#if (PG_VERSION_NUM >= PG_VERSION_15)
extern List * AlterSequencePersistenceStmtObjectAddress(Node *node, bool missing_ok, bool
isPostprocess);
#endif
extern List * RenameSequenceStmtObjectAddress(Node *node, bool missing_ok, bool extern List * RenameSequenceStmtObjectAddress(Node *node, bool missing_ok, bool
isPostprocess); isPostprocess);
extern void ErrorIfUnsupportedSeqStmt(CreateSeqStmt *createSeqStmt); extern void ErrorIfUnsupportedSeqStmt(CreateSeqStmt *createSeqStmt);


@ -47,7 +47,7 @@
#define CANDIDATE_NODE_FIELDS 2 #define CANDIDATE_NODE_FIELDS 2
#define WORKER_NODE_FIELDS 2 #define WORKER_NODE_FIELDS 2
/* transfer mode for master_copy_shard_placement */ /* transfer mode for citus_copy_shard_placement */
#define TRANSFER_MODE_AUTOMATIC 'a' #define TRANSFER_MODE_AUTOMATIC 'a'
#define TRANSFER_MODE_FORCE_LOGICAL 'l' #define TRANSFER_MODE_FORCE_LOGICAL 'l'
#define TRANSFER_MODE_BLOCK_WRITES 'b' #define TRANSFER_MODE_BLOCK_WRITES 'b'
@ -286,9 +286,6 @@ extern Datum isolate_tenant_to_new_shard(PG_FUNCTION_ARGS);
/* function declarations for shard split functionality */ /* function declarations for shard split functionality */
extern Datum citus_split_shard_by_split_points(PG_FUNCTION_ARGS); extern Datum citus_split_shard_by_split_points(PG_FUNCTION_ARGS);
/* function declarations for shard repair functionality */
extern Datum master_copy_shard_placement(PG_FUNCTION_ARGS);
/* function declarations for shard copy functionality */ /* function declarations for shard copy functionality */
extern List * CopyShardCommandList(ShardInterval *shardInterval, const extern List * CopyShardCommandList(ShardInterval *shardInterval, const
char *sourceNodeName, char *sourceNodeName,


@ -226,6 +226,9 @@ extern char * DeparseDropSequenceStmt(Node *node);
extern char * DeparseRenameSequenceStmt(Node *node); extern char * DeparseRenameSequenceStmt(Node *node);
extern char * DeparseAlterSequenceSchemaStmt(Node *node); extern char * DeparseAlterSequenceSchemaStmt(Node *node);
extern char * DeparseAlterSequenceOwnerStmt(Node *node); extern char * DeparseAlterSequenceOwnerStmt(Node *node);
#if (PG_VERSION_NUM >= PG_VERSION_15)
extern char * DeparseAlterSequencePersistenceStmt(Node *node);
#endif
extern char * DeparseGrantOnSequenceStmt(Node *node); extern char * DeparseGrantOnSequenceStmt(Node *node);
/* forward declarations for qualify_sequence_stmt.c */ /* forward declarations for qualify_sequence_stmt.c */
@ -233,6 +236,9 @@ extern void QualifyRenameSequenceStmt(Node *node);
extern void QualifyDropSequenceStmt(Node *node); extern void QualifyDropSequenceStmt(Node *node);
extern void QualifyAlterSequenceSchemaStmt(Node *node); extern void QualifyAlterSequenceSchemaStmt(Node *node);
extern void QualifyAlterSequenceOwnerStmt(Node *node); extern void QualifyAlterSequenceOwnerStmt(Node *node);
#if (PG_VERSION_NUM >= PG_VERSION_15)
extern void QualifyAlterSequencePersistenceStmt(Node *node);
#endif
extern void QualifyGrantOnSequenceStmt(Node *node); extern void QualifyGrantOnSequenceStmt(Node *node);
#endif /* CITUS_DEPARSER_H */ #endif /* CITUS_DEPARSER_H */


@ -384,6 +384,7 @@ extern void EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId, Oid
extern void AlterSequenceType(Oid seqOid, Oid typeOid); extern void AlterSequenceType(Oid seqOid, Oid typeOid);
extern void EnsureRelationHasCompatibleSequenceTypes(Oid relationId); extern void EnsureRelationHasCompatibleSequenceTypes(Oid relationId);
extern bool HasRunnableBackgroundTask(void); extern bool HasRunnableBackgroundTask(void);
extern bool HasNonTerminalJobOfType(const char *jobType, int64 *jobIdOut);
extern int64 CreateBackgroundJob(const char *jobType, const char *description); extern int64 CreateBackgroundJob(const char *jobType, const char *description);
extern BackgroundTask * ScheduleBackgroundTask(int64 jobId, Oid owner, char *command, extern BackgroundTask * ScheduleBackgroundTask(int64 jobId, Oid owner, char *command,
int dependingTaskCount, int dependingTaskCount,


@ -177,5 +177,13 @@ extern void RecreateGroupedLogicalRepTargetsConnections(
char *user, char *user,
char *databaseName); char *databaseName);
extern void CloseGroupedLogicalRepTargetsConnections(HTAB *groupedLogicalRepTargetsHash); extern void CloseGroupedLogicalRepTargetsConnections(HTAB *groupedLogicalRepTargetsHash);
extern void CompleteNonBlockingShardTransfer(List *shardList,
MultiConnection *sourceConnection,
HTAB *publicationInfoHash,
List *logicalRepTargetList,
HTAB *groupedLogicalRepTargetsHash,
LogicalRepType type);
extern void CreateUncheckedForeignKeyConstraints(List *logicalRepTargetList);
extern void CreatePartitioningHierarchy(List *logicalRepTargetList);
#endif /* MULTI_LOGICAL_REPLICATION_H_ */ #endif /* MULTI_LOGICAL_REPLICATION_H_ */


@ -46,6 +46,6 @@ extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subquery
bool bool
outerMostQueryHasLimit); outerMostQueryHasLimit);
extern DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree); extern DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree);
extern bool IsJsonTableRTE(RangeTblEntry *rte);
#endif /* QUERY_PUSHDOWN_PLANNING_H */ #endif /* QUERY_PUSHDOWN_PLANNING_H */


@ -20,6 +20,7 @@
extern void EnsureReferenceTablesExistOnAllNodes(void); extern void EnsureReferenceTablesExistOnAllNodes(void);
extern void EnsureReferenceTablesExistOnAllNodesExtended(char transferMode); extern void EnsureReferenceTablesExistOnAllNodesExtended(char transferMode);
extern bool HasNodesWithMissingReferenceTables(List **referenceTableList);
extern uint32 CreateReferenceTableColocationId(void); extern uint32 CreateReferenceTableColocationId(void);
extern uint32 GetReferenceTableColocationId(void); extern uint32 GetReferenceTableColocationId(void);
extern void DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, extern void DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId,

View File

@ -106,6 +106,7 @@ typedef struct PlacementUpdateEventProgress
int sourcePort; int sourcePort;
char targetName[255]; char targetName[255];
int targetPort; int targetPort;
PlacementUpdateType updateType;
pg_atomic_uint64 progress; pg_atomic_uint64 progress;
} PlacementUpdateEventProgress; } PlacementUpdateEventProgress;

View File

@ -18,6 +18,10 @@
#define RelationCreateStorage_compat(a, b, c) RelationCreateStorage(a, b, c) #define RelationCreateStorage_compat(a, b, c) RelationCreateStorage(a, b, c)
#define parse_analyze_varparams_compat(a, b, c, d, e) parse_analyze_varparams(a, b, c, d, \ #define parse_analyze_varparams_compat(a, b, c, d, e) parse_analyze_varparams(a, b, c, d, \
e) e)
#define CREATE_SEQUENCE_COMMAND \
"CREATE %sSEQUENCE IF NOT EXISTS %s AS %s INCREMENT BY " INT64_FORMAT \
" MINVALUE " INT64_FORMAT " MAXVALUE " INT64_FORMAT \
" START WITH " INT64_FORMAT " CACHE " INT64_FORMAT " %sCYCLE"
#else #else
#include "nodes/value.h" #include "nodes/value.h"
@ -62,6 +66,11 @@ RelationGetSmgr(Relation rel)
} }
#define CREATE_SEQUENCE_COMMAND \
"CREATE SEQUENCE IF NOT EXISTS %s AS %s INCREMENT BY " INT64_FORMAT \
" MINVALUE " INT64_FORMAT " MAXVALUE " INT64_FORMAT \
" START WITH " INT64_FORMAT " CACHE " INT64_FORMAT " %sCYCLE"
#endif #endif
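For reference, a command that this format string could expand to on PG 15 might look as follows; the sequence name and type are placeholders, and the assumption that the leading %s carries UNLOGGED while the trailing %s carries NO for non-cycling sequences is illustrative rather than taken from this diff:
CREATE UNLOGGED SEQUENCE IF NOT EXISTS public.seq_1 AS bigint
    INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807
    START WITH 1 CACHE 1 NO CYCLE;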
#if PG_VERSION_NUM >= PG_VERSION_14 #if PG_VERSION_NUM >= PG_VERSION_14

View File

@ -283,3 +283,8 @@ s/^(DETAIL: "[a-z\ ]+ )pg_temp_[0-9]+(\..*" will be created only locally)$/\1pg
# will be replaced with # will be replaced with
# WARNING: "function func(bigint)" has dependency on unsupported object "schema pg_temp_xxx" # WARNING: "function func(bigint)" has dependency on unsupported object "schema pg_temp_xxx"
s/^(WARNING|ERROR)(: "[a-z\ ]+ .*" has dependency on unsupported object) "schema pg_temp_[0-9]+"$/\1\2 "schema pg_temp_xxx"/g s/^(WARNING|ERROR)(: "[a-z\ ]+ .*" has dependency on unsupported object) "schema pg_temp_[0-9]+"$/\1\2 "schema pg_temp_xxx"/g
# remove jobIds from the messages of the background rebalancer
s/^ERROR: A rebalance is already running as job [0-9]+$/ERROR: A rebalance is already running as job xxx/g
s/^NOTICE: Scheduled ([0-9]+) moves as job [0-9]+$/NOTICE: Scheduled \1 moves as job xxx/g
s/^HINT: (.*) job_id = [0-9]+ (.*)$/HINT: \1 job_id = xxx \2/g
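# for example (job id and move count below are made-up values), a line like
# NOTICE: Scheduled 2 moves as job 1337
# will be replaced with
# NOTICE: Scheduled 2 moves as job xxx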

View File

@ -452,11 +452,12 @@ SELECT citus_add_local_table_to_metadata('table_type_citus_local');
CREATE TABLE table_type_pg_local (a INT); CREATE TABLE table_type_pg_local (a INT);
SELECT table_name, citus_table_type, distribution_column, shard_count, access_method FROM public.citus_tables WHERE table_name::text LIKE 'table\_type%' ORDER BY 1; SELECT table_name, citus_table_type, distribution_column, shard_count, access_method FROM public.citus_tables WHERE table_name::text LIKE 'table\_type%' ORDER BY 1;
table_name | citus_table_type | distribution_column | shard_count | access_method table_name | citus_table_type | distribution_column | shard_count | access_method
--------------------------------------------------------------------- ---------------------------------------------------------------------
table_type_dist | distributed | a | 4 | heap table_type_dist | distributed | a | 4 | heap
table_type_ref | reference | <none> | 1 | heap table_type_ref | reference | <none> | 1 | heap
(2 rows) table_type_citus_local | local | <none> | 1 | heap
(3 rows)
SELECT c.relname, a.amname FROM pg_class c, pg_am a where c.relname SIMILAR TO 'table_type\D*' AND c.relnamespace = 'alter_table_set_access_method'::regnamespace AND c.relam = a.oid; SELECT c.relname, a.amname FROM pg_class c, pg_am a where c.relname SIMILAR TO 'table_type\D*' AND c.relnamespace = 'alter_table_set_access_method'::regnamespace AND c.relam = a.oid;
relname | amname relname | amname
@ -508,11 +509,12 @@ NOTICE: renaming the new table to alter_table_set_access_method.table_type_citu
(1 row) (1 row)
SELECT table_name, citus_table_type, distribution_column, shard_count, access_method FROM public.citus_tables WHERE table_name::text LIKE 'table\_type%' ORDER BY 1; SELECT table_name, citus_table_type, distribution_column, shard_count, access_method FROM public.citus_tables WHERE table_name::text LIKE 'table\_type%' ORDER BY 1;
table_name | citus_table_type | distribution_column | shard_count | access_method table_name | citus_table_type | distribution_column | shard_count | access_method
--------------------------------------------------------------------- ---------------------------------------------------------------------
table_type_dist | distributed | a | 4 | columnar table_type_dist | distributed | a | 4 | columnar
table_type_ref | reference | <none> | 1 | columnar table_type_ref | reference | <none> | 1 | columnar
(2 rows) table_type_citus_local | local | <none> | 1 | columnar
(3 rows)
SELECT c.relname, a.amname FROM pg_class c, pg_am a where c.relname SIMILAR TO 'table_type\D*' AND c.relnamespace = 'alter_table_set_access_method'::regnamespace AND c.relam = a.oid; SELECT c.relname, a.amname FROM pg_class c, pg_am a where c.relname SIMILAR TO 'table_type\D*' AND c.relnamespace = 'alter_table_set_access_method'::regnamespace AND c.relam = a.oid;
relname | amname relname | amname

View File

@ -0,0 +1,214 @@
CREATE SCHEMA background_rebalance;
SET search_path TO background_rebalance;
SET citus.next_shard_id TO 85674000;
SET citus.shard_replication_factor TO 1;
ALTER SYSTEM SET citus.background_task_queue_interval TO '1s';
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
CREATE TABLE t1 (a int PRIMARY KEY);
SELECT create_distributed_table('t1', 'a', shard_count => 4, colocate_with => 'none');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- verify the rebalance works - it is a no-op when the shards are already balanced. The no-op shows up as the wait call
-- complaining that there is nothing to wait on.
SELECT 1 FROM citus_rebalance_start();
NOTICE: No moves available for rebalancing
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT citus_rebalance_wait();
WARNING: no ongoing rebalance that can be waited on
citus_rebalance_wait
---------------------------------------------------------------------
(1 row)
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
citus_move_shard_placement
---------------------------------------------------------------------
(1 row)
-- rebalance a table in the background
SELECT 1 FROM citus_rebalance_start();
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT citus_rebalance_wait();
citus_rebalance_wait
---------------------------------------------------------------------
(1 row)
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
citus_move_shard_placement
---------------------------------------------------------------------
(1 row)
CREATE TABLE t2 (a int);
SELECT create_distributed_table('t2', 'a' , colocate_with => 't1');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- show that we get an error when a table in the colocation group can't be moved non-blocking
SELECT 1 FROM citus_rebalance_start();
ERROR: cannot use logical replication to transfer shards of the relation t2 since it doesn't have a REPLICA IDENTITY or PRIMARY KEY
DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY.
HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'.
SELECT 1 FROM citus_rebalance_start(shard_transfer_mode => 'block_writes');
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT citus_rebalance_wait();
citus_rebalance_wait
---------------------------------------------------------------------
(1 row)
DROP TABLE t2;
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
citus_move_shard_placement
---------------------------------------------------------------------
(1 row)
-- show we can stop a rebalance; since the stop means the scheduled move never happened, our move back below warns that the shard is already in place.
SELECT 1 FROM citus_rebalance_start();
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT citus_rebalance_stop();
citus_rebalance_stop
---------------------------------------------------------------------
(1 row)
-- waiting on this rebalance is racy, as it sometimes sees no rebalance ongoing while at other times it does;
-- instead we simply sleep a bit here
SELECT pg_sleep(1);
pg_sleep
---------------------------------------------------------------------
(1 row)
-- failing move due to a stopped rebalance, first clean orphans to make the error stable
SET client_min_messages TO WARNING;
CALL citus_cleanup_orphaned_shards();
RESET client_min_messages;
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
WARNING: shard is already present on node localhost:xxxxx
DETAIL: Move may have already completed.
citus_move_shard_placement
---------------------------------------------------------------------
(1 row)
-- show we can't start the rebalancer twice
SELECT 1 FROM citus_rebalance_start();
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT 1 FROM citus_rebalance_start();
ERROR: A rebalance is already running as job xxx
DETAIL: A rebalance was already scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
SELECT citus_rebalance_wait();
citus_rebalance_wait
---------------------------------------------------------------------
(1 row)
-- show that the old rebalancer cannot be started with a background rebalance in progress
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
citus_move_shard_placement
---------------------------------------------------------------------
(1 row)
SELECT 1 FROM citus_rebalance_start();
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT rebalance_table_shards();
ERROR: A rebalance is already running as job xxx
DETAIL: A rebalance was already scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
SELECT citus_rebalance_wait();
citus_rebalance_wait
---------------------------------------------------------------------
(1 row)
DROP TABLE t1;
-- make sure a non-super user can stop rebalancing
CREATE USER non_super_user_rebalance WITH LOGIN;
GRANT ALL ON SCHEMA background_rebalance TO non_super_user_rebalance;
SET ROLE non_super_user_rebalance;
CREATE TABLE non_super_user_t1 (a int PRIMARY KEY);
SELECT create_distributed_table('non_super_user_t1', 'a', shard_count => 4, colocate_with => 'none');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT citus_move_shard_placement(85674008, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
citus_move_shard_placement
---------------------------------------------------------------------
(1 row)
SELECT 1 FROM citus_rebalance_start();
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT citus_rebalance_stop();
citus_rebalance_stop
---------------------------------------------------------------------
(1 row)
RESET ROLE;
SET client_min_messages TO WARNING;
DROP SCHEMA background_rebalance CASCADE;
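Taken together, the background-rebalance UDFs exercised above form a small workflow; a minimal usage sketch, with the optional shard_transfer_mode argument shown only because this test uses it:
-- schedule a rebalance as a background job
SELECT citus_rebalance_start(shard_transfer_mode => 'block_writes');
-- block until the scheduled moves have finished
SELECT citus_rebalance_wait();
-- or cancel a scheduled or running rebalance
SELECT citus_rebalance_stop();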

View File

@ -1,4 +1,4 @@
-- Tests for master_copy_shard_placement, which can be used for adding replicas in statement-based replication -- Tests for citus_copy_shard_placement, which can be used for adding replicas in statement-based replication
CREATE SCHEMA mcsp; CREATE SCHEMA mcsp;
SET search_path TO mcsp; SET search_path TO mcsp;
SET citus.next_shard_id TO 8139000; SET citus.next_shard_id TO 8139000;
@ -35,7 +35,7 @@ SELECT create_distributed_table('history','key');
(1 row) (1 row)
-- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement -- Mark tables as non-mx tables, in order to be able to test citus_copy_shard_placement
UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN
('data'::regclass, 'history'::regclass); ('data'::regclass, 'history'::regclass);
INSERT INTO data VALUES ('key-1', 'value-1'); INSERT INTO data VALUES ('key-1', 'value-1');
@ -43,48 +43,49 @@ INSERT INTO data VALUES ('key-2', 'value-2');
INSERT INTO history VALUES ('key-1', '2020-02-01', 'old'); INSERT INTO history VALUES ('key-1', '2020-02-01', 'old');
INSERT INTO history VALUES ('key-1', '2019-10-01', 'older'); INSERT INTO history VALUES ('key-1', '2019-10-01', 'older');
-- verify we error out if no healthy placement exists at source -- verify we error out if no healthy placement exists at source
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('data', 'key-1'), get_shard_id_for_distribution_column('data', 'key-1'),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
ERROR: could not find placement matching "localhost:xxxxx" ERROR: could not find placement matching "localhost:xxxxx"
HINT: Confirm the placement still exists and try again. HINT: Confirm the placement still exists and try again.
-- verify we error out if source and destination are the same -- verify we error out if source and destination are the same
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('data', 'key-1'), get_shard_id_for_distribution_column('data', 'key-1'),
'localhost', :worker_2_port, 'localhost', :worker_2_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
ERROR: shard xxxxx already exists in the target node ERROR: cannot copy shard to the same node
-- verify we error out if target already contains a healthy placement -- verify we warn if target already contains a healthy placement
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
(SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
ERROR: shard xxxxx already exists in the target node WARNING: shard is already present on node localhost:xxxxx
DETAIL: Copy may have already completed.
citus_copy_shard_placement
---------------------------------------------------------------------
(1 row)
-- verify we error out if table has foreign key constraints -- verify we error out if table has foreign key constraints
INSERT INTO ref_table SELECT 1, value FROM data; INSERT INTO ref_table SELECT 1, value FROM data;
ALTER TABLE data ADD CONSTRAINT distfk FOREIGN KEY (value) REFERENCES ref_table (b) MATCH FULL; ALTER TABLE data ADD CONSTRAINT distfk FOREIGN KEY (value) REFERENCES ref_table (b) MATCH FULL;
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('data', 'key-1'), get_shard_id_for_distribution_column('data', 'key-1'),
'localhost', :worker_2_port, 'localhost', :worker_2_port,
'localhost', :worker_1_port, 'localhost', :worker_1_port);
do_repair := false);
ERROR: cannot replicate shards with foreign keys ERROR: cannot replicate shards with foreign keys
ALTER TABLE data DROP CONSTRAINT distfk; ALTER TABLE data DROP CONSTRAINT distfk;
-- replicate shard that contains key-1 -- replicate shard that contains key-1
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('data', 'key-1'), get_shard_id_for_distribution_column('data', 'key-1'),
'localhost', :worker_2_port, 'localhost', :worker_2_port,
'localhost', :worker_1_port, 'localhost', :worker_1_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -127,11 +128,10 @@ SELECT create_distributed_table('mx_table', 'a');
(1 row) (1 row)
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('mx_table', '1'), get_shard_id_for_distribution_column('mx_table', '1'),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
ERROR: Table 'mx_table' is streaming replicated. Shards of streaming replicated tables cannot be copied ERROR: Table 'mx_table' is streaming replicated. Shards of streaming replicated tables cannot be copied
SET client_min_messages TO ERROR; SET client_min_messages TO ERROR;
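As the rewritten calls above suggest, citus_copy_shard_placement no longer takes a do_repair argument; a minimal call shape, with a placeholder shard id and ports, would be:
SELECT citus_copy_shard_placement(
    8139000,                 -- placeholder shard id
    'localhost', 9701,       -- source node
    'localhost', 9702,       -- target node
    transfer_mode := 'block_writes');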

View File

@ -302,8 +302,8 @@ SELECT get_shard_id_for_distribution_column('citus_local_table_1');
1504014 1504014
(1 row) (1 row)
-- master_copy_shard_placement is not supported -- citus_copy_shard_placement is not supported
SELECT master_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, true) SELECT citus_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port)
FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table_1'::regclass) as shardid; FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table_1'::regclass) as shardid;
ERROR: Table 'citus_local_table_1' is a local table. Replicating shard of a local table added to metadata currently is not supported ERROR: Table 'citus_local_table_1' is a local table. Replicating shard of a local table added to metadata currently is not supported
-- undistribute_table is supported -- undistribute_table is supported

View File

@ -24,12 +24,12 @@ SELECT citus_add_local_table_to_metadata('citus_local_table');
-- isolate_tenant_to_new_shard is not supported -- isolate_tenant_to_new_shard is not supported
SELECT isolate_tenant_to_new_shard('citus_local_table', 100, shard_transfer_mode => 'block_writes'); SELECT isolate_tenant_to_new_shard('citus_local_table', 100, shard_transfer_mode => 'block_writes');
ERROR: cannot isolate tenant because tenant isolation is only support for hash distributed tables ERROR: cannot isolate tenant because tenant isolation is only support for hash distributed tables
-- master_copy_shard_placement is not supported -- citus_copy_shard_placement is not supported
SELECT master_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, false) SELECT citus_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, false)
FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table'::regclass) as shardid; FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table'::regclass) as shardid;
ERROR: Table 'citus_local_table' is a local table. Replicating shard of a local table added to metadata currently is not supported ERROR: function citus_copy_shard_placement(bigint, unknown, integer, unknown, integer, boolean) does not exist at character 8
-- master_move_shard_placement is not supported -- citus_move_shard_placement is not supported
SELECT master_move_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port) SELECT citus_move_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port)
FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table'::regclass) as shardid; FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table'::regclass) as shardid;
ERROR: table citus_local_tables_ent.citus_local_table is a local table, moving shard of a local table added to metadata is currently not supported ERROR: table citus_local_tables_ent.citus_local_table is a local table, moving shard of a local table added to metadata is currently not supported
-- replicate_table_shards is not supported -- replicate_table_shards is not supported

View File

@ -233,24 +233,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="CREATE SUBSCRIPTION").cancel(' || :p
(1 row) (1 row)
SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical');
ERROR: canceling statement due to user request
-- failure on colocated table constraints
SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE tenant_isolation.table_2 ADD CONSTRAINT").after(1).kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical');
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-- cancellation on colocated table constraints
SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE tenant_isolation.table_2 ADD CONSTRAINT").after(2).cancel(' || :pid || ')');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical');
ERROR: canceling statement due to user request ERROR: canceling statement due to user request
-- failure on catching up LSN -- failure on catching up LSN
@ -382,15 +364,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey F
SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical');
ERROR: connection not open ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx CONTEXT: while executing command on localhost:xxxxx
-- failure on foreign key creation
SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey FOREIGN KEY").after(2).cancel(' || :pid || ')');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical');
ERROR: canceling statement due to user request
-- failure on shard split transaction -- failure on shard split transaction
SELECT citus.mitmproxy('conn.onQuery(query="BEGIN").kill()'); SELECT citus.mitmproxy('conn.onQuery(query="BEGIN").kill()');
mitmproxy mitmproxy

View File

@ -213,36 +213,9 @@ SELECT logicalrelid FROM pg_dist_partition WHERE colocationid = 92448300 ORDER B
rep2 rep2
(2 rows) (2 rows)
UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = 92448300 AND groupid = 0;
SELECT shardid, shardstate, nodeport FROM pg_dist_shard_placement WHERE shardid = 92448300 ORDER BY placementid;
shardid | shardstate | nodeport
---------------------------------------------------------------------
92448300 | 4 | 57637
92448300 | 1 | 57638
92448300 | 3 | 57636
(3 rows)
-- cannot copy from an orphaned shard -- cannot copy from an orphaned shard
SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_1_port, 'localhost', :master_port); SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_1_port, 'localhost', :master_port);
ERROR: source placement must be in active state ERROR: source placement must be in active state
-- cannot copy to an orphaned shard
SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_2_port, 'localhost', :worker_1_port);
ERROR: target placement must be in inactive state
-- can still copy to an inactive shard
SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_2_port, 'localhost', :master_port);
citus_copy_shard_placement
---------------------------------------------------------------------
(1 row)
SELECT shardid, shardstate, nodeport FROM pg_dist_shard_placement WHERE shardid = 92448300 ORDER BY placementid;
shardid | shardstate | nodeport
---------------------------------------------------------------------
92448300 | 4 | 57637
92448300 | 1 | 57638
92448300 | 1 | 57636
(3 rows)
-- Make sure we don't send a query to the orphaned shard -- Make sure we don't send a query to the orphaned shard
BEGIN; BEGIN;
SET LOCAL citus.log_remote_commands TO ON; SET LOCAL citus.log_remote_commands TO ON;

View File

@ -1,53 +1,53 @@
Parsed test spec with 2 sessions Parsed test spec with 2 sessions
starting permutation: s1-load-cache s2-load-cache s2-set-placement-inactive s2-begin s2-repair-placement s1-repair-placement s2-commit starting permutation: s1-load-cache s2-load-cache s2-delete-inactive s2-begin s2-repair-placement s1-repair-placement s2-commit
step s1-load-cache: step s1-load-cache:
COPY test_hash_table FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; COPY test_hash_table FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV;
step s2-load-cache: step s2-load-cache:
COPY test_hash_table FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; COPY test_hash_table FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV;
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
step s1-repair-placement: step s1-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638);
ERROR: could not acquire the lock required to repair public.test_hash_table ERROR: could not acquire the lock required to copy public.test_hash_table
step s2-commit: step s2-commit:
COMMIT; COMMIT;
starting permutation: s2-set-placement-inactive s2-begin s2-repair-placement s1-repair-placement s2-commit starting permutation: s2-delete-inactive s2-begin s2-repair-placement s1-repair-placement s2-commit
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
step s1-repair-placement: step s1-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638);
ERROR: could not acquire the lock required to repair public.test_hash_table ERROR: could not acquire the lock required to copy public.test_hash_table
step s2-commit: step s2-commit:
COMMIT; COMMIT;

View File

@ -1,6 +1,6 @@
Parsed test spec with 2 sessions Parsed test spec with 2 sessions
starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content
step s1-load-cache: step s1-load-cache:
TRUNCATE test_repair_placement_vs_modification; TRUNCATE test_repair_placement_vs_modification;
@ -19,16 +19,16 @@ count
1 1
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -60,7 +60,7 @@ nodeport|success|result
(2 rows) (2 rows)
starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content
step s1-load-cache: step s1-load-cache:
TRUNCATE test_repair_placement_vs_modification; TRUNCATE test_repair_placement_vs_modification;
@ -79,16 +79,16 @@ count
1 1
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -120,7 +120,7 @@ nodeport|success|result
(2 rows) (2 rows)
starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content starting permutation: s1-load-cache s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content
step s1-load-cache: step s1-load-cache:
TRUNCATE test_repair_placement_vs_modification; TRUNCATE test_repair_placement_vs_modification;
@ -136,16 +136,16 @@ count
0 0
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -177,7 +177,7 @@ nodeport|success|result
(2 rows) (2 rows)
starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content starting permutation: s1-load-cache s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content
step s1-load-cache: step s1-load-cache:
TRUNCATE test_repair_placement_vs_modification; TRUNCATE test_repair_placement_vs_modification;
@ -193,16 +193,16 @@ count
0 0
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -234,7 +234,7 @@ nodeport|success|result
(2 rows) (2 rows)
starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count starting permutation: s1-load-cache s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count
step s1-load-cache: step s1-load-cache:
TRUNCATE test_repair_placement_vs_modification; TRUNCATE test_repair_placement_vs_modification;
@ -250,16 +250,16 @@ count
0 0
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -291,7 +291,7 @@ nodeport|success|result
(4 rows) (4 rows)
starting permutation: s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content starting permutation: s1-insert s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content
step s1-insert: step s1-insert:
INSERT INTO test_repair_placement_vs_modification VALUES (5, 10); INSERT INTO test_repair_placement_vs_modification VALUES (5, 10);
@ -307,16 +307,16 @@ count
1 1
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -348,7 +348,7 @@ nodeport|success|result
(2 rows) (2 rows)
starting permutation: s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content starting permutation: s1-insert s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content
step s1-insert: step s1-insert:
INSERT INTO test_repair_placement_vs_modification VALUES (5, 10); INSERT INTO test_repair_placement_vs_modification VALUES (5, 10);
@ -364,16 +364,16 @@ count
1 1
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -405,7 +405,7 @@ nodeport|success|result
(2 rows) (2 rows)
starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content starting permutation: s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content
step s1-begin: step s1-begin:
BEGIN; BEGIN;
SET LOCAL citus.select_opens_transaction_block TO off; SET LOCAL citus.select_opens_transaction_block TO off;
@ -418,16 +418,16 @@ count
0 0
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -459,7 +459,7 @@ nodeport|success|result
(2 rows) (2 rows)
starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content starting permutation: s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content
step s1-begin: step s1-begin:
BEGIN; BEGIN;
SET LOCAL citus.select_opens_transaction_block TO off; SET LOCAL citus.select_opens_transaction_block TO off;
@ -472,16 +472,16 @@ count
0 0
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -513,7 +513,7 @@ nodeport|success|result
(2 rows) (2 rows)
starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count starting permutation: s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count
step s1-begin: step s1-begin:
BEGIN; BEGIN;
SET LOCAL citus.select_opens_transaction_block TO off; SET LOCAL citus.select_opens_transaction_block TO off;
@ -526,16 +526,16 @@ count
0 0
(1 row) (1 row)
step s2-set-placement-inactive: step s2-delete-inactive:
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair-placement: step s2-repair-placement:
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)

View File

@ -585,12 +585,12 @@ create_distributed_table_concurrently
(1 row) (1 row)
step s4-print-colocations: step s4-print-colocations:
SELECT * FROM pg_dist_colocation ORDER BY colocationid; SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid;
colocationid|shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation
--------------------------------------------------------------------- ---------------------------------------------------------------------
123173| 4| 1| 21| 0 4| 1| 21| 0
123174| 4| 1| 23| 0 4| 1| 23| 0
(2 rows) (2 rows)
citus_remove_node citus_remove_node
@ -651,12 +651,12 @@ create_distributed_table_concurrently
(1 row) (1 row)
step s4-print-colocations: step s4-print-colocations:
SELECT * FROM pg_dist_colocation ORDER BY colocationid; SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid;
colocationid|shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation
--------------------------------------------------------------------- ---------------------------------------------------------------------
123175| 4| 1| 23| 0 4| 1| 23| 0
123176| 4| 1| 21| 0 4| 1| 21| 0
(2 rows) (2 rows)
citus_remove_node citus_remove_node
@ -700,12 +700,12 @@ create_distributed_table_concurrently
(1 row) (1 row)
step s4-print-colocations: step s4-print-colocations:
SELECT * FROM pg_dist_colocation ORDER BY colocationid; SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid;
colocationid|shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation
--------------------------------------------------------------------- ---------------------------------------------------------------------
123177| 4| 1| 21| 0 4| 1| 21| 0
123178| 4| 1| 23| 0 4| 1| 23| 0
(2 rows) (2 rows)
citus_remove_node citus_remove_node
@ -748,12 +748,12 @@ create_distributed_table_concurrently
(1 row) (1 row)
step s4-print-colocations: step s4-print-colocations:
SELECT * FROM pg_dist_colocation ORDER BY colocationid; SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid;
colocationid|shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation
--------------------------------------------------------------------- ---------------------------------------------------------------------
123179| 4| 1| 21| 0 4| 1| 21| 0
123180| 4| 1| 23| 0 4| 1| 23| 0
(2 rows) (2 rows)
citus_remove_node citus_remove_node

View File

@ -1,236 +0,0 @@
Parsed test spec with 2 sessions
starting permutation: s2-invalidate-57637 s1-begin s1-insertone s2-repair s1-commit
create_distributed_table
---------------------------------------------------------------------
(1 row)
step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
step s1-begin:
BEGIN;
step s1-insertone:
INSERT INTO test_dml_vs_repair VALUES(1, 1);
step s2-repair:
SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-repair: <... completed>
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
starting permutation: s1-insertone s2-invalidate-57637 s1-begin s1-insertall s2-repair s1-commit
create_distributed_table
---------------------------------------------------------------------
(1 row)
step s1-insertone:
INSERT INTO test_dml_vs_repair VALUES(1, 1);
step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
step s1-begin:
BEGIN;
step s1-insertall:
INSERT INTO test_dml_vs_repair SELECT test_id, data+1 FROM test_dml_vs_repair;
step s2-repair:
SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-repair: <... completed>
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
starting permutation: s2-invalidate-57637 s2-begin s2-repair s1-insertone s2-commit s2-invalidate-57638 s1-display s2-invalidate-57637 s2-revalidate-57638 s1-display
create_distributed_table
---------------------------------------------------------------------
(1 row)
step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
step s2-begin:
BEGIN;
step s2-repair:
SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
step s1-insertone:
INSERT INTO test_dml_vs_repair VALUES(1, 1);
<waiting ...>
step s2-commit:
COMMIT;
step s1-insertone: <... completed>
step s2-invalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638;
step s1-display:
SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id;
test_id|data
---------------------------------------------------------------------
1| 1
(1 row)
step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
step s2-revalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638;
step s1-display:
SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id;
test_id|data
---------------------------------------------------------------------
1| 1
(1 row)
starting permutation: s2-invalidate-57637 s1-prepared-insertone s2-begin s2-repair s1-prepared-insertone s2-commit s2-invalidate-57638 s1-display s2-invalidate-57637 s2-revalidate-57638 s1-display
create_distributed_table
---------------------------------------------------------------------
(1 row)
step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
step s1-prepared-insertone:
EXECUTE insertone;
step s2-begin:
BEGIN;
step s2-repair:
SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
step s1-prepared-insertone:
EXECUTE insertone;
<waiting ...>
step s2-commit:
COMMIT;
step s1-prepared-insertone: <... completed>
step s2-invalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638;
step s1-display:
SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id;
test_id|data
---------------------------------------------------------------------
1| 1
1| 1
(2 rows)
step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
step s2-revalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638;
step s1-display:
SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id;
test_id|data
---------------------------------------------------------------------
1| 1
1| 1
(2 rows)
starting permutation: s2-invalidate-57637 s1-insertone s1-prepared-insertall s2-begin s2-repair s1-prepared-insertall s2-commit s2-invalidate-57638 s1-display s2-invalidate-57637 s2-revalidate-57638 s1-display
create_distributed_table
---------------------------------------------------------------------
(1 row)
step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
step s1-insertone:
INSERT INTO test_dml_vs_repair VALUES(1, 1);
step s1-prepared-insertall:
EXECUTE insertall;
step s2-begin:
BEGIN;
step s2-repair:
SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
step s1-prepared-insertall:
EXECUTE insertall;
<waiting ...>
step s2-commit:
COMMIT;
step s1-prepared-insertall: <... completed>
step s2-invalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638;
step s1-display:
SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id;
test_id|data
---------------------------------------------------------------------
1| 1
1| 2
1| 2
1| 3
(4 rows)
step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
step s2-revalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638;
step s1-display:
SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id;
test_id|data
---------------------------------------------------------------------
1| 1
1| 2
1| 2
1| 3
(4 rows)

View File

@ -575,3 +575,31 @@ master_set_node_property
(1 row) (1 row)
starting permutation: s1-rebalance-all s2-citus-rebalance-start s1-commit
create_distributed_table
---------------------------------------------------------------------
(1 row)
step s1-rebalance-all:
BEGIN;
select rebalance_table_shards();
rebalance_table_shards
---------------------------------------------------------------------
(1 row)
step s2-citus-rebalance-start:
SELECT 1 FROM citus_rebalance_start();
ERROR: could not acquire the lock required to rebalance public.colocated1
step s1-commit:
COMMIT;
master_set_node_property
---------------------------------------------------------------------
(1 row)

View File

@ -28,15 +28,16 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1|move
colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0 colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0|move
colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0 colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0|move
(4 rows) (4 rows)
step s2-unlock-1-start: step s2-unlock-1-start:
@ -71,10 +72,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -109,15 +111,16 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 73728|localhost | 57637| 0|localhost | 57638| 73728| 2 colocated1|1500001| 73728|localhost | 57637| 0|localhost | 57638| 73728| 2|move
colocated2|1500005| 401408|localhost | 57637| 0|localhost | 57638| 401408| 2 colocated2|1500005| 401408|localhost | 57637| 0|localhost | 57638| 401408| 2|move
colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 1 colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 1|move
colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 1 colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 1|move
(4 rows) (4 rows)
step s3-unlock-2-start: step s3-unlock-2-start:
@ -152,10 +155,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -202,15 +206,16 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move
colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0 colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0|move
colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0 colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0|move
(4 rows) (4 rows)
step s7-release-lock: step s7-release-lock:
@ -245,10 +250,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -287,15 +293,16 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1|move
colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0 colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0|move
colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0 colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0|move
(4 rows) (4 rows)
step s6-release-advisory-lock: step s6-release-advisory-lock:
@ -335,10 +342,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -384,13 +392,14 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move
(2 rows) (2 rows)
step s7-release-lock: step s7-release-lock:
@ -417,10 +426,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -455,13 +465,14 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1|move
(2 rows) (2 rows)
step s2-unlock-1-start: step s2-unlock-1-start:
@ -488,10 +499,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -537,13 +549,14 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move
(2 rows) (2 rows)
step s7-release-lock: step s7-release-lock:
@ -570,10 +583,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -581,6 +595,57 @@ step enable-deferred-drop:
ALTER SYSTEM RESET citus.defer_drop_after_shard_move; ALTER SYSTEM RESET citus.defer_drop_after_shard_move;
starting permutation: s2-lock-1-start s1-shard-copy-c1-block-writes s7-get-progress s2-unlock-1-start s1-commit
master_set_node_property
---------------------------------------------------------------------
(1 row)
step s2-lock-1-start:
BEGIN;
DELETE FROM colocated1 WHERE test_id = 1;
DELETE FROM separate WHERE test_id = 1;
step s1-shard-copy-c1-block-writes:
BEGIN;
UPDATE pg_dist_partition SET repmodel = 'c' WHERE logicalrelid IN ('colocated1', 'colocated2');
SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, transfer_mode:='block_writes');
<waiting ...>
step s7-get-progress:
set LOCAL client_min_messages=NOTICE;
SELECT
table_name,
shardid,
shard_size,
sourcename,
sourceport,
source_shard_size,
targetname,
targetport,
target_shard_size,
progress,
operation_type
FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1|copy
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1|copy
(2 rows)
step s2-unlock-1-start:
ROLLBACK;
step s1-shard-copy-c1-block-writes: <... completed>
citus_copy_shard_placement
---------------------------------------------------------------------
(1 row)
step s1-commit:
COMMIT;
starting permutation: s6-acquire-advisory-lock s1-shard-move-c1-online s7-get-progress s6-release-advisory-lock s1-commit s7-get-progress enable-deferred-drop starting permutation: s6-acquire-advisory-lock s1-shard-move-c1-online s7-get-progress s6-release-advisory-lock s1-commit s7-get-progress enable-deferred-drop
master_set_node_property master_set_node_property
--------------------------------------------------------------------- ---------------------------------------------------------------------
@ -611,13 +676,14 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1|move
(2 rows) (2 rows)
step s6-release-advisory-lock: step s6-release-advisory-lock:
@ -649,10 +715,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -698,13 +765,14 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move
(2 rows) (2 rows)
step s7-release-lock: step s7-release-lock:
@ -731,10 +799,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -742,6 +811,65 @@ step enable-deferred-drop:
ALTER SYSTEM RESET citus.defer_drop_after_shard_move; ALTER SYSTEM RESET citus.defer_drop_after_shard_move;
starting permutation: s6-acquire-advisory-lock s1-shard-copy-c1-online s7-get-progress s6-release-advisory-lock s1-commit
master_set_node_property
---------------------------------------------------------------------
(1 row)
step s6-acquire-advisory-lock:
SELECT pg_advisory_lock(44000, 55152);
pg_advisory_lock
---------------------------------------------------------------------
(1 row)
step s1-shard-copy-c1-online:
BEGIN;
UPDATE pg_dist_partition SET repmodel = 'c' WHERE logicalrelid IN ('colocated1', 'colocated2');
SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, transfer_mode:='force_logical');
<waiting ...>
step s7-get-progress:
set LOCAL client_min_messages=NOTICE;
SELECT
table_name,
shardid,
shard_size,
sourcename,
sourceport,
source_shard_size,
targetname,
targetport,
target_shard_size,
progress,
operation_type
FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1|copy
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1|copy
(2 rows)
step s6-release-advisory-lock:
SELECT pg_advisory_unlock(44000, 55152);
pg_advisory_unlock
---------------------------------------------------------------------
t
(1 row)
step s1-shard-copy-c1-online: <... completed>
citus_copy_shard_placement
---------------------------------------------------------------------
(1 row)
step s1-commit:
COMMIT;
starting permutation: s2-lock-1-start s1-shard-move-c1-block-writes s4-shard-move-sep-block-writes s7-get-progress s2-unlock-1-start s1-commit s4-commit s7-get-progress enable-deferred-drop starting permutation: s2-lock-1-start s1-shard-move-c1-block-writes s4-shard-move-sep-block-writes s7-get-progress s2-unlock-1-start s1-commit s4-commit s7-get-progress enable-deferred-drop
master_set_node_property master_set_node_property
--------------------------------------------------------------------- ---------------------------------------------------------------------
@ -773,14 +901,15 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1|move
separate |1500009| 122880|localhost | 57637| 122880|localhost | 57638| 0| 1 separate |1500009| 122880|localhost | 57637| 122880|localhost | 57638| 0| 1|move
(3 rows) (3 rows)
step s2-unlock-1-start: step s2-unlock-1-start:
@ -816,10 +945,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -869,14 +999,15 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move
colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move
separate |1500009| 122880|localhost | 57637| 122880|localhost | 57638| 147456| 1 separate |1500009| 122880|localhost | 57637| 122880|localhost | 57638| 147456| 1|move
(3 rows) (3 rows)
step s7-release-lock: step s7-release-lock:
@ -912,10 +1043,11 @@ step s7-get-progress:
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
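All of the hunks above make the same change: get_rebalance_progress() now reports an operation_type column ('move' or 'copy') next to the per-shard progress. As a hedged sketch, a monitoring query over the new column could look like the following; the column names are taken directly from the test output above:

-- sketch: watch an ongoing transfer and distinguish shard moves from copies
SELECT table_name, shardid, operation_type, progress,
       sourcename, sourceport, targetname, targetport
  FROM get_rebalance_progress()
 ORDER BY table_name, shardid;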

File diff suppressed because it is too large

View File

@ -1,9 +0,0 @@
--
-- PG15+ test
--
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15
\gset
\if :server_version_ge_15
\else
\q

View File

@ -59,7 +59,7 @@ SELECT master_create_empty_shard('table6_append');
13000021 13000021
(1 row) (1 row)
-- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement -- Mark tables as non-mx tables, in order to be able to test citus_copy_shard_placement
UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN
('table1_group1'::regclass, 'table2_group1'::regclass, 'table5_groupX'::regclass); ('table1_group1'::regclass, 'table2_group1'::regclass, 'table5_groupX'::regclass);
-- test copy -- test copy
@ -90,13 +90,13 @@ ORDER BY s.shardid, sp.nodeport;
(12 rows) (12 rows)
-- try to copy colocated shards without a replica identity -- try to copy colocated shards without a replica identity
SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false); SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
ERROR: cannot use logical replication to transfer shards of the relation table2_group1 since it doesn't have a REPLICA IDENTITY or PRIMARY KEY ERROR: cannot use logical replication to transfer shards of the relation table2_group1 since it doesn't have a REPLICA IDENTITY or PRIMARY KEY
DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY.
HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'.
-- copy colocated shards -- copy colocated shards
SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -143,9 +143,15 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_
(1 row) (1 row)
\c - - - :master_port \c - - - :master_port
-- copy colocated shards again to see error message -- copy colocated shards again to see warning
SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical');
ERROR: shard xxxxx already exists in the target node WARNING: shard is already present on node localhost:xxxxx
DETAIL: Copy may have already completed.
citus_copy_shard_placement
---------------------------------------------------------------------
(1 row)
-- test copying NOT colocated shard -- test copying NOT colocated shard
-- status before shard copy -- status before shard copy
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport
@ -169,8 +175,8 @@ ORDER BY s.shardid, sp.nodeport;
(8 rows) (8 rows)
-- copy NOT colocated shard -- copy NOT colocated shard
SELECT master_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); SELECT citus_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -214,8 +220,8 @@ ORDER BY s.shardid, sp.nodeport;
(2 rows) (2 rows)
-- copy shard in append distributed table -- copy shard in append distributed table
SELECT master_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical'); SELECT citus_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -300,6 +306,15 @@ ORDER BY s.shardid, sp.nodeport;
13000011 | table2_group1 | 57638 13000011 | table2_group1 | 57638
(14 rows) (14 rows)
-- moving the shard again is idempotent
SELECT citus_move_shard_placement(13000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical');
WARNING: shard is already present on node localhost:xxxxx
DETAIL: Move may have already completed.
citus_move_shard_placement
---------------------------------------------------------------------
(1 row)
-- also connect worker to verify we successfully moved given shard (and other colocated shards) -- also connect worker to verify we successfully moved given shard (and other colocated shards)
\c - - - :worker_1_port \c - - - :worker_1_port
SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000001'::regclass; SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000001'::regclass;
@ -412,8 +427,9 @@ ORDER BY s.shardid, sp.nodeport;
(3 rows) (3 rows)
-- try to move shard from wrong node -- try to move shard from wrong node
SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); SELECT master_move_shard_placement(13000021, 'localhost', :master_port, 'localhost', :worker_1_port, 'force_logical');
ERROR: source placement must be in active state ERROR: could not find placement matching "localhost:xxxxx"
HINT: Confirm the placement still exists and try again.
-- test shard move with foreign constraints -- test shard move with foreign constraints
DROP TABLE IF EXISTS table1_group1, table2_group1; DROP TABLE IF EXISTS table1_group1, table2_group1;
SET citus.shard_count TO 6; SET citus.shard_count TO 6;
@ -524,7 +540,7 @@ SELECT "Constraint", "Definition" FROM table_fkeys
\c - - - :master_port \c - - - :master_port
-- test shard copy with foreign constraints -- test shard copy with foreign constraints
-- we expect it to error out because we do not support foreign constraints with replication factor > 1 -- we expect it to error out because we do not support foreign constraints with replication factor > 1
SELECT master_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false); SELECT citus_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port);
ERROR: cannot replicate shards with foreign keys ERROR: cannot replicate shards with foreign keys
-- lets also test that master_move_shard_placement doesn't break serials -- lets also test that master_move_shard_placement doesn't break serials
CREATE TABLE serial_move_test (key int, other_val serial); CREATE TABLE serial_move_test (key int, other_val serial);
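The file above reflects the UDF rename and signature change: master_copy_shard_placement(..., do_repair boolean, transfer_mode) becomes citus_copy_shard_placement without the do_repair flag, and repeating a copy or move that has already completed now emits a warning instead of an error. A hedged sketch of the new call, using the shard id and ports from the test as placeholders:

-- sketch: copy one shard placement (and its colocated shards) to another node
SELECT citus_copy_shard_placement(
         13000000,                          -- placeholder shard id from the test above
         'localhost', 57637,                -- source node
         'localhost', 57638,                -- target node
         transfer_mode := 'force_logical');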

View File

@ -1,231 +0,0 @@
--
-- MULTI_COLOCATED_SHARD_TRANSFER
--
-- These tables are created in multi_colocation_utils test
-- test repair
-- manually set shardstate as inactive
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND (shardid = 1300000 OR shardid = 1300004);
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND shardid = 1300016;
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND shardid = 1300020;
-- test repairing colocated shards
-- status before shard repair
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate
FROM
pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
WHERE
p.logicalrelid = s.logicalrelid AND
s.shardid = sp.shardid AND
colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass)
ORDER BY s.shardid, sp.nodeport;
shardid | logicalrelid | nodeport | colocationid | shardstate
---------------------------------------------------------------------
1300000 | table1_group1 | 57637 | 1000 | 1
1300000 | table1_group1 | 57638 | 1000 | 3
1300001 | table1_group1 | 57637 | 1000 | 1
1300001 | table1_group1 | 57638 | 1000 | 1
1300002 | table1_group1 | 57637 | 1000 | 1
1300002 | table1_group1 | 57638 | 1000 | 1
1300003 | table1_group1 | 57637 | 1000 | 1
1300003 | table1_group1 | 57638 | 1000 | 1
1300004 | table2_group1 | 57637 | 1000 | 1
1300004 | table2_group1 | 57638 | 1000 | 3
1300005 | table2_group1 | 57637 | 1000 | 1
1300005 | table2_group1 | 57638 | 1000 | 1
1300006 | table2_group1 | 57637 | 1000 | 1
1300006 | table2_group1 | 57638 | 1000 | 1
1300007 | table2_group1 | 57637 | 1000 | 1
1300007 | table2_group1 | 57638 | 1000 | 1
(16 rows)
-- repair colocated shards
SELECT master_copy_shard_placement(1300000, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
-- status after shard repair
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate
FROM
pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
WHERE
p.logicalrelid = s.logicalrelid AND
s.shardid = sp.shardid AND
colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass)
ORDER BY s.shardid, sp.nodeport;
shardid | logicalrelid | nodeport | colocationid | shardstate
---------------------------------------------------------------------
1300000 | table1_group1 | 57637 | 1000 | 1
1300000 | table1_group1 | 57638 | 1000 | 1
1300001 | table1_group1 | 57637 | 1000 | 1
1300001 | table1_group1 | 57638 | 1000 | 1
1300002 | table1_group1 | 57637 | 1000 | 1
1300002 | table1_group1 | 57638 | 1000 | 1
1300003 | table1_group1 | 57637 | 1000 | 1
1300003 | table1_group1 | 57638 | 1000 | 1
1300004 | table2_group1 | 57637 | 1000 | 1
1300004 | table2_group1 | 57638 | 1000 | 3
1300005 | table2_group1 | 57637 | 1000 | 1
1300005 | table2_group1 | 57638 | 1000 | 1
1300006 | table2_group1 | 57637 | 1000 | 1
1300006 | table2_group1 | 57638 | 1000 | 1
1300007 | table2_group1 | 57637 | 1000 | 1
1300007 | table2_group1 | 57638 | 1000 | 1
(16 rows)
-- test repairing NOT colocated shard
-- status before shard repair
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate
FROM
pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
WHERE
p.logicalrelid = s.logicalrelid AND
s.shardid = sp.shardid AND
p.logicalrelid = 'table5_groupX'::regclass
ORDER BY s.shardid, sp.nodeport;
shardid | logicalrelid | nodeport | colocationid | shardstate
---------------------------------------------------------------------
1300016 | table5_groupx | 57637 | 0 | 1
1300016 | table5_groupx | 57638 | 0 | 3
1300017 | table5_groupx | 57637 | 0 | 1
1300017 | table5_groupx | 57638 | 0 | 1
1300018 | table5_groupx | 57637 | 0 | 1
1300018 | table5_groupx | 57638 | 0 | 1
1300019 | table5_groupx | 57637 | 0 | 1
1300019 | table5_groupx | 57638 | 0 | 1
(8 rows)
-- repair NOT colocated shard
SELECT master_copy_shard_placement(1300016, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
-- status after shard repair
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate
FROM
pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
WHERE
p.logicalrelid = s.logicalrelid AND
s.shardid = sp.shardid AND
p.logicalrelid = 'table5_groupX'::regclass
ORDER BY s.shardid, sp.nodeport;
shardid | logicalrelid | nodeport | colocationid | shardstate
---------------------------------------------------------------------
1300016 | table5_groupx | 57637 | 0 | 1
1300016 | table5_groupx | 57638 | 0 | 1
1300017 | table5_groupx | 57637 | 0 | 1
1300017 | table5_groupx | 57638 | 0 | 1
1300018 | table5_groupx | 57637 | 0 | 1
1300018 | table5_groupx | 57638 | 0 | 1
1300019 | table5_groupx | 57637 | 0 | 1
1300019 | table5_groupx | 57638 | 0 | 1
(8 rows)
-- test repairing shard in append distributed table
-- status before shard repair
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate
FROM
pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
WHERE
p.logicalrelid = s.logicalrelid AND
s.shardid = sp.shardid AND
p.logicalrelid = 'table6_append'::regclass
ORDER BY s.shardid, sp.nodeport;
shardid | logicalrelid | nodeport | colocationid | shardstate
---------------------------------------------------------------------
1300020 | table6_append | 57637 | 0 | 1
1300020 | table6_append | 57638 | 0 | 3
1300021 | table6_append | 57637 | 0 | 1
1300021 | table6_append | 57638 | 0 | 1
(4 rows)
-- repair shard in append distributed table
SELECT master_copy_shard_placement(1300020, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
-- status after shard repair
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate
FROM
pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
WHERE
p.logicalrelid = s.logicalrelid AND
s.shardid = sp.shardid AND
p.logicalrelid = 'table6_append'::regclass
ORDER BY s.shardid, sp.nodeport;
shardid | logicalrelid | nodeport | colocationid | shardstate
---------------------------------------------------------------------
1300020 | table6_append | 57637 | 0 | 1
1300020 | table6_append | 57638 | 0 | 1
1300021 | table6_append | 57637 | 0 | 1
1300021 | table6_append | 57638 | 0 | 1
(4 rows)
-- test repair while all placements of one shard in colocation group is unhealthy
-- manually set shardstate as inactive
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1300000;
-- status before shard repair
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate
FROM
pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
WHERE
p.logicalrelid = s.logicalrelid AND
s.shardid = sp.shardid AND
colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass)
ORDER BY s.shardid, sp.nodeport;
shardid | logicalrelid | nodeport | colocationid | shardstate
---------------------------------------------------------------------
1300000 | table1_group1 | 57637 | 1000 | 3
1300000 | table1_group1 | 57638 | 1000 | 3
1300001 | table1_group1 | 57637 | 1000 | 1
1300001 | table1_group1 | 57638 | 1000 | 1
1300002 | table1_group1 | 57637 | 1000 | 1
1300002 | table1_group1 | 57638 | 1000 | 1
1300003 | table1_group1 | 57637 | 1000 | 1
1300003 | table1_group1 | 57638 | 1000 | 1
1300004 | table2_group1 | 57637 | 1000 | 1
1300004 | table2_group1 | 57638 | 1000 | 3
1300005 | table2_group1 | 57637 | 1000 | 1
1300005 | table2_group1 | 57638 | 1000 | 1
1300006 | table2_group1 | 57637 | 1000 | 1
1300006 | table2_group1 | 57638 | 1000 | 1
1300007 | table2_group1 | 57637 | 1000 | 1
1300007 | table2_group1 | 57638 | 1000 | 1
(16 rows)
-- repair while all placements of one shard in colocation group is unhealthy
SELECT master_copy_shard_placement(1300000, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
ERROR: source placement must be in active state
-- status after shard repair
SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate
FROM
pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
WHERE
p.logicalrelid = s.logicalrelid AND
s.shardid = sp.shardid AND
colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass)
ORDER BY s.shardid, sp.nodeport;
shardid | logicalrelid | nodeport | colocationid | shardstate
---------------------------------------------------------------------
1300000 | table1_group1 | 57637 | 1000 | 3
1300000 | table1_group1 | 57638 | 1000 | 3
1300001 | table1_group1 | 57637 | 1000 | 1
1300001 | table1_group1 | 57638 | 1000 | 1
1300002 | table1_group1 | 57637 | 1000 | 1
1300002 | table1_group1 | 57638 | 1000 | 1
1300003 | table1_group1 | 57637 | 1000 | 1
1300003 | table1_group1 | 57638 | 1000 | 1
1300004 | table2_group1 | 57637 | 1000 | 1
1300004 | table2_group1 | 57638 | 1000 | 3
1300005 | table2_group1 | 57637 | 1000 | 1
1300005 | table2_group1 | 57638 | 1000 | 1
1300006 | table2_group1 | 57637 | 1000 | 1
1300006 | table2_group1 | 57638 | 1000 | 1
1300007 | table2_group1 | 57637 | 1000 | 1
1300007 | table2_group1 | 57638 | 1000 | 1
(16 rows)

View File

@ -1111,59 +1111,66 @@ ERROR: extension "citus" already exists
-- Snapshot of state at 11.1-1 -- Snapshot of state at 11.1-1
ALTER EXTENSION citus UPDATE TO '11.1-1'; ALTER EXTENSION citus UPDATE TO '11.1-1';
SELECT * FROM multi_extension.print_extension_changes(); SELECT * FROM multi_extension.print_extension_changes();
previous_object | current_object previous_object | current_object
--------------------------------------------------------------------- ---------------------------------------------------------------------
access method columnar | access method columnar |
function alter_columnar_table_reset(regclass,boolean,boolean,boolean,boolean) void | function alter_columnar_table_reset(regclass,boolean,boolean,boolean,boolean) void |
function alter_columnar_table_set(regclass,integer,integer,name,integer) void | function alter_columnar_table_set(regclass,integer,integer,name,integer) void |
function citus_internal.columnar_ensure_am_depends_catalog() void | function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) void |
function citus_internal.downgrade_columnar_storage(regclass) void | function citus_internal.columnar_ensure_am_depends_catalog() void |
function citus_internal.upgrade_columnar_storage(regclass) void | function citus_internal.downgrade_columnar_storage(regclass) void |
function columnar.columnar_handler(internal) table_am_handler | function citus_internal.upgrade_columnar_storage(regclass) void |
function isolate_tenant_to_new_shard(regclass,"any",text) bigint | function columnar.columnar_handler(internal) table_am_handler |
function replicate_reference_tables() void | function get_rebalance_progress() TABLE(sessionid integer, table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer, progress bigint, source_shard_size bigint, target_shard_size bigint) |
function worker_cleanup_job_schema_cache() void | function isolate_tenant_to_new_shard(regclass,"any",text) bigint |
function worker_create_schema(bigint,text) void | function replicate_reference_tables() void |
function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) void | function worker_cleanup_job_schema_cache() void |
function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) void | function worker_create_schema(bigint,text) void |
function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) void | function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) void |
function worker_merge_files_into_table(bigint,integer,text[],text[]) void | function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) void |
function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) void | function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) void |
function worker_repartition_cleanup(bigint) void | function worker_merge_files_into_table(bigint,integer,text[],text[]) void |
schema columnar | function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) void |
sequence columnar.storageid_seq | function worker_repartition_cleanup(bigint) void |
table columnar.chunk | schema columnar |
table columnar.chunk_group | sequence columnar.storageid_seq |
table columnar.options | table columnar.chunk |
table columnar.stripe | table columnar.chunk_group |
| function citus_cleanup_orphaned_resources() table columnar.options |
| function citus_internal_delete_partition_metadata(regclass) void table columnar.stripe |
| function citus_job_cancel(bigint) void | function citus_cleanup_orphaned_resources()
| function citus_job_wait(bigint,citus_job_status) void | function citus_copy_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) void
| function citus_locks() SETOF record | function citus_internal_delete_partition_metadata(regclass) void
| function citus_split_shard_by_split_points(bigint,text[],integer[],citus.shard_transfer_mode) void | function citus_job_cancel(bigint) void
| function create_distributed_table_concurrently(regclass,text,citus.distribution_type,text,integer) void | function citus_job_wait(bigint,citus_job_status) void
| function isolate_tenant_to_new_shard(regclass,"any",text,citus.shard_transfer_mode) bigint | function citus_locks() SETOF record
| function replicate_reference_tables(citus.shard_transfer_mode) void | function citus_rebalance_start(name,boolean,citus.shard_transfer_mode) bigint
| function worker_copy_table_to_node(regclass,integer) void | function citus_rebalance_stop() void
| function worker_split_copy(bigint,text,split_copy_info[]) void | function citus_rebalance_wait() void
| function worker_split_shard_release_dsm() void | function citus_split_shard_by_split_points(bigint,text[],integer[],citus.shard_transfer_mode) void
| function worker_split_shard_replication_setup(split_shard_info[]) SETOF replication_slot_info | function create_distributed_table_concurrently(regclass,text,citus.distribution_type,text,integer) void
| sequence pg_dist_background_job_job_id_seq | function get_rebalance_progress() TABLE(sessionid integer, table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer, progress bigint, source_shard_size bigint, target_shard_size bigint, operation_type text)
| sequence pg_dist_background_task_task_id_seq | function isolate_tenant_to_new_shard(regclass,"any",text,citus.shard_transfer_mode) bigint
| sequence pg_dist_cleanup_recordid_seq | function replicate_reference_tables(citus.shard_transfer_mode) void
| sequence pg_dist_operationid_seq | function worker_copy_table_to_node(regclass,integer) void
| table pg_dist_background_job | function worker_split_copy(bigint,text,split_copy_info[]) void
| table pg_dist_background_task | function worker_split_shard_release_dsm() void
| table pg_dist_background_task_depend | function worker_split_shard_replication_setup(split_shard_info[]) SETOF replication_slot_info
| table pg_dist_cleanup | sequence pg_dist_background_job_job_id_seq
| type citus_job_status | sequence pg_dist_background_task_task_id_seq
| type citus_task_status | sequence pg_dist_cleanup_recordid_seq
| type replication_slot_info | sequence pg_dist_operationid_seq
| type split_copy_info | table pg_dist_background_job
| type split_shard_info | table pg_dist_background_task
| view citus_locks | table pg_dist_background_task_depend
(50 rows) | table pg_dist_cleanup
| type citus_job_status
| type citus_task_status
| type replication_slot_info
| type split_copy_info
| type split_shard_info
| view citus_locks
(57 rows)
DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff;
-- show running version -- show running version
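The object list above also records the new background-job plumbing (the pg_dist_background_job catalog plus citus_job_wait and citus_job_cancel). A hedged sketch of how a job started by citus_rebalance_start() might be tracked from psql, assuming citus_job_wait's second argument defaults to waiting for any terminal state and treating pg_dist_background_job's column names as an assumption:

-- sketch: track and, if needed, cancel a background job
SELECT citus_rebalance_start() AS job_id \gset
SELECT citus_job_wait(:job_id);      -- wait for the job to finish one way or another
-- SELECT citus_job_cancel(:job_id);                              -- or cancel it instead
-- SELECT * FROM pg_dist_background_job WHERE job_id = :job_id;   -- inspect it (assumed columns)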

View File

@ -100,9 +100,9 @@ ORDER BY
(12 rows) (12 rows)
\c - - - :master_port \c - - - :master_port
-- Check that master_copy_shard_placement cannot be run with MX tables -- Check that citus_copy_shard_placement cannot be run with MX tables
SELECT SELECT
master_copy_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical') citus_copy_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical')
FROM FROM
pg_dist_shard NATURAL JOIN pg_dist_shard_placement pg_dist_shard NATURAL JOIN pg_dist_shard_placement
WHERE WHERE
@ -138,20 +138,6 @@ SELECT pg_reload_conf();
t t
(1 row) (1 row)
\c - - - :master_port
BEGIN;
SELECT
master_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical')
FROM
pg_dist_shard NATURAL JOIN pg_dist_shard_placement
WHERE
logicalrelid = 'mx_table_1'::regclass
AND nodeport = :worker_1_port
ORDER BY
shardid
LIMIT 1;
ERROR: source placement must be in active state
ROLLBACK;
\c - - - :worker_2_port \c - - - :worker_2_port
-- before reseting citus.node_conninfo, check that CREATE SUBSCRIPTION -- before reseting citus.node_conninfo, check that CREATE SUBSCRIPTION
-- with citus_use_authinfo takes into account node_conninfo even when -- with citus_use_authinfo takes into account node_conninfo even when
@ -229,7 +215,7 @@ ORDER BY
-- Check that the UDFs cannot be called from the workers -- Check that the UDFs cannot be called from the workers
SELECT SELECT
master_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical') citus_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical')
FROM FROM
pg_dist_shard NATURAL JOIN pg_dist_shard_placement pg_dist_shard NATURAL JOIN pg_dist_shard_placement
WHERE WHERE

View File

@ -278,6 +278,19 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut
(1 row) (1 row)
-- table should not show up in citus_tables or citus_shards
SELECT count(*) FROM citus_tables WHERE table_name = 'seg_test'::regclass;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM citus_shards WHERE table_name = 'seg_test'::regclass;
count
---------------------------------------------------------------------
0
(1 row)
\c - - - :worker_1_port \c - - - :worker_1_port
-- should be able to see contents from worker -- should be able to see contents from worker
SELECT * FROM seg_test; SELECT * FROM seg_test;
@ -286,6 +299,19 @@ SELECT * FROM seg_test;
42 42
(1 row) (1 row)
-- table should not show up in citus_tables or citus_shards
SELECT count(*) FROM citus_tables WHERE table_name = 'seg_test'::regclass;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM citus_shards WHERE table_name = 'seg_test'::regclass;
count
---------------------------------------------------------------------
0
(1 row)
\c - - - :master_port \c - - - :master_port
-- test metadata sync in the presence of an extension-owned table -- test metadata sync in the presence of an extension-owned table
SELECT start_metadata_sync_to_node('localhost', :worker_1_port); SELECT start_metadata_sync_to_node('localhost', :worker_1_port);
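The added assertions above check that the worker-side table does not leak into the coordinator's citus_tables and citus_shards views. For reference, a hedged sketch of how those views are commonly queried; the column names beyond table_name are stated from memory and should be treated as assumptions:

-- sketch: list distributed tables and their shard placements
SELECT table_name, citus_table_type, shard_count FROM citus_tables;
SELECT table_name, shardid, nodename, nodeport, shard_size FROM citus_shards;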

View File

@ -1128,7 +1128,7 @@ CREATE OR REPLACE FUNCTION immutable_bleat(text) RETURNS int LANGUAGE plpgsql IM
CREATE TABLE test_table (test_id integer NOT NULL, data text); CREATE TABLE test_table (test_id integer NOT NULL, data text);
SET citus.shard_count TO 2; SET citus.shard_count TO 2;
SET citus.shard_replication_factor TO 2; SET citus.shard_replication_factor TO 2;
SELECT create_distributed_table('test_table', 'test_id', 'hash'); SELECT create_distributed_table('test_table', 'test_id', 'hash', colocate_with := 'none');
create_distributed_table create_distributed_table
--------------------------------------------------------------------- ---------------------------------------------------------------------
@ -1157,7 +1157,7 @@ EXECUTE countsome; -- no replanning
(0 rows) (0 rows)
-- invalidate half of the placements using SQL, should invalidate via trigger -- invalidate half of the placements using SQL, should invalidate via trigger
UPDATE pg_dist_shard_placement SET shardstate = '3' DELETE FROM pg_dist_shard_placement
WHERE shardid IN ( WHERE shardid IN (
SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass)
AND nodeport = :worker_1_port; AND nodeport = :worker_1_port;
@ -1172,13 +1172,13 @@ EXECUTE countsome; -- no replanning
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
-- repair shards, should invalidate via master_metadata_utility.c -- copy shards, should invalidate via master_metadata_utility.c
SELECT master_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) SELECT citus_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes')
FROM pg_dist_shard_placement FROM pg_dist_shard_placement
WHERE shardid IN ( WHERE shardid IN (
SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass)
AND nodeport = :worker_1_port; AND nodeport = :worker_2_port;
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------

View File

@ -1421,23 +1421,6 @@ SELECT master_get_table_ddl_events('reference_schema.reference_table_ddl');
ALTER TABLE reference_schema.reference_table_ddl OWNER TO postgres ALTER TABLE reference_schema.reference_table_ddl OWNER TO postgres
(2 rows) (2 rows)
-- in reality, we wouldn't need to repair any reference table shard placements
-- however, the test could be relevant for other purposes
SELECT placementid AS a_placement_id FROM pg_dist_shard_placement WHERE shardid = :a_shard_id AND nodeport = :worker_1_port \gset
SELECT placementid AS b_placement_id FROM pg_dist_shard_placement WHERE shardid = :a_shard_id AND nodeport = :worker_2_port \gset
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE placementid = :a_placement_id;
SELECT master_copy_shard_placement(:a_shard_id, 'localhost', :worker_2_port, 'localhost', :worker_1_port);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
SELECT shardid, shardstate FROM pg_dist_shard_placement WHERE placementid = :a_placement_id;
shardid | shardstate
---------------------------------------------------------------------
1250019 | 1
(1 row)
-- some queries that are captured in functions -- some queries that are captured in functions
CREATE OR REPLACE FUNCTION select_count_all() RETURNS bigint AS ' CREATE OR REPLACE FUNCTION select_count_all() RETURNS bigint AS '
SELECT SELECT

View File

@ -1,94 +0,0 @@
SET citus.next_shard_id TO 820000;
SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
SELECT groupid AS worker_1_group FROM pg_dist_node WHERE nodeport=:worker_1_port \gset
-- ===================================================================
-- test shard repair functionality
-- ===================================================================
-- create a table and create its distribution metadata
CREATE TABLE customer_engagements ( id integer, created_at date, event_data text );
-- add some indexes
CREATE INDEX ON customer_engagements (id);
CREATE INDEX ON customer_engagements (created_at);
CREATE INDEX ON customer_engagements (event_data);
-- distribute the table
-- create a single shard on the first worker
SET citus.shard_count TO 1;
SET citus.shard_replication_factor TO 2;
SELECT create_distributed_table('customer_engagements', 'id', 'hash');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- ingest some data for the tests
INSERT INTO customer_engagements VALUES (1, '01-01-2015', 'first event');
INSERT INTO customer_engagements VALUES (2, '02-01-2015', 'second event');
INSERT INTO customer_engagements VALUES (1, '03-01-2015', 'third event');
-- the following queries does the following:
-- (i) create a new shard
-- (ii) mark the second shard placements as unhealthy
-- (iii) do basic checks i.e., only allow copy from healthy placement to unhealthy ones
-- (iv) do a successful master_copy_shard_placement from the first placement to the second
-- (v) mark the first placement as unhealthy and execute a query that is routed to the second placement
-- get the newshardid
SELECT shardid as newshardid FROM pg_dist_shard WHERE logicalrelid = 'customer_engagements'::regclass
\gset
-- now, update the second placement as unhealthy
UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid
AND groupid = :worker_2_group;
-- cannot repair a shard after a modification (transaction still open during repair)
BEGIN;
ALTER TABLE customer_engagements ADD COLUMN value float;
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
ERROR: cannot open new connections after the first modification command within a transaction
ROLLBACK;
BEGIN;
INSERT INTO customer_engagements VALUES (4, '04-01-2015', 'fourth event');
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
ERROR: cannot open new connections after the first modification command within a transaction
ROLLBACK;
-- modifications after repairing a shard are fine (will use new metadata)
BEGIN;
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
ALTER TABLE customer_engagements ADD COLUMN value float;
ROLLBACK;
BEGIN;
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
INSERT INTO customer_engagements VALUES (4, '04-01-2015', 'fourth event');
ROLLBACK;
-- mark the target placement healthy again (shardstate = 1)
UPDATE pg_dist_placement SET shardstate = 1 WHERE groupid = :worker_2_group and shardid = :newshardid;
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
ERROR: target placement must be in inactive state
UPDATE pg_dist_placement SET shardstate = 3 WHERE groupid = :worker_2_group and shardid = :newshardid;
-- also try to copy from an inactive placement
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port);
ERROR: source placement must be in active state
-- "copy" this shard from the first placement to the second one
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
master_copy_shard_placement
---------------------------------------------------------------------
(1 row)
-- now, mark the first placement as unhealthy (and raise a notice) so that queries are not routed there
UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid AND groupid = :worker_1_group;
-- get the data from the second placement
SELECT * FROM customer_engagements;
id | created_at | event_data
---------------------------------------------------------------------
1 | 01-01-2015 | first event
2 | 02-01-2015 | second event
1 | 03-01-2015 | third event
(3 rows)
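The repair flow deleted above is superseded elsewhere in this diff by citus_copy_shard_placement, which no longer takes a do_repair mode. A minimal sketch of the replacement call, reusing the :newshardid and port variables from the deleted test:
SELECT citus_copy_shard_placement(
    :newshardid,
    'localhost', :worker_1_port,
    'localhost', :worker_2_port,
    transfer_mode := 'block_writes');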

View File

@ -356,6 +356,63 @@ WHERE colocationid IN
(1 row) (1 row)
DROP TABLE replicate_reference_table_commit; DROP TABLE replicate_reference_table_commit;
-- exercise reference table replication in create_distributed_table_concurrently
SELECT citus_remove_node('localhost', :worker_2_port);
citus_remove_node
---------------------------------------------------------------------
(1 row)
CREATE TABLE replicate_reference_table_cdtc(column1 int);
SELECT create_reference_table('replicate_reference_table_cdtc');
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT citus_add_node('localhost', :worker_2_port);
citus_add_node
---------------------------------------------------------------------
1370004
(1 row)
-- required for create_distributed_table_concurrently
SELECT 1 FROM citus_set_coordinator_host('localhost', :master_port);
?column?
---------------------------------------------------------------------
1
(1 row)
SET citus.shard_replication_factor TO 1;
CREATE TABLE distributed_table_cdtc(column1 int primary key);
SELECT create_distributed_table_concurrently('distributed_table_cdtc', 'column1');
create_distributed_table_concurrently
---------------------------------------------------------------------
(1 row)
RESET citus.shard_replication_factor;
SELECT citus_remove_node('localhost', :master_port);
citus_remove_node
---------------------------------------------------------------------
(1 row)
SELECT
shardid, shardstate, shardlength, nodename, nodeport
FROM
pg_dist_shard_placement_view
WHERE
nodeport = :worker_2_port
ORDER BY shardid, nodeport;
shardid | shardstate | shardlength | nodename | nodeport
---------------------------------------------------------------------
1370004 | 1 | 0 | localhost | 57638
1370005 | 1 | 0 | localhost | 57638
1370007 | 1 | 0 | localhost | 57638
(3 rows)
DROP TABLE replicate_reference_table_cdtc, distributed_table_cdtc;
-- test adding new node + upgrading another hash distributed table to reference table + creating new reference table in TRANSACTION -- test adding new node + upgrading another hash distributed table to reference table + creating new reference table in TRANSACTION
SELECT master_remove_node('localhost', :worker_2_port); SELECT master_remove_node('localhost', :worker_2_port);
master_remove_node master_remove_node
@ -431,8 +488,8 @@ WHERE
ORDER BY shardid, nodeport; ORDER BY shardid, nodeport;
shardid | shardstate | shardlength | nodename | nodeport shardid | shardstate | shardlength | nodename | nodeport
--------------------------------------------------------------------- ---------------------------------------------------------------------
1370004 | 1 | 0 | localhost | 57638 1370009 | 1 | 0 | localhost | 57638
1370005 | 1 | 0 | localhost | 57638 1370010 | 1 | 0 | localhost | 57638
(2 rows) (2 rows)
SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation
@ -739,7 +796,7 @@ WHERE
ORDER BY 1,4,5; ORDER BY 1,4,5;
shardid | shardstate | shardlength | nodename | nodeport shardid | shardstate | shardlength | nodename | nodeport
--------------------------------------------------------------------- ---------------------------------------------------------------------
1370014 | 1 | 0 | localhost | 57637 1370019 | 1 | 0 | localhost | 57637
(1 row) (1 row)
-- we should see the two shard placements after activation -- we should see the two shard placements after activation
@ -764,7 +821,7 @@ WHERE
ORDER BY 1,4,5; ORDER BY 1,4,5;
shardid | shardstate | shardlength | nodename | nodeport shardid | shardstate | shardlength | nodename | nodeport
--------------------------------------------------------------------- ---------------------------------------------------------------------
1370014 | 1 | 0 | localhost | 57637 1370019 | 1 | 0 | localhost | 57637
(1 row) (1 row)
SELECT 1 FROM master_remove_node('localhost', :worker_2_port); SELECT 1 FROM master_remove_node('localhost', :worker_2_port);
@ -787,7 +844,6 @@ SELECT citus_copy_shard_placement(
(SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
ERROR: Copying shards to a non-existing node is not supported ERROR: Copying shards to a non-existing node is not supported
HINT: Add the target node via SELECT citus_add_node('localhost', 57638); HINT: Add the target node via SELECT citus_add_node('localhost', 57638);
@ -795,14 +851,13 @@ HINT: Add the target node via SELECT citus_add_node('localhost', 57638);
SELECT citus_add_secondary_node('localhost', :worker_2_port, 'localhost', :worker_1_port); SELECT citus_add_secondary_node('localhost', :worker_2_port, 'localhost', :worker_1_port);
citus_add_secondary_node citus_add_secondary_node
--------------------------------------------------------------------- ---------------------------------------------------------------------
1370012 1370014
(1 row) (1 row)
SELECT citus_copy_shard_placement( SELECT citus_copy_shard_placement(
(SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
ERROR: Copying shards to a secondary (e.g., replica) node is not supported ERROR: Copying shards to a secondary (e.g., replica) node is not supported
SELECT citus_remove_node('localhost', :worker_2_port); SELECT citus_remove_node('localhost', :worker_2_port);
@ -822,7 +877,6 @@ SELECT citus_copy_shard_placement(
(SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
ERROR: Copying shards to a non-active node is not supported ERROR: Copying shards to a non-active node is not supported
HINT: Activate the target node via SELECT citus_activate_node('localhost', 57638); HINT: Activate the target node via SELECT citus_activate_node('localhost', 57638);
@ -1005,7 +1059,7 @@ SELECT min(result) = max(result) AS consistent FROM run_command_on_placements('r
t t
(1 row) (1 row)
-- test that metadata is synced when master_copy_shard_placement replicates -- test that metadata is synced when citus_copy_shard_placement replicates
-- reference table shards -- reference table shards
SET citus.replicate_reference_tables_on_activate TO off; SET citus.replicate_reference_tables_on_activate TO off;
SELECT 1 FROM master_remove_node('localhost', :worker_2_port); SELECT 1 FROM master_remove_node('localhost', :worker_2_port);
@ -1021,13 +1075,12 @@ SELECT 1 FROM master_add_node('localhost', :worker_2_port);
(1 row) (1 row)
SET citus.shard_replication_factor TO 1; SET citus.shard_replication_factor TO 1;
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
:ref_table_shard, :ref_table_shard,
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -1166,7 +1219,7 @@ SELECT create_distributed_table('test','x');
SELECT citus_add_node('localhost', :worker_2_port); SELECT citus_add_node('localhost', :worker_2_port);
citus_add_node citus_add_node
--------------------------------------------------------------------- ---------------------------------------------------------------------
1370022 1370024
(1 row) (1 row)
SELECT SELECT

View File

@ -19,8 +19,8 @@ CREATE TABLE test_schema_support.nation_append(
n_regionkey integer not null, n_regionkey integer not null,
n_comment varchar(152) n_comment varchar(152)
); );
SELECT master_create_distributed_table('test_schema_support.nation_append', 'n_nationkey', 'append'); SELECT create_distributed_table('test_schema_support.nation_append', 'n_nationkey', 'append');
master_create_distributed_table create_distributed_table
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -41,8 +41,8 @@ CREATE TABLE test_schema_support."nation._'append" (
n_name char(25) not null, n_name char(25) not null,
n_regionkey integer not null, n_regionkey integer not null,
n_comment varchar(152)); n_comment varchar(152));
SELECT master_create_distributed_table('test_schema_support."nation._''append"', 'n_nationkey', 'append'); SELECT create_distributed_table('test_schema_support."nation._''append"', 'n_nationkey', 'append');
master_create_distributed_table create_distributed_table
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -351,6 +351,7 @@ SET search_path TO public;
SELECT quote_ident(current_setting('lc_collate')) as current_locale \gset SELECT quote_ident(current_setting('lc_collate')) as current_locale \gset
CREATE COLLATION test_schema_support.english (LOCALE = :current_locale); CREATE COLLATION test_schema_support.english (LOCALE = :current_locale);
\c - - - :master_port \c - - - :master_port
SET citus.shard_replication_factor TO 2;
CREATE TABLE test_schema_support.nation_hash_collation( CREATE TABLE test_schema_support.nation_hash_collation(
n_nationkey integer not null, n_nationkey integer not null,
n_name char(25) not null COLLATE test_schema_support.english, n_name char(25) not null COLLATE test_schema_support.english,
@ -364,14 +365,8 @@ SELECT master_get_table_ddl_events('test_schema_support.nation_hash_collation')
CREATE TABLE test_schema_support.nation_hash_collation (n_nationkey integer NOT NULL, n_name character(25) NOT NULL COLLATE test_schema_support.english, n_regionkey integer NOT NULL, n_comment character varying(152)) CREATE TABLE test_schema_support.nation_hash_collation (n_nationkey integer NOT NULL, n_name character(25) NOT NULL COLLATE test_schema_support.english, n_regionkey integer NOT NULL, n_comment character varying(152))
(2 rows) (2 rows)
SELECT master_create_distributed_table('test_schema_support.nation_hash_collation', 'n_nationkey', 'hash'); SELECT create_distributed_table('test_schema_support.nation_hash_collation', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none');
master_create_distributed_table create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT master_create_worker_shards('test_schema_support.nation_hash_collation', 4, 2);
master_create_worker_shards
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -407,14 +402,9 @@ CREATE TABLE nation_hash_collation_search_path(
n_regionkey integer not null, n_regionkey integer not null,
n_comment varchar(152) n_comment varchar(152)
); );
SELECT master_create_distributed_table('nation_hash_collation_search_path', 'n_nationkey', 'hash'); SET citus.shard_replication_factor TO 2;
master_create_distributed_table SELECT create_distributed_table('nation_hash_collation_search_path', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none');
--------------------------------------------------------------------- create_distributed_table
(1 row)
SELECT master_create_worker_shards('nation_hash_collation_search_path', 4, 2);
master_create_worker_shards
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -452,14 +442,8 @@ CREATE TABLE test_schema_support.nation_hash_composite_types(
n_comment varchar(152), n_comment varchar(152),
test_col test_schema_support.new_composite_type test_col test_schema_support.new_composite_type
); );
SELECT master_create_distributed_table('test_schema_support.nation_hash_composite_types', 'n_nationkey', 'hash'); SELECT create_distributed_table('test_schema_support.nation_hash_composite_types', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none');
master_create_distributed_table create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT master_create_worker_shards('test_schema_support.nation_hash_composite_types', 4, 2);
master_create_worker_shards
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -638,12 +622,12 @@ DROP INDEX index1;
\c - - - :worker_1_port \c - - - :worker_1_port
\d test_schema_support.index1_1190003 \d test_schema_support.index1_1190003
\c - - - :master_port \c - - - :master_port
-- test master_copy_shard_placement with schemas -- test citus_copy_shard_placement with schemas
SET search_path TO public; SET search_path TO public;
-- mark shard as inactive -- delete placements
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1190000 and nodeport = :worker_1_port; DELETE FROM pg_dist_shard_placement WHERE shardid = 1190000 and nodeport = :worker_1_port;
SELECT master_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); SELECT citus_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -659,9 +643,9 @@ SELECT shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE shardid
--test with search_path is set --test with search_path is set
SET search_path TO test_schema_support; SET search_path TO test_schema_support;
-- mark shard as inactive -- mark shard as inactive
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1190000 and nodeport = :worker_1_port; DELETE FROM pg_dist_shard_placement WHERE shardid = 1190000 and nodeport = :worker_1_port;
SELECT master_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); SELECT citus_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)

View File

@ -667,7 +667,6 @@ SET search_path to "Tenant Isolation";
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_1_port; UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_1_port;
SELECT isolate_tenant_to_new_shard('lineitem_date', '1997-08-08', shard_transfer_mode => 'block_writes'); SELECT isolate_tenant_to_new_shard('lineitem_date', '1997-08-08', shard_transfer_mode => 'block_writes');
ERROR: cannot isolate tenant because relation "lineitem_date" has an inactive shard placement for the shard xxxxx ERROR: cannot isolate tenant because relation "lineitem_date" has an inactive shard placement for the shard xxxxx
HINT: Use master_copy_shard_placement UDF to repair the inactive shard placement.
UPDATE pg_dist_shard_placement SET shardstate = 1 WHERE nodeport = :worker_1_port; UPDATE pg_dist_shard_placement SET shardstate = 1 WHERE nodeport = :worker_1_port;
\c - mx_isolation_role_ent - :master_port \c - mx_isolation_role_ent - :master_port
SET search_path to "Tenant Isolation"; SET search_path to "Tenant Isolation";

View File

@ -661,13 +661,40 @@ SELECT count(*) FROM lineitem_date WHERE l_shipdate = '1997-08-08';
1 1
(1 row) (1 row)
-- test with text distribution column (because of collations)
SET citus.shard_replication_factor TO 1;
CREATE TABLE text_column (tenant_id text, value jsonb);
INSERT INTO text_column VALUES ('hello','{}');
SELECT create_distributed_table('text_column','tenant_id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$"Tenant Isolation".text_column$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT isolate_tenant_to_new_shard('text_column', 'hello', shard_transfer_mode => 'force_logical');
isolate_tenant_to_new_shard
---------------------------------------------------------------------
1230053
(1 row)
SELECT * FROM text_column;
tenant_id | value
---------------------------------------------------------------------
hello | {}
(1 row)
CALL pg_catalog.citus_cleanup_orphaned_resources();
NOTICE: cleaned up 1 orphaned resources
-- test with invalid shard placements -- test with invalid shard placements
\c - postgres - :master_port \c - postgres - :master_port
SET search_path to "Tenant Isolation"; SET search_path to "Tenant Isolation";
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_1_port; UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_1_port;
SELECT isolate_tenant_to_new_shard('lineitem_date', '1997-08-08', shard_transfer_mode => 'force_logical'); SELECT isolate_tenant_to_new_shard('lineitem_date', '1997-08-08', shard_transfer_mode => 'force_logical');
ERROR: cannot isolate tenant because relation "lineitem_date" has an inactive shard placement for the shard xxxxx ERROR: cannot isolate tenant because relation "lineitem_date" has an inactive shard placement for the shard xxxxx
HINT: Use master_copy_shard_placement UDF to repair the inactive shard placement.
UPDATE pg_dist_shard_placement SET shardstate = 1 WHERE nodeport = :worker_1_port; UPDATE pg_dist_shard_placement SET shardstate = 1 WHERE nodeport = :worker_1_port;
\c - mx_isolation_role_ent - :master_port \c - mx_isolation_role_ent - :master_port
SET search_path to "Tenant Isolation"; SET search_path to "Tenant Isolation";
@ -752,7 +779,11 @@ SET citus.override_table_visibility TO false;
Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent
(14 rows) Tenant Isolation | text_column | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230052 | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230053 | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230054 | table | mx_isolation_role_ent
(18 rows)
\c - mx_isolation_role_ent - :master_port \c - mx_isolation_role_ent - :master_port
SET search_path to "Tenant Isolation"; SET search_path to "Tenant Isolation";
@ -783,7 +814,11 @@ SET citus.override_table_visibility TO false;
Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent
(14 rows) Tenant Isolation | text_column | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230052 | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230053 | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230054 | table | mx_isolation_role_ent
(18 rows)
DROP EVENT TRIGGER abort_ddl; DROP EVENT TRIGGER abort_ddl;
-- create a trigger for drops -- create a trigger for drops
@ -852,9 +887,9 @@ SET citus.override_table_visibility TO false;
Tenant Isolation | lineitem_streaming_1230036 | table | mx_isolation_role_ent Tenant Isolation | lineitem_streaming_1230036 | table | mx_isolation_role_ent
Tenant Isolation | lineitem_streaming_1230040 | table | mx_isolation_role_ent Tenant Isolation | lineitem_streaming_1230040 | table | mx_isolation_role_ent
Tenant Isolation | lineitem_streaming_1230041 | table | mx_isolation_role_ent Tenant Isolation | lineitem_streaming_1230041 | table | mx_isolation_role_ent
Tenant Isolation | lineitem_streaming_1230056 | table | mx_isolation_role_ent Tenant Isolation | lineitem_streaming_1230061 | table | mx_isolation_role_ent
Tenant Isolation | lineitem_streaming_1230057 | table | mx_isolation_role_ent Tenant Isolation | lineitem_streaming_1230062 | table | mx_isolation_role_ent
Tenant Isolation | lineitem_streaming_1230058 | table | mx_isolation_role_ent Tenant Isolation | lineitem_streaming_1230063 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming | table | mx_isolation_role_ent Tenant Isolation | orders_streaming | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230014 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230014 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230015 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230015 | table | mx_isolation_role_ent
@ -862,10 +897,14 @@ SET citus.override_table_visibility TO false;
Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230059 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230064 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230060 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230065 | table | mx_isolation_role_ent
Tenant Isolation | orders_streaming_1230061 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230066 | table | mx_isolation_role_ent
(20 rows) Tenant Isolation | text_column | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230052 | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230053 | table | mx_isolation_role_ent
Tenant Isolation | text_column_1230054 | table | mx_isolation_role_ent
(24 rows)
\c - postgres - :worker_1_port \c - postgres - :worker_1_port
DROP EVENT TRIGGER abort_drop; DROP EVENT TRIGGER abort_drop;
@ -1091,7 +1130,7 @@ DROP TABLE test_colocated_table_no_rep_identity;
SELECT isolate_tenant_to_new_shard('test_colocated_table_2', 1, 'CASCADE', shard_transfer_mode => 'auto'); SELECT isolate_tenant_to_new_shard('test_colocated_table_2', 1, 'CASCADE', shard_transfer_mode => 'auto');
isolate_tenant_to_new_shard isolate_tenant_to_new_shard
--------------------------------------------------------------------- ---------------------------------------------------------------------
1230103 1230108
(1 row) (1 row)
SELECT count(*) FROM test_colocated_table_2; SELECT count(*) FROM test_colocated_table_2;
@ -1113,47 +1152,47 @@ ORDER BY 1, 2;
relname | Constraint | Definition relname | Constraint | Definition
--------------------------------------------------------------------- ---------------------------------------------------------------------
test_colocated_table_1 | test_colocated_table_1_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id) test_colocated_table_1 | test_colocated_table_1_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id)
test_colocated_table_1_1230069 | test_colocated_table_1_id_fkey_1230069 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230069(id) test_colocated_table_1_1230074 | test_colocated_table_1_id_fkey_1230074 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230074(id)
test_colocated_table_1_1230071 | test_colocated_table_1_id_fkey_1230071 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230071(id) test_colocated_table_1_1230076 | test_colocated_table_1_id_fkey_1230076 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230076(id)
test_colocated_table_1_1230073 | test_colocated_table_1_id_fkey_1230073 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230073(id) test_colocated_table_1_1230078 | test_colocated_table_1_id_fkey_1230078 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230078(id)
test_colocated_table_1_1230099 | test_colocated_table_1_id_fkey_1230099 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230099(id) test_colocated_table_1_1230104 | test_colocated_table_1_id_fkey_1230104 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230104(id)
test_colocated_table_1_1230100 | test_colocated_table_1_id_fkey_1230100 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230100(id) test_colocated_table_1_1230105 | test_colocated_table_1_id_fkey_1230105 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230105(id)
test_colocated_table_1_1230101 | test_colocated_table_1_id_fkey_1230101 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230101(id) test_colocated_table_1_1230106 | test_colocated_table_1_id_fkey_1230106 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230106(id)
test_colocated_table_2 | test_colocated_table_2_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id) test_colocated_table_2 | test_colocated_table_2_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id)
test_colocated_table_2 | test_colocated_table_2_value_1_fkey | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey(id) test_colocated_table_2 | test_colocated_table_2_value_1_fkey | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey(id)
test_colocated_table_2_1230077 | test_colocated_table_2_id_fkey_1230077 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230069(id) test_colocated_table_2_1230082 | test_colocated_table_2_id_fkey_1230082 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230074(id)
test_colocated_table_2_1230077 | test_colocated_table_2_value_1_fkey_1230077 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_2_1230082 | test_colocated_table_2_value_1_fkey_1230082 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_2_1230079 | test_colocated_table_2_id_fkey_1230079 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230071(id) test_colocated_table_2_1230084 | test_colocated_table_2_id_fkey_1230084 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230076(id)
test_colocated_table_2_1230079 | test_colocated_table_2_value_1_fkey_1230079 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_2_1230084 | test_colocated_table_2_value_1_fkey_1230084 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_2_1230081 | test_colocated_table_2_id_fkey_1230081 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230073(id) test_colocated_table_2_1230086 | test_colocated_table_2_id_fkey_1230086 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230078(id)
test_colocated_table_2_1230081 | test_colocated_table_2_value_1_fkey_1230081 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_2_1230086 | test_colocated_table_2_value_1_fkey_1230086 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_2_1230102 | test_colocated_table_2_id_fkey_1230102 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230099(id) test_colocated_table_2_1230107 | test_colocated_table_2_id_fkey_1230107 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230104(id)
test_colocated_table_2_1230102 | test_colocated_table_2_value_1_fkey_1230102 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_2_1230107 | test_colocated_table_2_value_1_fkey_1230107 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_2_1230103 | test_colocated_table_2_id_fkey_1230103 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230100(id) test_colocated_table_2_1230108 | test_colocated_table_2_id_fkey_1230108 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230105(id)
test_colocated_table_2_1230103 | test_colocated_table_2_value_1_fkey_1230103 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_2_1230108 | test_colocated_table_2_value_1_fkey_1230108 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_2_1230104 | test_colocated_table_2_id_fkey_1230104 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230101(id) test_colocated_table_2_1230109 | test_colocated_table_2_id_fkey_1230109 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230106(id)
test_colocated_table_2_1230104 | test_colocated_table_2_value_1_fkey_1230104 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_2_1230109 | test_colocated_table_2_value_1_fkey_1230109 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_3 | test_colocated_table_3_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id) test_colocated_table_3 | test_colocated_table_3_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id)
test_colocated_table_3 | test_colocated_table_3_id_fkey1 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2(id) test_colocated_table_3 | test_colocated_table_3_id_fkey1 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2(id)
test_colocated_table_3 | test_colocated_table_3_value_1_fkey | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey(id) test_colocated_table_3 | test_colocated_table_3_value_1_fkey | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey(id)
test_colocated_table_3_1230085 | test_colocated_table_3_id_fkey1_1230085 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230077(id) test_colocated_table_3_1230090 | test_colocated_table_3_id_fkey1_1230090 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230082(id)
test_colocated_table_3_1230085 | test_colocated_table_3_id_fkey_1230085 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230069(id) test_colocated_table_3_1230090 | test_colocated_table_3_id_fkey_1230090 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230074(id)
test_colocated_table_3_1230085 | test_colocated_table_3_value_1_fkey_1230085 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_3_1230090 | test_colocated_table_3_value_1_fkey_1230090 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_3_1230087 | test_colocated_table_3_id_fkey1_1230087 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230079(id) test_colocated_table_3_1230092 | test_colocated_table_3_id_fkey1_1230092 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230084(id)
test_colocated_table_3_1230087 | test_colocated_table_3_id_fkey_1230087 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230071(id) test_colocated_table_3_1230092 | test_colocated_table_3_id_fkey_1230092 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230076(id)
test_colocated_table_3_1230087 | test_colocated_table_3_value_1_fkey_1230087 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_3_1230092 | test_colocated_table_3_value_1_fkey_1230092 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_3_1230089 | test_colocated_table_3_id_fkey1_1230089 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230081(id) test_colocated_table_3_1230094 | test_colocated_table_3_id_fkey1_1230094 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230086(id)
test_colocated_table_3_1230089 | test_colocated_table_3_id_fkey_1230089 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230073(id) test_colocated_table_3_1230094 | test_colocated_table_3_id_fkey_1230094 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230078(id)
test_colocated_table_3_1230089 | test_colocated_table_3_value_1_fkey_1230089 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_3_1230094 | test_colocated_table_3_value_1_fkey_1230094 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_3_1230105 | test_colocated_table_3_id_fkey1_1230105 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230102(id) test_colocated_table_3_1230110 | test_colocated_table_3_id_fkey1_1230110 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230107(id)
test_colocated_table_3_1230105 | test_colocated_table_3_id_fkey_1230105 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230099(id) test_colocated_table_3_1230110 | test_colocated_table_3_id_fkey_1230110 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230104(id)
test_colocated_table_3_1230105 | test_colocated_table_3_value_1_fkey_1230105 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_3_1230110 | test_colocated_table_3_value_1_fkey_1230110 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_3_1230106 | test_colocated_table_3_id_fkey1_1230106 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230103(id) test_colocated_table_3_1230111 | test_colocated_table_3_id_fkey1_1230111 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230108(id)
test_colocated_table_3_1230106 | test_colocated_table_3_id_fkey_1230106 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230100(id) test_colocated_table_3_1230111 | test_colocated_table_3_id_fkey_1230111 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230105(id)
test_colocated_table_3_1230106 | test_colocated_table_3_value_1_fkey_1230106 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_3_1230111 | test_colocated_table_3_value_1_fkey_1230111 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
test_colocated_table_3_1230107 | test_colocated_table_3_id_fkey1_1230107 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230104(id) test_colocated_table_3_1230112 | test_colocated_table_3_id_fkey1_1230112 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230109(id)
test_colocated_table_3_1230107 | test_colocated_table_3_id_fkey_1230107 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230101(id) test_colocated_table_3_1230112 | test_colocated_table_3_id_fkey_1230112 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230106(id)
test_colocated_table_3_1230107 | test_colocated_table_3_value_1_fkey_1230107 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) test_colocated_table_3_1230112 | test_colocated_table_3_value_1_fkey_1230112 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id)
(42 rows) (42 rows)
\c - mx_isolation_role_ent - :master_port \c - mx_isolation_role_ent - :master_port

View File

@ -302,7 +302,7 @@ SELECT count(*) FROM mx_table;
5 5
(1 row) (1 row)
-- master_copy_shard_placement -- citus_copy_shard_placement
SELECT logicalrelid, shardid AS testshardid, nodename, nodeport SELECT logicalrelid, shardid AS testshardid, nodename, nodeport
FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement
WHERE logicalrelid = 'mx_table'::regclass AND nodeport=:worker_1_port WHERE logicalrelid = 'mx_table'::regclass AND nodeport=:worker_1_port
@ -311,7 +311,7 @@ LIMIT 1 \gset
SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_port \gset SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_port \gset
INSERT INTO pg_dist_placement (groupid, shardid, shardstate, shardlength) INSERT INTO pg_dist_placement (groupid, shardid, shardstate, shardlength)
VALUES (:worker_2_group, :testshardid, 3, 0); VALUES (:worker_2_group, :testshardid, 3, 0);
SELECT master_copy_shard_placement(:testshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); SELECT citus_copy_shard_placement(:testshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
ERROR: operation is not allowed on this node ERROR: operation is not allowed on this node
HINT: Connect to the coordinator and run it again. HINT: Connect to the coordinator and run it again.
SELECT shardid, nodename, nodeport, shardstate SELECT shardid, nodename, nodeport, shardstate

View File

@ -54,11 +54,11 @@ SELECT result FROM run_command_on_all_nodes('
SELECT result FROM run_command_on_all_nodes(' SELECT result FROM run_command_on_all_nodes('
SELECT colliculocale FROM pg_collation WHERE collname = ''german_phonebook_test''; SELECT colliculocale FROM pg_collation WHERE collname = ''german_phonebook_test'';
'); ');
result result
--------------------------------------------------------------------- ---------------------------------------------------------------------
de-u-co-phonebk de-u-co-phonebk
de-u-co-phonebk de-u-co-phonebk
de-u-co-phonebk de-u-co-phonebk
(3 rows) (3 rows)
-- with non-icu provider, colliculocale will be null, collcollate and collctype will be set -- with non-icu provider, colliculocale will be null, collcollate and collctype will be set
@ -552,7 +552,334 @@ SELECT count(*)=100 FROM copy_test2;
t t
(1 row) (1 row)
--
-- In PG15, unlogged sequences are supported
-- we support this for distributed sequences as well
--
CREATE SEQUENCE seq1;
CREATE UNLOGGED SEQUENCE "pg15"."seq 2";
-- first, test that sequence persistence is distributed correctly
-- when the sequence is distributed
SELECT relname,
CASE relpersistence
WHEN 'u' THEN 'unlogged'
WHEN 'p' then 'logged'
ELSE 'unknown'
END AS logged_info
FROM pg_class
WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace
ORDER BY relname;
relname | logged_info
---------------------------------------------------------------------
seq 2 | unlogged
seq1 | logged
(2 rows)
CREATE TABLE "seq test"(a int, b int default nextval ('seq1'), c int default nextval ('"pg15"."seq 2"'));
SELECT create_distributed_table('"pg15"."seq test"','a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
\c - - - :worker_1_port
SELECT relname,
CASE relpersistence
WHEN 'u' THEN 'unlogged'
WHEN 'p' then 'logged'
ELSE 'unknown'
END AS logged_info
FROM pg_class
WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace
ORDER BY relname;
relname | logged_info
---------------------------------------------------------------------
seq 2 | unlogged
seq1 | logged
(2 rows)
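Instead of reconnecting to each worker with \c, the same persistence check can be issued against every worker at once. A minimal sketch, assuming the run_command_on_workers() helper that ships with Citus:
SELECT nodename, nodeport, result
FROM run_command_on_workers($cmd$
    SELECT string_agg(relname || '=' || relpersistence::text, ', ' ORDER BY relname)
    FROM pg_class
    WHERE relname IN ('seq1', 'seq 2') AND relnamespace = 'pg15'::regnamespace
$cmd$);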
\c - - - :master_port
SET search_path TO pg15;
-- now, check that we can change sequence persistence using ALTER SEQUENCE
ALTER SEQUENCE seq1 SET UNLOGGED;
-- use IF EXISTS
ALTER SEQUENCE IF EXISTS "seq 2" SET LOGGED;
-- check non-existent sequence as well
ALTER SEQUENCE seq_non_exists SET LOGGED;
ERROR: relation "seq_non_exists" does not exist
ALTER SEQUENCE IF EXISTS seq_non_exists SET LOGGED;
NOTICE: relation "seq_non_exists" does not exist, skipping
SELECT relname,
CASE relpersistence
WHEN 'u' THEN 'unlogged'
WHEN 'p' then 'logged'
ELSE 'unknown'
END AS logged_info
FROM pg_class
WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace
ORDER BY relname;
relname | logged_info
---------------------------------------------------------------------
seq 2 | logged
seq1 | unlogged
(2 rows)
\c - - - :worker_1_port
SELECT relname,
CASE relpersistence
WHEN 'u' THEN 'unlogged'
WHEN 'p' then 'logged'
ELSE 'unknown'
END AS logged_info
FROM pg_class
WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace
ORDER BY relname;
relname | logged_info
---------------------------------------------------------------------
seq 2 | logged
seq1 | unlogged
(2 rows)
\c - - - :master_port
SET search_path TO pg15;
-- now, check that we can change sequence persistence using ALTER TABLE
ALTER TABLE seq1 SET LOGGED;
ALTER TABLE "seq 2" SET UNLOGGED;
SELECT relname,
CASE relpersistence
WHEN 'u' THEN 'unlogged'
WHEN 'p' then 'logged'
ELSE 'unknown'
END AS logged_info
FROM pg_class
WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace
ORDER BY relname;
relname | logged_info
---------------------------------------------------------------------
seq 2 | unlogged
seq1 | logged
(2 rows)
\c - - - :worker_1_port
SELECT relname,
CASE relpersistence
WHEN 'u' THEN 'unlogged'
WHEN 'p' then 'logged'
ELSE 'unknown'
END AS logged_info
FROM pg_class
WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace
ORDER BY relname;
relname | logged_info
---------------------------------------------------------------------
seq 2 | unlogged
seq1 | logged
(2 rows)
\c - - - :master_port
SET search_path TO pg15;
-- An identity/serial sequence now automatically gets and follows the
-- persistence level (logged/unlogged) of its owning table.
-- Test this behavior as well
CREATE UNLOGGED TABLE test(a bigserial, b bigserial);
SELECT create_distributed_table('test', 'a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- show that the associated sequences are unlogged
SELECT relname,
CASE relpersistence
WHEN 'u' THEN 'unlogged'
WHEN 'p' then 'logged'
ELSE 'unknown'
END AS logged_info
FROM pg_class
WHERE relname IN ('test_a_seq', 'test_b_seq') AND relnamespace='pg15'::regnamespace
ORDER BY relname;
relname | logged_info
---------------------------------------------------------------------
test_a_seq | unlogged
test_b_seq | unlogged
(2 rows)
\c - - - :worker_1_port
SELECT relname,
CASE relpersistence
WHEN 'u' THEN 'unlogged'
WHEN 'p' then 'logged'
ELSE 'unknown'
END AS logged_info
FROM pg_class
WHERE relname IN ('test_a_seq', 'test_b_seq') AND relnamespace='pg15'::regnamespace
ORDER BY relname;
relname | logged_info
---------------------------------------------------------------------
test_a_seq | unlogged
test_b_seq | unlogged
(2 rows)
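A sketch of the plain PostgreSQL 15 behavior the comment above describes: flipping the persistence of the owning table should carry its owned identity/serial sequences along (whether Citus propagates this particular ALTER to the shards is not assumed here):
ALTER TABLE test SET LOGGED;
-- the owned bigserial sequences are expected to follow the table back to 'logged'
SELECT relname, relpersistence
FROM pg_class
WHERE relname IN ('test_a_seq', 'test_b_seq') AND relnamespace = 'pg15'::regnamespace
ORDER BY relname;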
\c - - - :master_port
SET search_path TO pg15;
-- allow foreign key columns to have SET NULL/DEFAULT on a per-column basis
-- currently only reference tables can support that
CREATE TABLE PKTABLE (tid int, id int, PRIMARY KEY (tid, id));
CREATE TABLE FKTABLE (
tid int, id int,
fk_id_del_set_null int,
fk_id_del_set_default int DEFAULT 0,
FOREIGN KEY (tid, fk_id_del_set_null) REFERENCES PKTABLE ON DELETE SET NULL (fk_id_del_set_null),
FOREIGN KEY (tid, fk_id_del_set_default) REFERENCES PKTABLE ON DELETE SET DEFAULT (fk_id_del_set_default)
);
SELECT create_reference_table('PKTABLE');
create_reference_table
---------------------------------------------------------------------
(1 row)
-- ok, Citus could relax this constraint in the future
SELECT create_distributed_table('FKTABLE', 'tid');
ERROR: cannot create foreign key constraint
DETAIL: SET NULL or SET DEFAULT is not supported in ON DELETE operation when distribution key is included in the foreign key constraint
-- with reference tables it should all work fine
SELECT create_reference_table('FKTABLE');
create_reference_table
---------------------------------------------------------------------
(1 row)
-- show that the definition is as expected
SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = 'fktable'::regclass::oid ORDER BY oid;
pg_get_constraintdef
---------------------------------------------------------------------
FOREIGN KEY (tid, fk_id_del_set_null) REFERENCES pktable(tid, id) ON DELETE SET NULL (fk_id_del_set_null)
FOREIGN KEY (tid, fk_id_del_set_default) REFERENCES pktable(tid, id) ON DELETE SET DEFAULT (fk_id_del_set_default)
(2 rows)
\c - - - :worker_1_port
SET search_path TO pg15;
-- show that the definition is as expected on the worker as well
SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = 'fktable'::regclass::oid ORDER BY oid;
pg_get_constraintdef
---------------------------------------------------------------------
FOREIGN KEY (tid, fk_id_del_set_default) REFERENCES pktable(tid, id) ON DELETE SET DEFAULT (fk_id_del_set_default)
FOREIGN KEY (tid, fk_id_del_set_null) REFERENCES pktable(tid, id) ON DELETE SET NULL (fk_id_del_set_null)
(2 rows)
-- also, make sure that it works as expected
INSERT INTO PKTABLE VALUES (1, 0), (1, 1), (1, 2);
INSERT INTO FKTABLE VALUES
(1, 1, 1, NULL),
(1, 2, NULL, 2);
DELETE FROM PKTABLE WHERE id = 1 OR id = 2;
SELECT * FROM FKTABLE ORDER BY id;
tid | id | fk_id_del_set_null | fk_id_del_set_default
---------------------------------------------------------------------
1 | 1 | |
1 | 2 | | 0
(2 rows)
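For contrast, a minimal sketch (hypothetical, coordinator-local table names) of the behavior without a column list, where ON DELETE SET NULL nulls every referencing column instead of only the listed one:
CREATE TABLE pktable_plain (tid int, id int, PRIMARY KEY (tid, id));
CREATE TABLE fktable_plain (
    tid int, id int, fk_id int,
    FOREIGN KEY (tid, fk_id) REFERENCES pktable_plain ON DELETE SET NULL
);
-- deleting a referenced row sets both tid and fk_id to NULL here, whereas the
-- column list used above limits the action to fk_id_del_set_null only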
\c - - - :master_port
SET search_path TO pg15;
-- test NULL NOT DISTINCT clauses
-- set the next shard id so that the error messages are easier to maintain
SET citus.next_shard_id TO 960050;
CREATE TABLE null_distinct_test(id INT, c1 INT, c2 INT, c3 VARCHAR(10)) ;
SELECT create_distributed_table('null_distinct_test', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE UNIQUE INDEX idx1_null_distinct_test ON null_distinct_test(id, c1) NULLS DISTINCT ;
CREATE UNIQUE INDEX idx2_null_distinct_test ON null_distinct_test(id, c2) NULLS NOT DISTINCT ;
-- populate with some initial data
INSERT INTO null_distinct_test VALUES (1, 1, 1, 'data1') ;
INSERT INTO null_distinct_test VALUES (1, 2, NULL, 'data2') ;
INSERT INTO null_distinct_test VALUES (1, NULL, 3, 'data3') ;
-- should fail as we already have a null value in c2 column
INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ;
ERROR: duplicate key value violates unique constraint "idx2_null_distinct_test_960050"
DETAIL: Key (id, c2)=(1, null) already exists.
CONTEXT: while executing command on localhost:xxxxx
INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ON CONFLICT DO NOTHING;
INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ON CONFLICT (id, c2) DO UPDATE SET c2=100 RETURNING *;
id | c1 | c2 | c3
---------------------------------------------------------------------
1 | 2 | 100 | data2
(1 row)
-- should not fail as null values are distinct for c1 column
INSERT INTO null_distinct_test VALUES (1, NULL, 5, 'data5') ;
-- test that unique constraints also work properly
-- since we have multiple (1,NULL) pairs for columns (id,c1), the first will work and the second will fail
ALTER TABLE null_distinct_test ADD CONSTRAINT uniq_distinct_c1 UNIQUE NULLS DISTINCT (id,c1);
ALTER TABLE null_distinct_test ADD CONSTRAINT uniq_c1 UNIQUE NULLS NOT DISTINCT (id,c1);
ERROR: could not create unique index "uniq_c1_960050"
DETAIL: Key (id, c1)=(1, null) is duplicated.
CONTEXT: while executing command on localhost:xxxxx
-- show all records in the table for fact checking
SELECT * FROM null_distinct_test ORDER BY c3;
id | c1 | c2 | c3
---------------------------------------------------------------------
1 | 1 | 1 | data1
1 | 2 | 100 | data2
1 | | 3 | data3
1 | | 5 | data5
(4 rows)
-- test unique nulls not distinct constraints on a reference table
CREATE TABLE reference_uniq_test (
x int, y int,
UNIQUE NULLS NOT DISTINCT (x, y)
);
SELECT create_reference_table('reference_uniq_test');
create_reference_table
---------------------------------------------------------------------
(1 row)
INSERT INTO reference_uniq_test VALUES (1, 1), (1, NULL), (NULL, 1);
-- the following will fail
INSERT INTO reference_uniq_test VALUES (1, NULL);
ERROR: duplicate key value violates unique constraint "reference_uniq_test_x_y_key_960054"
DETAIL: Key (x, y)=(1, null) already exists.
CONTEXT: while executing command on localhost:xxxxx
--
-- PG15 introduces CLUSTER command support for partitioned tables. However, similar to
-- CLUSTER commands with no table name, these queries cannot be run inside a transaction
-- block. Therefore, we do not propagate such queries.
--
-- Should print a warning that it will not be propagated to worker nodes.
CLUSTER sale USING sale_pk;
WARNING: not propagating CLUSTER command for partitioned table to worker nodes
HINT: Provide a child partition table names in order to CLUSTER distributed partitioned tables.
-- verify that we can cluster the partition tables only when replication factor is 1
CLUSTER sale_newyork USING sale_newyork_pkey;
ERROR: modifications on partitions when replication factor is greater than 1 is not supported
HINT: Run the query on the parent table "sale" instead.
-- create a new partitioned table with shard replication factor 1
SET citus.shard_replication_factor = 1;
CREATE TABLE sale_repl_factor_1 ( LIKE sale )
PARTITION BY list (state_code);
ALTER TABLE sale_repl_factor_1 ADD CONSTRAINT sale_repl_factor_1_pk PRIMARY KEY (state_code, sale_date);
CREATE TABLE sale_newyork_repl_factor_1 PARTITION OF sale_repl_factor_1 FOR VALUES IN ('NY');
CREATE TABLE sale_california_repl_factor_1 PARTITION OF sale_repl_factor_1 FOR VALUES IN ('CA');
SELECT create_distributed_table('sale_repl_factor_1', 'state_code');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Should print a warning that it will not be propagated to worker nodes.
CLUSTER sale_repl_factor_1 USING sale_repl_factor_1_pk;
WARNING: not propagating CLUSTER command for partitioned table to worker nodes
HINT: Provide a child partition table names in order to CLUSTER distributed partitioned tables.
-- verify that we can still cluster the partition tables now since replication factor is 1
CLUSTER sale_newyork_repl_factor_1 USING sale_newyork_repl_factor_1_pkey;
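The warning's hint asks for the child partitions by name; a minimal sketch of doing that by hand (pg_partition_tree() is standard PostgreSQL, and the California _pkey index name is assumed to follow the default partition index naming seen above):
-- list the leaf partitions of the distributed partitioned table
SELECT relid FROM pg_partition_tree('sale_repl_factor_1') WHERE isleaf;
-- then cluster each remaining leaf partition explicitly, e.g. the California one
CLUSTER sale_california_repl_factor_1 USING sale_california_repl_factor_1_pkey;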
-- Clean up -- Clean up
RESET citus.shard_replication_factor;
\set VERBOSITY terse \set VERBOSITY terse
SET client_min_messages TO ERROR;
DROP SCHEMA pg15 CASCADE; DROP SCHEMA pg15 CASCADE;
NOTICE: drop cascades to 13 other objects

View File

@ -1,488 +0,0 @@
--
-- PG15+ test
--
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15
\gset
\if :server_version_ge_15
\else
\q
\endif
CREATE SCHEMA pg15_json;
SET search_path TO pg15_json;
SET citus.next_shard_id TO 1687000;
CREATE TABLE test_table(id bigserial, value text);
SELECT create_distributed_table('test_table', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO test_table (value) SELECT i::text FROM generate_series(0,100)i;
CREATE TABLE my_films(id bigserial, js jsonb);
SELECT create_distributed_table('my_films', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO my_films(js) VALUES (
'{ "favorites" : [
{ "kind" : "comedy", "films" : [ { "title" : "Bananas", "director" : "Woody Allen"},
{ "title" : "The Dinner Game", "director" : "Francis Veber" } ] },
{ "kind" : "horror", "films" : [{ "title" : "Psycho", "director" : "Alfred Hitchcock" } ] },
{ "kind" : "thriller", "films" : [{ "title" : "Vertigo", "director" : "Alfred Hitchcock" } ] },
{ "kind" : "drama", "films" : [{ "title" : "Yojimbo", "director" : "Akira Kurosawa" } ] }
] }');
INSERT INTO my_films(js) VALUES (
'{ "favorites" : [
{ "kind" : "comedy", "films" : [ { "title" : "Bananas2", "director" : "Woody Allen"},
{ "title" : "The Dinner Game2", "director" : "Francis Veber" } ] },
{ "kind" : "horror", "films" : [{ "title" : "Psycho2", "director" : "Alfred Hitchcock" } ] },
{ "kind" : "thriller", "films" : [{ "title" : "Vertigo2", "director" : "Alfred Hitchcock" } ] },
{ "kind" : "drama", "films" : [{ "title" : "Yojimbo2", "director" : "Akira Kurosawa" } ] }
] }');
-- a router query
SELECT jt.* FROM
my_films,
JSON_TABLE ( js, '$.favorites[*]' COLUMNS (
id FOR ORDINALITY,
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (
title text PATH '$.title',
director text PATH '$.director'))) AS jt
WHERE my_films.id = 1
ORDER BY 1,2,3,4;
id | kind | title | director
---------------------------------------------------------------------
1 | comedy | Bananas | Woody Allen
1 | comedy | The Dinner Game | Francis Veber
2 | horror | Psycho | Alfred Hitchcock
3 | thriller | Vertigo | Alfred Hitchcock
4 | drama | Yojimbo | Akira Kurosawa
(5 rows)
-- router query with an explicit LATERAL SUBQUERY
SELECT sub.*
FROM my_films,
lateral(SELECT * FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt) as sub
WHERE my_films.id = 1 ORDER BY 1,2,3,4;
id | kind | title | director
---------------------------------------------------------------------
1 | comedy | Bananas | Woody Allen
1 | comedy | The Dinner Game | Francis Veber
2 | horror | Psycho | Alfred Hitchcock
3 | thriller | Vertigo | Alfred Hitchcock
4 | drama | Yojimbo | Akira Kurosawa
(5 rows)
-- router query with an explicit LATERAL SUBQUERY and LIMIT
SELECT sub.*
FROM my_films,
lateral(SELECT * FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt ORDER BY id DESC LIMIT 1) as sub
WHERE my_films.id = 1 ORDER BY 1,2,3,4;
id | kind | title | director
---------------------------------------------------------------------
4 | drama | Yojimbo | Akira Kurosawa
(1 row)
-- set it to DEBUG1 so that, in case the plan changes,
-- we can see the details
SET client_min_messages TO DEBUG1;
-- a multi-shard query
SELECT jt.* FROM
my_films,
JSON_TABLE ( js, '$.favorites[*]' COLUMNS (
id FOR ORDINALITY,
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (
title text PATH '$.title',
director text PATH '$.director'))) AS jt
ORDER BY 1,2,3,4;
id | kind | title | director
---------------------------------------------------------------------
1 | comedy | Bananas | Woody Allen
1 | comedy | Bananas2 | Woody Allen
1 | comedy | The Dinner Game | Francis Veber
1 | comedy | The Dinner Game2 | Francis Veber
2 | horror | Psycho | Alfred Hitchcock
2 | horror | Psycho2 | Alfred Hitchcock
3 | thriller | Vertigo | Alfred Hitchcock
3 | thriller | Vertigo2 | Alfred Hitchcock
4 | drama | Yojimbo | Akira Kurosawa
4 | drama | Yojimbo2 | Akira Kurosawa
(10 rows)
-- recursively plan subqueries that have JSON_TABLE
SELECT count(*) FROM
(
SELECT jt.* FROM
my_films,
JSON_TABLE ( js, '$.favorites[*]' COLUMNS (
id FOR ORDINALITY,
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (
title text PATH '$.title',
director text PATH '$.director'))) AS jt
LIMIT 1) as sub_with_json, test_table
WHERE test_table.id = sub_with_json.id;
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_1 for subquery SELECT jt.id, jt.kind, jt.title, jt.director FROM pg15_json.my_films, LATERAL JSON_TABLE(my_films.js, '$."favorites"[*]' AS json_table_path_1 COLUMNS (id FOR ORDINALITY, kind text PATH '$."kind"', NESTED PATH '$."films"[*]' AS json_table_path_2 COLUMNS (title text PATH '$."title"', director text PATH '$."director"')) PLAN (json_table_path_1 OUTER json_table_path_2)) jt LIMIT 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.id, intermediate_result.kind, intermediate_result.title, intermediate_result.director FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, kind text, title text, director text)) sub_with_json, pg15_json.test_table WHERE (test_table.id OPERATOR(pg_catalog.=) sub_with_json.id)
count
---------------------------------------------------------------------
1
(1 row)
-- multi-shard query with an explicit LATERAL SUBQUERY
SELECT sub.*
FROM my_films JOIN
lateral
(SELECT *
FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind', NESTED PATH '$.films[*]'
COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt
LIMIT 1000) AS sub ON (true)
ORDER BY 1,2,3,4;
id | kind | title | director
---------------------------------------------------------------------
1 | comedy | Bananas | Woody Allen
1 | comedy | Bananas2 | Woody Allen
1 | comedy | The Dinner Game | Francis Veber
1 | comedy | The Dinner Game2 | Francis Veber
2 | horror | Psycho | Alfred Hitchcock
2 | horror | Psycho2 | Alfred Hitchcock
3 | thriller | Vertigo | Alfred Hitchcock
3 | thriller | Vertigo2 | Alfred Hitchcock
4 | drama | Yojimbo | Akira Kurosawa
4 | drama | Yojimbo2 | Akira Kurosawa
(10 rows)
-- JSON_TABLE can be on the inner part of an outer join
SELECT sub.*
FROM my_films LEFT JOIN
lateral
(SELECT *
FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind', NESTED PATH '$.films[*]'
COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt
LIMIT 1000) AS sub ON (true)
ORDER BY 1,2,3,4;
id | kind | title | director
---------------------------------------------------------------------
1 | comedy | Bananas | Woody Allen
1 | comedy | Bananas2 | Woody Allen
1 | comedy | The Dinner Game | Francis Veber
1 | comedy | The Dinner Game2 | Francis Veber
2 | horror | Psycho | Alfred Hitchcock
2 | horror | Psycho2 | Alfred Hitchcock
3 | thriller | Vertigo | Alfred Hitchcock
3 | thriller | Vertigo2 | Alfred Hitchcock
4 | drama | Yojimbo | Akira Kurosawa
4 | drama | Yojimbo2 | Akira Kurosawa
(10 rows)
-- we can pushdown this correlated subquery in WHERE clause
SELECT count(*)
FROM my_films WHERE
(SELECT count(*) > 0
FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind', NESTED PATH '$.films[*]'
COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt
LIMIT 1000);
count
---------------------------------------------------------------------
2
(1 row)
-- we can pushdown this correlated subquery in SELECT clause
SELECT (SELECT count(*) > 0
FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind', NESTED PATH '$.films[*]'
COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt)
FROM my_films;
?column?
---------------------------------------------------------------------
t
t
(2 rows)
-- multi-shard query with an explicit LATERAL SUBQUERY
-- along with other tables
SELECT sub.*
FROM my_films JOIN
lateral
(SELECT *
FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind', NESTED PATH '$.films[*]'
COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt
LIMIT 1000) AS sub ON (true) JOIN test_table ON(my_films.id = test_table.id)
ORDER BY 1,2,3,4;
id | kind | title | director
---------------------------------------------------------------------
1 | comedy | Bananas | Woody Allen
1 | comedy | Bananas2 | Woody Allen
1 | comedy | The Dinner Game | Francis Veber
1 | comedy | The Dinner Game2 | Francis Veber
2 | horror | Psycho | Alfred Hitchcock
2 | horror | Psycho2 | Alfred Hitchcock
3 | thriller | Vertigo | Alfred Hitchcock
3 | thriller | Vertigo2 | Alfred Hitchcock
4 | drama | Yojimbo | Akira Kurosawa
4 | drama | Yojimbo2 | Akira Kurosawa
(10 rows)
-- non-colocated join fails
SELECT sub.*
FROM my_films JOIN
lateral
(SELECT *
FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind', NESTED PATH '$.films[*]'
COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt
LIMIT 1000) AS sub ON (true) JOIN test_table ON(my_films.id != test_table.id)
ORDER BY 1,2,3,4;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- JSON_TABLE can be in the outer part of the join
-- as long as there is a distributed table
SELECT sub.*
FROM my_films JOIN
lateral
(SELECT *
FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY,
kind text PATH '$.kind', NESTED PATH '$.films[*]'
COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt
LIMIT 1000) AS sub ON (true) LEFT JOIN test_table ON(my_films.id = test_table.id)
ORDER BY 1,2,3,4;
id | kind | title | director
---------------------------------------------------------------------
1 | comedy | Bananas | Woody Allen
1 | comedy | Bananas2 | Woody Allen
1 | comedy | The Dinner Game | Francis Veber
1 | comedy | The Dinner Game2 | Francis Veber
2 | horror | Psycho | Alfred Hitchcock
2 | horror | Psycho2 | Alfred Hitchcock
3 | thriller | Vertigo | Alfred Hitchcock
3 | thriller | Vertigo2 | Alfred Hitchcock
4 | drama | Yojimbo | Akira Kurosawa
4 | drama | Yojimbo2 | Akira Kurosawa
(10 rows)
-- JSON_TABLE cannot be on the outer side of the join
SELECT *
FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]'
COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text))
LEFT JOIN LATERAL
(SELECT *
FROM my_films) AS foo on(foo.id = a);
ERROR: cannot pushdown the subquery
DETAIL: There exist a JSON_TABLE clause in the outer part of the outer join
-- JSON_TABLE cannot be in the FROM clause alone
SELECT *
FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]'
COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text)) as foo
WHERE b >
(SELECT count(*)
FROM my_films WHERE id = foo.a);
ERROR: correlated subqueries are not supported when the FROM clause contains JSON_TABLE
-- we can recursively plan json_tables on set operations
(SELECT *
FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]'
COLUMNS (id FOR ORDINALITY)) ORDER BY id ASC LIMIT 1)
UNION
(SELECT *
FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]'
COLUMNS (id FOR ORDINALITY)) ORDER BY id ASC LIMIT 1)
UNION
(SELECT id FROM test_table ORDER BY id ASC LIMIT 1);
DEBUG: generating subplan XXX_1 for subquery SELECT id FROM JSON_TABLE('[{"a": 10, "b": 20}, {"a": 30, "b": 40}]'::jsonb, '$[*]' AS json_table_path_1 COLUMNS (id FOR ORDINALITY) PLAN (json_table_path_1)) ORDER BY id LIMIT 1
DEBUG: generating subplan XXX_2 for subquery SELECT id FROM JSON_TABLE('[{"a": 10, "b": 20}, {"a": 30, "b": 40}]'::jsonb, '$[*]' AS json_table_path_1 COLUMNS (id FOR ORDINALITY) PLAN (json_table_path_1)) ORDER BY id LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: generating subplan XXX_3 for subquery SELECT id FROM pg15_json.test_table ORDER BY id LIMIT 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer) UNION SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer) UNION SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id bigint)
id
---------------------------------------------------------------------
1
(1 row)
-- LIMIT in subquery not supported when json_table exists
SELECT *
FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]'
COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text))
JOIN LATERAL
(SELECT *
FROM my_films WHERE json_table.id = a LIMIT 1) as foo ON (true);
ERROR: cannot push down this subquery
DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a JSON_TABLE
-- a little more complex query with multiple json_table
SELECT
director1 AS director, title1, kind1, title2, kind2
FROM
my_films,
JSON_TABLE ( js, '$.favorites' AS favs COLUMNS (
NESTED PATH '$[*]' AS films1 COLUMNS (
kind1 text PATH '$.kind',
NESTED PATH '$.films[*]' AS film1 COLUMNS (
title1 text PATH '$.title',
director1 text PATH '$.director')
),
NESTED PATH '$[*]' AS films2 COLUMNS (
kind2 text PATH '$.kind',
NESTED PATH '$.films[*]' AS film2 COLUMNS (
title2 text PATH '$.title',
director2 text PATH '$.director'
)
)
)
PLAN (favs INNER ((films1 INNER film1) CROSS (films2 INNER film2)))
) AS jt
WHERE kind1 > kind2 AND director1 = director2
ORDER BY 1,2,3,4;
director | title1 | kind1 | title2 | kind2
---------------------------------------------------------------------
Alfred Hitchcock | Vertigo | thriller | Psycho | horror
Alfred Hitchcock | Vertigo2 | thriller | Psycho2 | horror
(2 rows)
RESET client_min_messages;
-- test some utility functions on the target list & where clause
select jsonb_path_exists(js, '$.favorites') from my_films;
jsonb_path_exists
---------------------------------------------------------------------
t
t
(2 rows)
select bool_and(JSON_EXISTS(js, '$.favorites.films.title')) from my_films;
bool_and
---------------------------------------------------------------------
t
(1 row)
SELECT count(*) FROM my_films WHERE jsonb_path_exists(js, '$.favorites');
count
---------------------------------------------------------------------
2
(1 row)
SELECT count(*) FROM my_films WHERE jsonb_path_exists(js, '$.favorites');
count
---------------------------------------------------------------------
2
(1 row)
SELECT count(*) FROM my_films WHERE JSON_EXISTS(js, '$.favorites.films.title');
count
---------------------------------------------------------------------
2
(1 row)
-- check constraint with json_exists
create table user_profiles (
id bigserial,
addresses jsonb,
anyjson jsonb,
check (json_exists( addresses, '$.main' ))
);
select create_distributed_table('user_profiles', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
insert into user_profiles (addresses) VALUES (JSON_SCALAR('1'));
ERROR: new row for relation "user_profiles_1687008" violates check constraint "user_profiles_addresses_check"
DETAIL: Failing row contains (1, "1", null).
CONTEXT: while executing command on localhost:xxxxx
insert into user_profiles (addresses) VALUES ('{"main":"value"}');
-- we cannot insert because of WITH UNIQUE KEYS
insert into user_profiles (addresses) VALUES (JSON ('{"main":"value", "main":"value"}' WITH UNIQUE KEYS));
ERROR: duplicate JSON object key value
-- we can insert with WITHOUT UNIQUE KEYS
insert into user_profiles (addresses) VALUES (JSON ('{"main":"value", "main":"value"}' WITHOUT UNIQUE KEYS)) RETURNING *;
id | addresses | anyjson
---------------------------------------------------------------------
4 | {"main": "value"} |
(1 row)
TRUNCATE user_profiles;
INSERT INTO user_profiles (anyjson) VALUES ('12'), ('"abc"'), ('[1,2,3]'), ('{"a":12}');
select anyjson, anyjson is json array as json_array, anyjson is json object as json_object, anyjson is json scalar as json_scalar,
anyjson is json with UNIQUE keys
from user_profiles WHERE anyjson IS NOT NULL ORDER BY 1;
anyjson | json_array | json_object | json_scalar | ?column?
---------------------------------------------------------------------
"abc" | f | f | t | t
12 | f | f | t | t
[1, 2, 3] | t | f | f | t
{"a": 12} | f | t | f | t
(4 rows)
-- use json_query
SELECT i,
json_query('[{"x": "aaa"},{"x": "bbb"},{"x": "ccc"}]'::JSONB, '$[$i].x' passing id AS i RETURNING text omit quotes)
FROM generate_series(0, 3) i
JOIN my_films ON(id = i) ORDER BY 1;
i | json_query
---------------------------------------------------------------------
1 | bbb
2 | ccc
(2 rows)
-- we can use JSON_TABLE in modification queries as well
-- use a log level such that we can trace the changes
SET client_min_messages TO DEBUG1;
-- the JSON_TABLE subquery is recursively planned
UPDATE test_table SET VALUE = 'XXX' FROM(
SELECT jt.* FROM
my_films,
JSON_TABLE ( js, '$.favorites[*]' COLUMNS (
id FOR ORDINALITY,
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (
title text PATH '$.title',
director text PATH '$.director'))) AS jt) as foo WHERE foo.id = test_table.id;
DEBUG: generating subplan XXX_1 for subquery SELECT jt.id, jt.kind, jt.title, jt.director FROM pg15_json.my_films, LATERAL JSON_TABLE(my_films.js, '$."favorites"[*]' AS json_table_path_1 COLUMNS (id FOR ORDINALITY, kind text PATH '$."kind"', NESTED PATH '$."films"[*]' AS json_table_path_2 COLUMNS (title text PATH '$."title"', director text PATH '$."director"')) PLAN (json_table_path_1 OUTER json_table_path_2)) jt
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE pg15_json.test_table SET value = 'XXX'::text FROM (SELECT intermediate_result.id, intermediate_result.kind, intermediate_result.title, intermediate_result.director FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, kind text, title text, director text)) foo WHERE (foo.id OPERATOR(pg_catalog.=) test_table.id)
-- A subquery with JSON_TABLE can be pushed down because the two distributed tables
-- in the query are joined on the distribution column
UPDATE test_table SET VALUE = 'XXX' FROM (
SELECT my_films.id, jt.* FROM
my_films,
JSON_TABLE ( js, '$.favorites[*]' COLUMNS (
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (
title text PATH '$.title',
director text PATH '$.director'))) AS jt) as foo WHERE foo.id = test_table.id;
-- we can pushdown with CTEs as well
WITH json_cte AS
(SELECT my_films.id, jt.* FROM
my_films,
JSON_TABLE ( js, '$.favorites[*]' COLUMNS (
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (
title text PATH '$.title',
director text PATH '$.director'))) AS jt)
UPDATE test_table SET VALUE = 'XYZ' FROM json_cte
WHERE json_cte.id = test_table.id;
-- we can recursively plan with CTEs as well
WITH json_cte AS
(SELECT my_films.id as film_id, jt.* FROM
my_films,
JSON_TABLE ( js, '$.favorites[*]' COLUMNS (
kind text PATH '$.kind',
NESTED PATH '$.films[*]' COLUMNS (
id FOR ORDINALITY,
title text PATH '$.title',
director text PATH '$.director'))) AS jt ORDER BY jt.id LIMIT 1)
UPDATE test_table SET VALUE = 'XYZ' FROM json_cte
WHERE json_cte.film_id = test_table.id;
DEBUG: generating subplan XXX_1 for CTE json_cte: SELECT my_films.id AS film_id, jt.kind, jt.id, jt.title, jt.director FROM pg15_json.my_films, LATERAL JSON_TABLE(my_films.js, '$."favorites"[*]' AS json_table_path_1 COLUMNS (kind text PATH '$."kind"', NESTED PATH '$."films"[*]' AS json_table_path_2 COLUMNS (id FOR ORDINALITY, title text PATH '$."title"', director text PATH '$."director"')) PLAN (json_table_path_1 OUTER json_table_path_2)) jt ORDER BY jt.id LIMIT 1
DEBUG: push down of limit count: 1
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE pg15_json.test_table SET value = 'XYZ'::text FROM (SELECT intermediate_result.film_id, intermediate_result.kind, intermediate_result.id, intermediate_result.title, intermediate_result.director FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(film_id bigint, kind text, id integer, title text, director text)) json_cte WHERE (json_cte.film_id OPERATOR(pg_catalog.=) test_table.id)
SET client_min_messages TO ERROR;
DROP SCHEMA pg15_json CASCADE;
View File
@ -1,9 +0,0 @@
--
-- PG15+ test
--
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15
\gset
\if :server_version_ge_15
\else
\q
View File
@ -203,7 +203,7 @@ SELECT create_distributed_table('collections_agg', 'key');
INSERT INTO collections_agg SELECT key, sum(key) FROM collections_1 GROUP BY key; INSERT INTO collections_agg SELECT key, sum(key) FROM collections_1 GROUP BY key;
-- coordinator roll-up -- coordinator roll-up
INSERT INTO collections_agg SELECT collection_id, sum(key) FROM collections_1 GROUP BY collection_id; INSERT INTO collections_agg SELECT collection_id, sum(key) FROM collections_1 GROUP BY collection_id;
-- now make sure that repair functionality works fine -- now make sure that copy functionality works fine
-- create a table and create its distribution metadata -- create a table and create its distribution metadata
CREATE TABLE customer_engagements (id integer, event_id int) PARTITION BY LIST ( event_id ); CREATE TABLE customer_engagements (id integer, event_id int) PARTITION BY LIST ( event_id );
CREATE TABLE customer_engagements_1 CREATE TABLE customer_engagements_1
@ -220,7 +220,7 @@ CREATE INDEX ON customer_engagements (id, event_id);
-- create a single shard on the first worker -- create a single shard on the first worker
SET citus.shard_count TO 1; SET citus.shard_count TO 1;
SET citus.shard_replication_factor TO 2; SET citus.shard_replication_factor TO 2;
SELECT create_distributed_table('customer_engagements', 'id', 'hash'); SELECT create_distributed_table('customer_engagements', 'id', 'hash', colocate_with := 'none');
create_distributed_table create_distributed_table
--------------------------------------------------------------------- ---------------------------------------------------------------------
@ -231,30 +231,22 @@ INSERT INTO customer_engagements VALUES (1, 1);
INSERT INTO customer_engagements VALUES (2, 1); INSERT INTO customer_engagements VALUES (2, 1);
INSERT INTO customer_engagements VALUES (1, 2); INSERT INTO customer_engagements VALUES (1, 2);
INSERT INTO customer_engagements VALUES (2, 2); INSERT INTO customer_engagements VALUES (2, 2);
-- the following queries do the following:
-- (i) create a new shard
-- (ii) mark the second shard placements as unhealthy
-- (iii) do basic checks i.e., only allow copy from healthy placement to unhealthy ones
-- (iv) do a successful master_copy_shard_placement from the first placement to the second
-- (v) mark the first placement as unhealthy and execute a query that is routed to the second placement
SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
SELECT groupid AS worker_1_group FROM pg_dist_node WHERE nodeport=:worker_1_port \gset
-- get the newshardid -- get the newshardid
SELECT shardid as newshardid FROM pg_dist_shard WHERE logicalrelid = 'customer_engagements'::regclass SELECT shardid as newshardid FROM pg_dist_shard WHERE logicalrelid = 'customer_engagements'::regclass
\gset \gset
-- now, update the second placement as unhealthy -- delete all the placements on the second node belonging to partitioning hierarchy
UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid DELETE FROM pg_dist_shard_placement p USING pg_dist_shard s
AND groupid = :worker_2_group; WHERE s.shardid = p.shardid AND nodeport = :worker_2_port AND logicalrelid::text LIKE 'customer_engagements%';
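-- a verification sketch, not part of the original test: list the placements that
-- remain for the partitioning hierarchy after the delete above
SELECT s.logicalrelid, p.shardid, p.nodeport
FROM pg_dist_shard_placement p JOIN pg_dist_shard s ON s.shardid = p.shardid
WHERE s.logicalrelid::text LIKE 'customer_engagements%'
ORDER BY 1, 2, 3;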
-- cannot repair a shard after a modification (transaction still open during repair) -- cannot copy a shard after a modification (transaction still open during copy)
BEGIN; BEGIN;
INSERT INTO customer_engagements VALUES (1, 1); INSERT INTO customer_engagements VALUES (1, 1);
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes');
ERROR: cannot open new connections after the first modification command within a transaction ERROR: cannot open new connections after the first modification command within a transaction
ROLLBACK; ROLLBACK;
-- modifications after reparing a shard are fine (will use new metadata) -- modifications after copying a shard are fine (will use new metadata)
BEGIN; BEGIN;
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
@ -271,8 +263,8 @@ SELECT * FROM customer_engagements ORDER BY 1,2,3;
ROLLBACK; ROLLBACK;
BEGIN; BEGIN;
SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
View File
@ -1044,7 +1044,7 @@ SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', rebala
-- Check that we can call this function -- Check that we can call this function
SELECT * FROM get_rebalance_progress(); SELECT * FROM get_rebalance_progress();
sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size | operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
@ -1058,7 +1058,7 @@ SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0,
CALL citus_cleanup_orphaned_shards(); CALL citus_cleanup_orphaned_shards();
-- Check that we can call this function without a crash -- Check that we can call this function without a crash
SELECT * FROM get_rebalance_progress(); SELECT * FROM get_rebalance_progress();
sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size | operation_type
--------------------------------------------------------------------- ---------------------------------------------------------------------
(0 rows) (0 rows)
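-- a minimal sketch, not part of the test above: the new operation_type column lets
-- callers distinguish shard moves from shard copies (assuming 'move' and 'copy' are
-- the reported values)
SELECT sessionid, shardid, sourceport, targetport, progress, operation_type
FROM get_rebalance_progress()
WHERE operation_type = 'copy';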
View File
@ -164,7 +164,7 @@ SELECT * FROM master_get_table_ddl_events('test_range_dist');
(2 rows) (2 rows)
-- --
-- Test master_copy_shard_placement with a fake_am table -- Test citus_copy_shard_placement with a fake_am table
-- --
select a.shardid, a.nodeport select a.shardid, a.nodeport
FROM pg_dist_shard b, pg_dist_shard_placement a FROM pg_dist_shard b, pg_dist_shard_placement a
@ -178,15 +178,14 @@ ORDER BY a.shardid, nodeport;
60003 | 57638 60003 | 57638
(4 rows) (4 rows)
-- Change repmodel to allow master_copy_shard_placement -- Change repmodel to allow citus_copy_shard_placement
UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid = 'test_hash_dist'::regclass; UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid = 'test_hash_dist'::regclass;
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('test_hash_dist', '1'), get_shard_id_for_distribution_column('test_hash_dist', '1'),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
master_copy_shard_placement citus_copy_shard_placement
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
View File
@ -35,7 +35,7 @@ ORDER BY 1;
function citus_cleanup_orphaned_shards() function citus_cleanup_orphaned_shards()
function citus_conninfo_cache_invalidate() function citus_conninfo_cache_invalidate()
function citus_coordinator_nodeid() function citus_coordinator_nodeid()
function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) function citus_copy_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode)
function citus_create_restore_point(text) function citus_create_restore_point(text)
function citus_disable_node(text,integer,boolean) function citus_disable_node(text,integer,boolean)
function citus_dist_local_group_cache_invalidate() function citus_dist_local_group_cache_invalidate()
@ -90,6 +90,9 @@ ORDER BY 1;
function citus_pid_for_gpid(bigint) function citus_pid_for_gpid(bigint)
function citus_prepare_pg_upgrade() function citus_prepare_pg_upgrade()
function citus_query_stats() function citus_query_stats()
function citus_rebalance_start(name,boolean,citus.shard_transfer_mode)
function citus_rebalance_stop()
function citus_rebalance_wait()
function citus_relation_size(regclass) function citus_relation_size(regclass)
function citus_remote_connection_stats() function citus_remote_connection_stats()
function citus_remove_node(text,integer) function citus_remove_node(text,integer)
@ -283,5 +286,5 @@ ORDER BY 1;
view citus_stat_statements view citus_stat_statements
view pg_dist_shard_placement view pg_dist_shard_placement
view time_partitions view time_partitions
(275 rows) (278 rows)
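-- a minimal usage sketch based on the signatures listed above; the shard id, hosts
-- and ports are placeholders, not values from this test suite
SELECT citus_copy_shard_placement(102008, 'localhost', 9701, 'localhost', 9702,
                                  transfer_mode := 'block_writes');
SELECT citus_rebalance_start(shard_transfer_mode => 'force_logical');
SELECT citus_rebalance_wait();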
View File
@ -26,7 +26,6 @@ test: isolation_citus_dist_activity
test: isolation_remove_coordinator test: isolation_remove_coordinator
test: isolation_insert_select_repartition test: isolation_insert_select_repartition
test: isolation_dml_vs_repair
test: isolation_copy_placement_vs_copy_placement test: isolation_copy_placement_vs_copy_placement
test: isolation_concurrent_dml test: isolation_concurrent_dml
File diff suppressed because it is too large
View File
@ -1,9 +0,0 @@
--
-- PG15+ test
--
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15
\gset
\if :server_version_ge_15
\else
\q
View File
@ -180,7 +180,7 @@ test: multi_modifications
test: multi_distribution_metadata test: multi_distribution_metadata
test: multi_prune_shard_list test: multi_prune_shard_list
test: multi_upsert multi_simple_queries multi_data_types test: multi_upsert multi_simple_queries multi_data_types
test: master_copy_shard_placement test: citus_copy_shard_placement
# multi_utilities cannot be run in parallel with other tests because it checks # multi_utilities cannot be run in parallel with other tests because it checks
# global locks # global locks
test: multi_utilities test: multi_utilities
@ -188,7 +188,7 @@ test: foreign_key_to_reference_table validate_constraint
test: multi_repartition_udt multi_repartitioned_subquery_udf multi_subtransactions test: multi_repartition_udt multi_repartitioned_subquery_udf multi_subtransactions
test: multi_modifying_xacts test: multi_modifying_xacts
test: multi_generate_ddl_commands multi_repair_shards test: multi_generate_ddl_commands
test: multi_create_shards test: multi_create_shards
test: multi_transaction_recovery test: multi_transaction_recovery
@ -253,10 +253,8 @@ test: multi_truncate
# ---------- # ----------
# multi_colocation_utils tests utility functions written for co-location feature & internal API # multi_colocation_utils tests utility functions written for co-location feature & internal API
# multi_colocated_shard_transfer tests master_copy_shard_placement with colocated tables.
# ---------- # ----------
test: multi_colocation_utils test: multi_colocation_utils
test: multi_colocated_shard_transfer
# ---------- # ----------
# node_conninfo_reload tests that node_conninfo changes take effect # node_conninfo_reload tests that node_conninfo changes take effect
View File
@ -55,7 +55,7 @@ test: subquery_in_targetlist subquery_in_where subquery_complex_target_list subq
test: subquery_prepared_statements test: subquery_prepared_statements
test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins
test: cte_inline recursive_view_local_table values sequences_with_different_types test: cte_inline recursive_view_local_table values sequences_with_different_types
test: pg13 pg12 pg15_json json_table_select_only test: pg13 pg12
# run pg14 sequentially as it syncs metadata # run pg14 sequentially as it syncs metadata
test: pg14 test: pg14
test: pg15 test: pg15
View File
@ -3,6 +3,7 @@ test: multi_cluster_management
test: multi_test_catalog_views test: multi_test_catalog_views
test: shard_rebalancer_unit test: shard_rebalancer_unit
test: shard_rebalancer test: shard_rebalancer
test: background_rebalance
test: worker_copy_table_to_node test: worker_copy_table_to_node
test: foreign_key_to_reference_shard_rebalance test: foreign_key_to_reference_shard_rebalance
test: multi_move_mx test: multi_move_mx
View File
@ -28,7 +28,7 @@ step "s1-load-cache"
step "s1-repair-placement" step "s1-repair-placement"
{ {
SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638);
} }
session "s2" session "s2"
@ -38,14 +38,14 @@ step "s2-begin"
BEGIN; BEGIN;
} }
step "s2-set-placement-inactive" step "s2-delete-inactive"
{ {
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638;
} }
step "s2-repair-placement" step "s2-repair-placement"
{ {
SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
} }
// since test_hash_table has rep > 1 simple select query doesn't hit all placements // since test_hash_table has rep > 1 simple select query doesn't hit all placements
@ -65,7 +65,7 @@ step "s2-commit"
// note that "s1-repair-placement" errors out but that is expected // note that "s1-repair-placement" errors out but that is expected
// given that "s2-repair-placement" succeeds and the placement is // given that "s2-repair-placement" succeeds and the placement is
// already repaired // already repaired
permutation "s1-load-cache" "s2-load-cache" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-repair-placement" "s2-commit" permutation "s1-load-cache" "s2-load-cache" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-repair-placement" "s2-commit"
// the same test without the load caches // the same test without the load caches
permutation "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-repair-placement" "s2-commit" permutation "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-repair-placement" "s2-commit"
View File
@ -73,14 +73,14 @@ step "s2-begin"
BEGIN; BEGIN;
} }
step "s2-set-placement-inactive" step "s2-delete-inactive"
{ {
UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638;
} }
step "s2-repair-placement" step "s2-repair-placement"
{ {
SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes');
} }
step "s2-commit" step "s2-commit"
@ -113,16 +113,16 @@ step "s2-print-index-count"
// repair a placement while concurrently performing an update/delete/insert/copy // repair a placement while concurrently performing an update/delete/insert/copy
// note that at some points we use "s1-select" just after "s1-begin" given that BEGIN // note that at some points we use "s1-select" just after "s1-begin" given that BEGIN
// may invalidate cache at certain cases // may invalidate cache at certain cases
permutation "s1-load-cache" "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-update" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-load-cache" "s1-insert" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-update" "s2-commit" "s1-commit" "s2-print-content"
permutation "s1-load-cache" "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-delete" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-load-cache" "s1-insert" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-delete" "s2-commit" "s1-commit" "s2-print-content"
permutation "s1-load-cache" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-load-cache" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content"
permutation "s1-load-cache" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-load-cache" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content"
permutation "s1-load-cache" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count" permutation "s1-load-cache" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count"
// the same tests without loading the cache at first // the same tests without loading the cache at first
permutation "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-update" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-insert" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-update" "s2-commit" "s1-commit" "s2-print-content"
permutation "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-delete" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-insert" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-delete" "s2-commit" "s1-commit" "s2-print-content"
permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content"
permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content"
permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count" permutation "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count"
View File
@ -169,7 +169,7 @@ step "s4-print-waiting-advisory-locks"
step "s4-print-colocations" step "s4-print-colocations"
{ {
SELECT * FROM pg_dist_colocation ORDER BY colocationid; SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid;
} }
// show concurrent insert is NOT blocked by create_distributed_table_concurrently // show concurrent insert is NOT blocked by create_distributed_table_concurrently
View File
@ -1,104 +0,0 @@
setup
{
CREATE TABLE test_dml_vs_repair (test_id integer NOT NULL, data int);
SET citus.shard_replication_factor TO 2;
SELECT create_distributed_table('test_dml_vs_repair', 'test_id', 'hash', shard_count:=1);
}
teardown
{
DROP TABLE IF EXISTS test_dml_vs_repair CASCADE;
}
session "s1"
setup
{
DEALLOCATE all;
TRUNCATE test_dml_vs_repair;
PREPARE insertone AS INSERT INTO test_dml_vs_repair VALUES(1, 1);
PREPARE insertall AS INSERT INTO test_dml_vs_repair SELECT test_id, data+1 FROM test_dml_vs_repair;
}
step "s1-begin"
{
BEGIN;
}
step "s1-insertone"
{
INSERT INTO test_dml_vs_repair VALUES(1, 1);
}
step "s1-prepared-insertone"
{
EXECUTE insertone;
}
step "s1-insertall"
{
INSERT INTO test_dml_vs_repair SELECT test_id, data+1 FROM test_dml_vs_repair;
}
step "s1-prepared-insertall"
{
EXECUTE insertall;
}
step "s1-display"
{
SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id;
}
step "s1-commit"
{
COMMIT;
}
session "s2"
step "s2-begin"
{
BEGIN;
}
step "s2-invalidate-57637"
{
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637;
}
step "s2-invalidate-57638"
{
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638;
}
step "s2-revalidate-57638"
{
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638;
}
step "s2-repair"
{
SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637);
}
step "s2-commit"
{
COMMIT;
}
// verify that repair is blocked by ongoing modifying simple transaction
permutation "s2-invalidate-57637" "s1-begin" "s1-insertone" "s2-repair" "s1-commit"
// verify that repair is blocked by ongoing modifying insert...select transaction
permutation "s1-insertone" "s2-invalidate-57637" "s1-begin" "s1-insertall" "s2-repair" "s1-commit"
// verify that modifications wait for shard repair
permutation "s2-invalidate-57637" "s2-begin" "s2-repair" "s1-insertone" "s2-commit" "s2-invalidate-57638" "s1-display" "s2-invalidate-57637" "s2-revalidate-57638" "s1-display"
// verify that prepared plain modifications wait for shard repair
permutation "s2-invalidate-57637" "s1-prepared-insertone" "s2-begin" "s2-repair" "s1-prepared-insertone" "s2-commit" "s2-invalidate-57638" "s1-display" "s2-invalidate-57637" "s2-revalidate-57638" "s1-display"
// verify that prepared INSERT ... SELECT waits for shard repair
permutation "s2-invalidate-57637" "s1-insertone" "s1-prepared-insertall" "s2-begin" "s2-repair" "s1-prepared-insertall" "s2-commit" "s2-invalidate-57638" "s1-display" "s2-invalidate-57637" "s2-revalidate-57638" "s1-display"
View File
@ -44,6 +44,12 @@ step "s1-replicate-nc"
select replicate_table_shards('non_colocated'); select replicate_table_shards('non_colocated');
} }
step "s1-rebalance-all"
{
BEGIN;
select rebalance_table_shards();
}
step "s1-commit" step "s1-commit"
{ {
COMMIT; COMMIT;
@ -82,6 +88,11 @@ step "s2-drain"
select master_drain_node('localhost', 57638); select master_drain_node('localhost', 57638);
} }
step "s2-citus-rebalance-start"
{
SELECT 1 FROM citus_rebalance_start();
}
// disallowed because it's the same table // disallowed because it's the same table
permutation "s1-rebalance-nc" "s2-rebalance-nc" "s1-commit" permutation "s1-rebalance-nc" "s2-rebalance-nc" "s1-commit"
@ -112,3 +123,6 @@ permutation "s1-rebalance-c1" "s2-drain" "s1-commit"
permutation "s1-replicate-c1" "s2-drain" "s1-commit" permutation "s1-replicate-c1" "s2-drain" "s1-commit"
permutation "s1-rebalance-nc" "s2-drain" "s1-commit" permutation "s1-rebalance-nc" "s2-drain" "s1-commit"
permutation "s1-replicate-nc" "s2-drain" "s1-commit" permutation "s1-replicate-nc" "s2-drain" "s1-commit"
// disallow the background rebalancer from running while rebalance_table_shards runs
permutation "s1-rebalance-all" "s2-citus-rebalance-start" "s1-commit"
View File
@ -58,12 +58,26 @@ step "s1-shard-move-c1-block-writes"
SELECT citus_move_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); SELECT citus_move_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes');
} }
step "s1-shard-copy-c1-block-writes"
{
BEGIN;
UPDATE pg_dist_partition SET repmodel = 'c' WHERE logicalrelid IN ('colocated1', 'colocated2');
SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, transfer_mode:='block_writes');
}
step "s1-shard-move-c1-online" step "s1-shard-move-c1-online"
{ {
BEGIN; BEGIN;
SELECT citus_move_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='force_logical'); SELECT citus_move_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='force_logical');
} }
step "s1-shard-copy-c1-online"
{
BEGIN;
UPDATE pg_dist_partition SET repmodel = 'c' WHERE logicalrelid IN ('colocated1', 'colocated2');
SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, transfer_mode:='force_logical');
}
step "s1-commit" step "s1-commit"
{ {
COMMIT; COMMIT;
@ -156,7 +170,8 @@ step "s7-get-progress"
targetname, targetname,
targetport, targetport,
target_shard_size, target_shard_size,
progress progress,
operation_type
FROM get_rebalance_progress(); FROM get_rebalance_progress();
} }
@ -188,10 +203,15 @@ permutation "s7-grab-lock" "s1-shard-move-c1-online" "s7-get-progress" "s7-relea
permutation "s2-lock-1-start" "s1-shard-move-c1-block-writes" "s7-get-progress" "s2-unlock-1-start" "s1-commit" "s7-get-progress" "enable-deferred-drop" permutation "s2-lock-1-start" "s1-shard-move-c1-block-writes" "s7-get-progress" "s2-unlock-1-start" "s1-commit" "s7-get-progress" "enable-deferred-drop"
permutation "s7-grab-lock" "s1-shard-move-c1-block-writes" "s7-get-progress" "s7-release-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop" permutation "s7-grab-lock" "s1-shard-move-c1-block-writes" "s7-get-progress" "s7-release-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop"
// blocking shard copy
permutation "s2-lock-1-start" "s1-shard-copy-c1-block-writes" "s7-get-progress" "s2-unlock-1-start" "s1-commit"
// online shard move // online shard move
permutation "s6-acquire-advisory-lock" "s1-shard-move-c1-online" "s7-get-progress" "s6-release-advisory-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop" permutation "s6-acquire-advisory-lock" "s1-shard-move-c1-online" "s7-get-progress" "s6-release-advisory-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop"
permutation "s7-grab-lock" "s1-shard-move-c1-online" "s7-get-progress" "s7-release-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop" permutation "s7-grab-lock" "s1-shard-move-c1-online" "s7-get-progress" "s7-release-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop"
// online shard copy
permutation "s6-acquire-advisory-lock" "s1-shard-copy-c1-online" "s7-get-progress" "s6-release-advisory-lock" "s1-commit"
// parallel blocking shard move // parallel blocking shard move
permutation "s2-lock-1-start" "s1-shard-move-c1-block-writes" "s4-shard-move-sep-block-writes" "s7-get-progress" "s2-unlock-1-start" "s1-commit" "s4-commit" "s7-get-progress" "enable-deferred-drop" permutation "s2-lock-1-start" "s1-shard-move-c1-block-writes" "s4-shard-move-sep-block-writes" "s7-get-progress" "s2-unlock-1-start" "s1-commit" "s4-commit" "s7-get-progress" "enable-deferred-drop"
View File
@ -0,0 +1,82 @@
CREATE SCHEMA background_rebalance;
SET search_path TO background_rebalance;
SET citus.next_shard_id TO 85674000;
SET citus.shard_replication_factor TO 1;
ALTER SYSTEM SET citus.background_task_queue_interval TO '1s';
SELECT pg_reload_conf();
CREATE TABLE t1 (a int PRIMARY KEY);
SELECT create_distributed_table('t1', 'a', shard_count => 4, colocate_with => 'none');
-- verify the rebalance is a no-op when the shards are already balanced. The no-op is shown by wait complaining there is nothing
-- to wait on.
SELECT 1 FROM citus_rebalance_start();
SELECT citus_rebalance_wait();
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
-- rebalance a table in the background
SELECT 1 FROM citus_rebalance_start();
SELECT citus_rebalance_wait();
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
CREATE TABLE t2 (a int);
SELECT create_distributed_table('t2', 'a' , colocate_with => 't1');
-- show that we get an error when a table in the colocation group can't be moved in a non-blocking way
SELECT 1 FROM citus_rebalance_start();
SELECT 1 FROM citus_rebalance_start(shard_transfer_mode => 'block_writes');
SELECT citus_rebalance_wait();
DROP TABLE t2;
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
-- show we can stop a rebalance; the stop means the move has not happened, e.g., our move back below fails.
SELECT 1 FROM citus_rebalance_start();
SELECT citus_rebalance_stop();
-- waiting on this rebalance is racy, as it sometimes sees no rebalance is ongoing while other times it actually sees it ongoing
-- we simply sleep a bit here
SELECT pg_sleep(1);
-- failing move due to a stopped rebalance, first clean orphans to make the error stable
SET client_min_messages TO WARNING;
CALL citus_cleanup_orphaned_shards();
RESET client_min_messages;
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
-- show we can't start the rebalancer twice
SELECT 1 FROM citus_rebalance_start();
SELECT 1 FROM citus_rebalance_start();
SELECT citus_rebalance_wait();
-- show that the old rebalancer cannot be started with a background rebalance in progress
SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
SELECT 1 FROM citus_rebalance_start();
SELECT rebalance_table_shards();
SELECT citus_rebalance_wait();
DROP TABLE t1;
-- make sure a non-super user can stop rebalancing
CREATE USER non_super_user_rebalance WITH LOGIN;
GRANT ALL ON SCHEMA background_rebalance TO non_super_user_rebalance;
SET ROLE non_super_user_rebalance;
CREATE TABLE non_super_user_t1 (a int PRIMARY KEY);
SELECT create_distributed_table('non_super_user_t1', 'a', shard_count => 4, colocate_with => 'none');
SELECT citus_move_shard_placement(85674008, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes');
SELECT 1 FROM citus_rebalance_start();
SELECT citus_rebalance_stop();
RESET ROLE;
SET client_min_messages TO WARNING;
DROP SCHEMA background_rebalance CASCADE;
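-- a cleanup sketch, not part of the file as shown above: the GUC changed via
-- ALTER SYSTEM at the top of this test is not reset here, so a symmetric reset
-- would look like this
ALTER SYSTEM RESET citus.background_task_queue_interval;
SELECT pg_reload_conf();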
View File
@ -1,4 +1,4 @@
-- Tests for master_copy_shard_placement, which can be used for adding replicas in statement-based replication -- Tests for citus_copy_shard_placement, which can be used for adding replicas in statement-based replication
CREATE SCHEMA mcsp; CREATE SCHEMA mcsp;
SET search_path TO mcsp; SET search_path TO mcsp;
SET citus.next_shard_id TO 8139000; SET citus.next_shard_id TO 8139000;
@ -24,7 +24,7 @@ CREATE TABLE history_p1 PARTITION OF history FOR VALUES FROM ('2019-01-01') TO (
CREATE TABLE history_p2 PARTITION OF history FOR VALUES FROM ('2020-01-01') TO ('2021-01-01'); CREATE TABLE history_p2 PARTITION OF history FOR VALUES FROM ('2020-01-01') TO ('2021-01-01');
SELECT create_distributed_table('history','key'); SELECT create_distributed_table('history','key');
-- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement -- Mark tables as non-mx tables, in order to be able to test citus_copy_shard_placement
UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN
('data'::regclass, 'history'::regclass); ('data'::regclass, 'history'::regclass);
@ -35,47 +35,42 @@ INSERT INTO history VALUES ('key-1', '2020-02-01', 'old');
INSERT INTO history VALUES ('key-1', '2019-10-01', 'older'); INSERT INTO history VALUES ('key-1', '2019-10-01', 'older');
-- verify we error out if no healthy placement exists at source -- verify we error out if no healthy placement exists at source
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('data', 'key-1'), get_shard_id_for_distribution_column('data', 'key-1'),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
-- verify we error out if source and destination are the same -- verify we error out if source and destination are the same
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('data', 'key-1'), get_shard_id_for_distribution_column('data', 'key-1'),
'localhost', :worker_2_port, 'localhost', :worker_2_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
-- verify we error out if target already contains a healthy placement -- verify we warn if target already contains a healthy placement
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
(SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
-- verify we error out if table has foreign key constraints -- verify we error out if table has foreign key constraints
INSERT INTO ref_table SELECT 1, value FROM data; INSERT INTO ref_table SELECT 1, value FROM data;
ALTER TABLE data ADD CONSTRAINT distfk FOREIGN KEY (value) REFERENCES ref_table (b) MATCH FULL; ALTER TABLE data ADD CONSTRAINT distfk FOREIGN KEY (value) REFERENCES ref_table (b) MATCH FULL;
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('data', 'key-1'), get_shard_id_for_distribution_column('data', 'key-1'),
'localhost', :worker_2_port, 'localhost', :worker_2_port,
'localhost', :worker_1_port, 'localhost', :worker_1_port);
do_repair := false);
ALTER TABLE data DROP CONSTRAINT distfk; ALTER TABLE data DROP CONSTRAINT distfk;
-- replicate shard that contains key-1 -- replicate shard that contains key-1
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('data', 'key-1'), get_shard_id_for_distribution_column('data', 'key-1'),
'localhost', :worker_2_port, 'localhost', :worker_2_port,
'localhost', :worker_1_port, 'localhost', :worker_1_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
-- forcefully mark the old replica as inactive -- forcefully mark the old replica as inactive
@ -102,11 +97,10 @@ RESET client_min_messages;
CREATE TABLE mx_table(a int); CREATE TABLE mx_table(a int);
SELECT create_distributed_table('mx_table', 'a'); SELECT create_distributed_table('mx_table', 'a');
SELECT master_copy_shard_placement( SELECT citus_copy_shard_placement(
get_shard_id_for_distribution_column('mx_table', '1'), get_shard_id_for_distribution_column('mx_table', '1'),
'localhost', :worker_1_port, 'localhost', :worker_1_port,
'localhost', :worker_2_port, 'localhost', :worker_2_port,
do_repair := false,
transfer_mode := 'block_writes'); transfer_mode := 'block_writes');
SET client_min_messages TO ERROR; SET client_min_messages TO ERROR;
View File
@ -223,8 +223,8 @@ SELECT master_create_empty_shard('citus_local_table_1');
-- get_shard_id_for_distribution_column is supported -- get_shard_id_for_distribution_column is supported
SELECT get_shard_id_for_distribution_column('citus_local_table_1', 'not_checking_this_arg_for_non_dist_tables'); SELECT get_shard_id_for_distribution_column('citus_local_table_1', 'not_checking_this_arg_for_non_dist_tables');
SELECT get_shard_id_for_distribution_column('citus_local_table_1'); SELECT get_shard_id_for_distribution_column('citus_local_table_1');
-- master_copy_shard_placement is not supported -- citus_copy_shard_placement is not supported
SELECT master_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, true) SELECT citus_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port)
FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table_1'::regclass) as shardid; FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table_1'::regclass) as shardid;
-- undistribute_table is supported -- undistribute_table is supported
BEGIN; BEGIN;
Some files were not shown because too many files have changed in this diff