Merge pull request #5946 from citusdata/propagate-vacuum

propagate 'vacuum;' to all worker nodes
aykut-bozkurt 2022-06-23 15:48:02 +03:00 committed by GitHub
commit ace800851a
10 changed files with 816 additions and 117 deletions
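As an overview of the behavior this commit introduces (all of it exercised by the regression and isolation tests further down in this diff): the coordinator now forwards unqualified VACUUM/ANALYZE to every worker node, and forwards qualified commands only for Citus tables. A minimal usage sketch; the table names here are illustrative, not part of this change:

-- on the coordinator
VACUUM;                          -- now propagated to all worker nodes
VACUUM some_distributed_table;   -- propagated to that table's shard placements
VACUUM some_local_table;         -- runs locally only, nothing is sent to workers
SET citus.enable_ddl_propagation TO off;
VACUUM some_distributed_table;   -- not propagated while DDL propagation is disabled
SET citus.enable_ddl_propagation TO on;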


@ -1024,6 +1024,14 @@ static DistributeObjectOps Type_Rename = {
.address = RenameTypeStmtObjectAddress,
.markDistributed = false,
};
static DistributeObjectOps Vacuum_Analyze = {
.deparse = NULL,
.qualify = NULL,
.preprocess = NULL,
.postprocess = PostprocessVacuumStmt,
.address = NULL,
.markDistributed = false,
};
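The new Vacuum_Analyze entry relies only on a postprocess hook (deparse, qualify, and preprocess stay NULL), and it covers both VACUUM and ANALYZE, since both arrive as a VacuumStmt and are distinguished by the VACOPT_VACUUM option bit. A usage sketch grounded in the expected output later in this diff (shard names follow the 9700xx pattern used there):

VACUUM distributed_vacuum_table;     -- workers receive: VACUUM public.distributed_vacuum_table_970001
ANALYZE distributed_analyze_table;   -- workers receive: ANALYZE public.distributed_analyze_table_970003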
/*
* PreprocessRenameViewStmt function can be called internally by ALTER TABLE view_name
@ -1653,6 +1661,11 @@ GetDistributeObjectOps(Node *node)
return &Any_Reindex;
}
case T_VacuumStmt:
{
return &Vacuum_Analyze;
}
case T_RenameStmt:
{
RenameStmt *stmt = castNode(RenameStmt, node);


@ -830,14 +830,6 @@ ProcessUtilityInternal(PlannedStmt *pstmt,
}
}
/* TODO: fold VACUUM's processing into the above block */
if (IsA(parsetree, VacuumStmt))
{
VacuumStmt *vacuumStmt = (VacuumStmt *) parsetree;
PostprocessVacuumStmt(vacuumStmt, queryString);
}
if (!IsDropCitusExtensionStmt(parsetree) && !IsA(parsetree, DropdbStmt))
{
/*


@ -20,7 +20,7 @@
#include "distributed/deparse_shard_query.h"
#include "distributed/listutils.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_executor.h"
#include "distributed/metadata_sync.h"
#include "distributed/resource_lock.h"
#include "distributed/transaction_management.h"
#include "distributed/version_compat.h"
@ -48,7 +48,7 @@ typedef struct CitusVacuumParams
} CitusVacuumParams;
/* Local functions forward declarations for processing distributed table commands */
static bool IsDistributedVacuumStmt(int vacuumOptions, List *VacuumCitusRelationIdList);
static bool IsDistributedVacuumStmt(List *vacuumRelationIdList);
static List * VacuumTaskList(Oid relationId, CitusVacuumParams vacuumParams,
List *vacuumColumnList);
static char * DeparseVacuumStmtPrefix(CitusVacuumParams vacuumParams);
@ -57,44 +57,28 @@ static List * VacuumColumnList(VacuumStmt *vacuumStmt, int relationIndex);
static List * ExtractVacuumTargetRels(VacuumStmt *vacuumStmt);
static void ExecuteVacuumOnDistributedTables(VacuumStmt *vacuumStmt, List *relationIdList,
CitusVacuumParams vacuumParams);
static void ExecuteUnqualifiedVacuumTasks(VacuumStmt *vacuumStmt,
CitusVacuumParams vacuumParams);
static CitusVacuumParams VacuumStmtParams(VacuumStmt *vacstmt);
static List * VacuumCitusRelationIdList(VacuumStmt *vacuumStmt, CitusVacuumParams
vacuumParams);
static List * VacuumRelationIdList(VacuumStmt *vacuumStmt, CitusVacuumParams
vacuumParams);
/*
* PostprocessVacuumStmt processes vacuum statements that may need propagation to
* distributed tables. If a VACUUM or ANALYZE command references a distributed
* table, it is propagated to all involved nodes; otherwise, this function will
* immediately exit after some error checking.
* citus tables only if ddl propagation is enabled. If a VACUUM or ANALYZE command
* references a citus table or no table at all, it is propagated to all involved nodes;
* otherwise, the statement is not propagated.
*
* Unlike most other Process functions within this file, this function does not
* return a modified parse node, as it is expected that the local VACUUM or
* ANALYZE has already been processed.
*/
void
PostprocessVacuumStmt(VacuumStmt *vacuumStmt, const char *vacuumCommand)
List *
PostprocessVacuumStmt(Node *node, const char *vacuumCommand)
{
VacuumStmt *vacuumStmt = castNode(VacuumStmt, node);
CitusVacuumParams vacuumParams = VacuumStmtParams(vacuumStmt);
const char *stmtName = (vacuumParams.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
/*
* No table in the vacuum statement means vacuuming all relations
* which is not supported by citus.
*/
if (list_length(vacuumStmt->rels) == 0)
{
/* WARN for unqualified VACUUM commands */
ereport(WARNING, (errmsg("not propagating %s command to worker nodes", stmtName),
errhint("Provide a specific table in order to %s "
"distributed tables.", stmtName)));
}
List *citusRelationIdList = VacuumCitusRelationIdList(vacuumStmt, vacuumParams);
if (list_length(citusRelationIdList) == 0)
{
return;
}
if (vacuumParams.options & VACOPT_VACUUM)
{
@ -109,32 +93,42 @@ PostprocessVacuumStmt(VacuumStmt *vacuumStmt, const char *vacuumCommand)
}
/*
* Here we get the relation list again because we might have
* closed the current transaction and the memory context got reset.
* Vacuum's context is PortalContext, which lasts for the whole session
* so committing/starting a new transaction doesn't affect it.
* When no table is specified, propagate the command as is;
* otherwise, only propagate when there is at least one citus table.
*/
citusRelationIdList = VacuumCitusRelationIdList(vacuumStmt, vacuumParams);
bool distributedVacuumStmt = IsDistributedVacuumStmt(vacuumParams.options,
citusRelationIdList);
if (!distributedVacuumStmt)
List *relationIdList = VacuumRelationIdList(vacuumStmt, vacuumParams);
if (list_length(vacuumStmt->rels) == 0)
{
return;
/* no table is specified (unqualified vacuum) */
ExecuteUnqualifiedVacuumTasks(vacuumStmt, vacuumParams);
}
else if (IsDistributedVacuumStmt(relationIdList))
{
/* there is at least 1 citus table specified */
ExecuteVacuumOnDistributedTables(vacuumStmt, relationIdList,
vacuumParams);
}
ExecuteVacuumOnDistributedTables(vacuumStmt, citusRelationIdList, vacuumParams);
/* else only local tables are specified */
return NIL;
}
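The rewritten PostprocessVacuumStmt now splits into three cases: no target relation at all (ExecuteUnqualifiedVacuumTasks sends the bare command to every worker), at least one Citus table among the targets (ExecuteVacuumOnDistributedTables forwards shard-level commands), and only local tables (nothing is propagated). A short SQL sketch of the mixed case, matching the regression output further down:

-- only the Citus tables in the target list are forwarded to workers;
-- local_vacuum_table is vacuumed on the coordinator and never sent out
VACUUM distributed_vacuum_table, local_vacuum_table, reference_vacuum_table;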
/*
* VacuumCitusRelationIdList returns the oid of the relations in the given vacuum statement.
* VacuumRelationIdList returns the oid of the relations in the given vacuum statement.
*/
static List *
VacuumCitusRelationIdList(VacuumStmt *vacuumStmt, CitusVacuumParams vacuumParams)
VacuumRelationIdList(VacuumStmt *vacuumStmt, CitusVacuumParams vacuumParams)
{
LOCKMODE lockMode = (vacuumParams.options & VACOPT_FULL) ? AccessExclusiveLock :
ShareUpdateExclusiveLock;
bool skipLocked = (vacuumParams.options & VACOPT_SKIP_LOCKED);
List *vacuumRelationList = ExtractVacuumTargetRels(vacuumStmt);
List *relationIdList = NIL;
@ -142,18 +136,45 @@ VacuumCitusRelationIdList(VacuumStmt *vacuumStmt, CitusVacuumParams vacuumParams
RangeVar *vacuumRelation = NULL;
foreach_ptr(vacuumRelation, vacuumRelationList)
{
Oid relationId = RangeVarGetRelid(vacuumRelation, lockMode, false);
if (!IsCitusTable(relationId))
/*
* If the skip_locked option is enabled, we skip the relation when its lock
* is not immediately available; otherwise, we block until we acquire the lock.
*/
Oid relationId = RangeVarGetRelidExtended(vacuumRelation,
lockMode,
skipLocked ? RVR_SKIP_LOCKED : 0, NULL,
NULL);
if (OidIsValid(relationId))
{
continue;
relationIdList = lappend_oid(relationIdList, relationId);
}
relationIdList = lappend_oid(relationIdList, relationId);
}
return relationIdList;
}
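Because VacuumRelationIdList now uses RangeVarGetRelidExtended with RVR_SKIP_LOCKED, a relation whose lock cannot be obtained immediately comes back as InvalidOid and is simply left out of the list. The new isolation test added in this commit demonstrates the observable behavior; a condensed two-session sketch of one of its permutations:

-- session 1
BEGIN;
LOCK part1 IN SHARE MODE;
-- session 2: part1 is skipped with a "lock not available" warning, part2 is still vacuumed
VACUUM (SKIP_LOCKED) part1, part2;
-- session 1
COMMIT;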
/*
* IsDistributedVacuumStmt returns true if there is any citus table in the relation id list;
* otherwise, it returns false.
*/
static bool
IsDistributedVacuumStmt(List *vacuumRelationIdList)
{
Oid relationId = InvalidOid;
foreach_oid(relationId, vacuumRelationIdList)
{
if (OidIsValid(relationId) && IsCitusTable(relationId))
{
return true;
}
}
return false;
}
/*
* ExecuteVacuumOnDistributedTables executes the vacuum for the shard placements of given tables
* if they are citus tables.
@ -183,53 +204,6 @@ ExecuteVacuumOnDistributedTables(VacuumStmt *vacuumStmt, List *relationIdList,
}
/*
* IsDistributedVacuumStmt returns whether distributed execution of a
* given VacuumStmt is supported. The provided relationId list represents
* the list of tables targeted by the provided statement.
*
* Returns true if the statement requires distributed execution and returns
* false otherwise.
*/
static bool
IsDistributedVacuumStmt(int vacuumOptions, List *VacuumCitusRelationIdList)
{
bool distributeStmt = false;
int distributedRelationCount = 0;
const char *stmtName = (vacuumOptions & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
Oid relationId = InvalidOid;
foreach_oid(relationId, VacuumCitusRelationIdList)
{
if (OidIsValid(relationId) && IsCitusTable(relationId))
{
distributedRelationCount++;
}
}
if (distributedRelationCount == 0)
{
/* nothing to do here */
}
else if (!EnableDDLPropagation)
{
/* WARN if DDL propagation is not enabled */
ereport(WARNING, (errmsg("not propagating %s command to worker nodes", stmtName),
errhint("Set citus.enable_ddl_propagation to true in order to "
"send targeted %s commands to worker nodes.",
stmtName)));
}
else
{
distributeStmt = true;
}
return distributeStmt;
}
/*
* VacuumTaskList returns a list of tasks to be executed as part of processing
* a VacuumStmt which targets a distributed relation.
@ -237,6 +211,9 @@ IsDistributedVacuumStmt(int vacuumOptions, List *VacuumCitusRelationIdList)
static List *
VacuumTaskList(Oid relationId, CitusVacuumParams vacuumParams, List *vacuumColumnList)
{
LOCKMODE lockMode = (vacuumParams.options & VACOPT_FULL) ? AccessExclusiveLock :
ShareUpdateExclusiveLock;
/* resulting task list */
List *taskList = NIL;
@ -255,8 +232,20 @@ VacuumTaskList(Oid relationId, CitusVacuumParams vacuumParams, List *vacuumColum
* RowExclusiveLock. However if VACUUM FULL is used, we already obtain
* AccessExclusiveLock before reaching to that point and INSERT's will be
* blocked anyway. This is inline with PostgreSQL's own behaviour.
* Also note that if the skip_locked option is enabled, we try to acquire the lock
* in a nonblocking way. If the lock is not available, vacuum simply skips that relation.
*/
LockRelationOid(relationId, ShareUpdateExclusiveLock);
if (!(vacuumParams.options & VACOPT_SKIP_LOCKED))
{
LockRelationOid(relationId, lockMode);
}
else
{
if (!ConditionalLockRelationOid(relationId, lockMode))
{
return NIL;
}
}
List *shardIntervalList = LoadShardIntervalList(relationId);
@ -606,3 +595,62 @@ VacuumStmtParams(VacuumStmt *vacstmt)
(disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0);
return params;
}
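DeparseVacuumStmtPrefix rebuilds the option prefix from the parsed CitusVacuumParams, so options given on the coordinator appear to survive propagation, both for unqualified commands and for shard-level ones. A sketch based on the expected output below:

VACUUM (ANALYZE) local_vacuum_table, reference_vacuum_table;
-- workers receive: VACUUM (ANALYZE) public.reference_vacuum_table_970000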
/*
* ExecuteUnqualifiedVacuumTasks executes tasks for unqualified vacuum commands
*/
static void
ExecuteUnqualifiedVacuumTasks(VacuumStmt *vacuumStmt, CitusVacuumParams vacuumParams)
{
/* don't allow concurrent node list changes that require an exclusive lock */
List *workerNodes = TargetWorkerSetNodeList(ALL_SHARD_NODES, RowShareLock);
if (list_length(workerNodes) == 0)
{
return;
}
const char *vacuumStringPrefix = DeparseVacuumStmtPrefix(vacuumParams);
StringInfo vacuumCommand = makeStringInfo();
appendStringInfoString(vacuumCommand, vacuumStringPrefix);
List *unqualifiedVacuumCommands = list_make3(DISABLE_DDL_PROPAGATION,
vacuumCommand->data,
ENABLE_DDL_PROPAGATION);
Task *task = CitusMakeNode(Task);
task->jobId = INVALID_JOB_ID;
task->taskType = VACUUM_ANALYZE_TASK;
SetTaskQueryStringList(task, unqualifiedVacuumCommands);
task->dependentTaskList = NULL;
task->replicationModel = REPLICATION_MODEL_INVALID;
task->cannotBeExecutedInTransction = ((vacuumParams.options) & VACOPT_VACUUM);
bool hasPeerWorker = false;
int32 localNodeGroupId = GetLocalGroupId();
WorkerNode *workerNode = NULL;
foreach_ptr(workerNode, workerNodes)
{
if (workerNode->groupId != localNodeGroupId)
{
ShardPlacement *targetPlacement = CitusMakeNode(ShardPlacement);
targetPlacement->nodeName = workerNode->workerName;
targetPlacement->nodePort = workerNode->workerPort;
targetPlacement->groupId = workerNode->groupId;
task->taskPlacementList = lappend(task->taskPlacementList,
targetPlacement);
hasPeerWorker = true;
}
}
if (hasPeerWorker)
{
bool localExecution = false;
ExecuteUtilityTaskList(list_make1(task), localExecution);
}
}
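For reference, the task built by ExecuteUnqualifiedVacuumTasks carries the three-element query list constructed above, so each worker other than the coordinator's own node roughly receives the following, matching the NOTICE lines in the expected output further down. Since the task is marked as not executable inside a transaction whenever VACOPT_VACUUM is set, a plain VACUUM is sent outside of a transaction block:

SET citus.enable_ddl_propagation TO 'off';
VACUUM;
SET citus.enable_ddl_propagation TO 'on';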


@ -553,7 +553,7 @@ extern void UpdateFunctionDistributionInfo(const ObjectAddress *distAddress,
bool *forceDelegation);
/* vacuum.c - forward declarations */
extern void PostprocessVacuumStmt(VacuumStmt *vacuumStmt, const char *vacuumCommand);
extern List * PostprocessVacuumStmt(Node *node, const char *vacuumCommand);
/* view.c - forward declarations */
extern List * PreprocessViewStmt(Node *node, const char *queryString,


@ -0,0 +1,171 @@
Parsed test spec with 2 sessions
starting permutation: lock_share vac_specified commit
step lock_share:
BEGIN;
LOCK part1 IN SHARE MODE;
s2: WARNING: skipping vacuum of "part1" --- lock not available
step vac_specified: VACUUM (SKIP_LOCKED) part1, part2;
step commit:
COMMIT;
starting permutation: lock_share vac_all_parts commit
step lock_share:
BEGIN;
LOCK part1 IN SHARE MODE;
step vac_all_parts: VACUUM (SKIP_LOCKED) parted;
step commit:
COMMIT;
starting permutation: lock_share analyze_specified commit
step lock_share:
BEGIN;
LOCK part1 IN SHARE MODE;
s2: WARNING: skipping analyze of "part1" --- lock not available
step analyze_specified: ANALYZE (SKIP_LOCKED) part1, part2;
step commit:
COMMIT;
starting permutation: lock_share analyze_all_parts commit
step lock_share:
BEGIN;
LOCK part1 IN SHARE MODE;
step analyze_all_parts: ANALYZE (SKIP_LOCKED) parted;
step commit:
COMMIT;
starting permutation: lock_share vac_analyze_specified commit
step lock_share:
BEGIN;
LOCK part1 IN SHARE MODE;
s2: WARNING: skipping vacuum of "part1" --- lock not available
step vac_analyze_specified: VACUUM (ANALYZE, SKIP_LOCKED) part1, part2;
step commit:
COMMIT;
starting permutation: lock_share vac_analyze_all_parts commit
step lock_share:
BEGIN;
LOCK part1 IN SHARE MODE;
step vac_analyze_all_parts: VACUUM (ANALYZE, SKIP_LOCKED) parted;
step commit:
COMMIT;
starting permutation: lock_share vac_full_specified commit
step lock_share:
BEGIN;
LOCK part1 IN SHARE MODE;
s2: WARNING: skipping vacuum of "part1" --- lock not available
step vac_full_specified: VACUUM (SKIP_LOCKED, FULL) part1, part2;
step commit:
COMMIT;
starting permutation: lock_share vac_full_all_parts commit
step lock_share:
BEGIN;
LOCK part1 IN SHARE MODE;
step vac_full_all_parts: VACUUM (SKIP_LOCKED, FULL) parted;
step commit:
COMMIT;
starting permutation: lock_access_exclusive vac_specified commit
step lock_access_exclusive:
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
s2: WARNING: skipping vacuum of "part1" --- lock not available
step vac_specified: VACUUM (SKIP_LOCKED) part1, part2;
step commit:
COMMIT;
starting permutation: lock_access_exclusive vac_all_parts commit
step lock_access_exclusive:
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
step vac_all_parts: VACUUM (SKIP_LOCKED) parted;
step commit:
COMMIT;
starting permutation: lock_access_exclusive analyze_specified commit
step lock_access_exclusive:
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
s2: WARNING: skipping analyze of "part1" --- lock not available
step analyze_specified: ANALYZE (SKIP_LOCKED) part1, part2;
step commit:
COMMIT;
starting permutation: lock_access_exclusive analyze_all_parts commit
step lock_access_exclusive:
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
step analyze_all_parts: ANALYZE (SKIP_LOCKED) parted; <waiting ...>
step commit:
COMMIT;
step analyze_all_parts: <... completed>
starting permutation: lock_access_exclusive vac_analyze_specified commit
step lock_access_exclusive:
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
s2: WARNING: skipping vacuum of "part1" --- lock not available
step vac_analyze_specified: VACUUM (ANALYZE, SKIP_LOCKED) part1, part2;
step commit:
COMMIT;
starting permutation: lock_access_exclusive vac_analyze_all_parts commit
step lock_access_exclusive:
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
step vac_analyze_all_parts: VACUUM (ANALYZE, SKIP_LOCKED) parted; <waiting ...>
step commit:
COMMIT;
step vac_analyze_all_parts: <... completed>
starting permutation: lock_access_exclusive vac_full_specified commit
step lock_access_exclusive:
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
s2: WARNING: skipping vacuum of "part1" --- lock not available
step vac_full_specified: VACUUM (SKIP_LOCKED, FULL) part1, part2;
step commit:
COMMIT;
starting permutation: lock_access_exclusive vac_full_all_parts commit
step lock_access_exclusive:
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
step vac_full_all_parts: VACUUM (SKIP_LOCKED, FULL) parted;
step commit:
COMMIT;


@ -75,7 +75,7 @@ SELECT citus_table_size('customer_copy_hash'),
citus_table_size('supplier');
citus_table_size | citus_table_size | citus_table_size
---------------------------------------------------------------------
548864 | 548864 | 425984
548864 | 548864 | 442368
(1 row)
CREATE INDEX index_1 on customer_copy_hash(c_custkey);


@ -246,10 +246,6 @@ WHERE tablename = 'dustbunnies_990002' ORDER BY attname;
\c - - :master_host :master_port
SET citus.log_remote_commands TO ON;
-- verify warning for unqualified VACUUM
VACUUM;
WARNING: not propagating VACUUM command to worker nodes
HINT: Provide a specific table in order to VACUUM distributed tables.
-- check for multiple table vacuum
VACUUM dustbunnies, second_dustbunnies;
NOTICE: issuing VACUUM public.dustbunnies_990002
@ -260,14 +256,10 @@ NOTICE: issuing VACUUM public.second_dustbunnies_990003
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing VACUUM public.second_dustbunnies_990003
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- and warning when using targeted VACUUM without DDL propagation
-- and do not propagate when using targeted VACUUM without DDL propagation
SET citus.enable_ddl_propagation to false;
VACUUM dustbunnies;
WARNING: not propagating VACUUM command to worker nodes
HINT: Set citus.enable_ddl_propagation to true in order to send targeted VACUUM commands to worker nodes.
ANALYZE dustbunnies;
WARNING: not propagating ANALYZE command to worker nodes
HINT: Set citus.enable_ddl_propagation to true in order to send targeted ANALYZE commands to worker nodes.
SET citus.enable_ddl_propagation to DEFAULT;
-- test worker_hash
SELECT worker_hash(123);
@ -314,3 +306,284 @@ DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
1
(1 row)
SET citus.shard_count TO 1;
SET citus.shard_replication_factor TO 1;
SET citus.next_shard_id TO 970000;
SET citus.log_remote_commands TO OFF;
CREATE TABLE local_vacuum_table(id int primary key, b text);
CREATE TABLE reference_vacuum_table(id int);
SELECT create_reference_table('reference_vacuum_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE distributed_vacuum_table(id int);
SELECT create_distributed_table('distributed_vacuum_table', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SET citus.log_remote_commands TO ON;
-- should propagate to all workers because no table is specified
VACUUM;
NOTICE: issuing SET citus.enable_ddl_propagation TO 'off'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing SET citus.enable_ddl_propagation TO 'off'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing VACUUM
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing VACUUM
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing SET citus.enable_ddl_propagation TO 'on'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing SET citus.enable_ddl_propagation TO 'on'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- should not propagate because no distributed table is specified
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
VACUUM local_vacuum_table;
SELECT pg_size_pretty( pg_total_relation_size('local_vacuum_table') );
pg_size_pretty
---------------------------------------------------------------------
21 MB
(1 row)
-- vacuum full deallocates pages of dead tuples whereas normal vacuum only marks dead tuples on visibility map
VACUUM FULL local_vacuum_table;
SELECT pg_size_pretty( pg_total_relation_size('local_vacuum_table') );
pg_size_pretty
---------------------------------------------------------------------
16 kB
(1 row)
-- should propagate to all workers because table is reference table
VACUUM reference_vacuum_table;
NOTICE: issuing VACUUM public.reference_vacuum_table_970000
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing VACUUM public.reference_vacuum_table_970000
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- should propagate to all workers because table is distributed table
VACUUM distributed_vacuum_table;
NOTICE: issuing VACUUM public.distributed_vacuum_table_970001
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- only distributed_vacuum_table and reference_vacuum_table should propagate
VACUUM distributed_vacuum_table, local_vacuum_table, reference_vacuum_table;
NOTICE: issuing VACUUM public.distributed_vacuum_table_970001
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing VACUUM public.reference_vacuum_table_970000
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing VACUUM public.reference_vacuum_table_970000
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- only reference_vacuum_table should propagate
VACUUM local_vacuum_table, reference_vacuum_table;
NOTICE: issuing VACUUM public.reference_vacuum_table_970000
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing VACUUM public.reference_vacuum_table_970000
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- vacuum (disable_page_skipping) aggressively processes all pages of the relation; it does not respect the visibility map
VACUUM (DISABLE_PAGE_SKIPPING true) local_vacuum_table;
VACUUM (DISABLE_PAGE_SKIPPING false) local_vacuum_table;
-- vacuum (index_cleanup on, parallel 1) should execute index vacuuming and index cleanup phases in parallel
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
VACUUM (INDEX_CLEANUP OFF, PARALLEL 1) local_vacuum_table;
SELECT pg_size_pretty( pg_total_relation_size('local_vacuum_table') );
pg_size_pretty
---------------------------------------------------------------------
56 MB
(1 row)
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
VACUUM (INDEX_CLEANUP ON, PARALLEL 1) local_vacuum_table;
SELECT pg_size_pretty( pg_total_relation_size('local_vacuum_table') );
pg_size_pretty
---------------------------------------------------------------------
21 MB
(1 row)
----------------- PROCESS_TOAST is only available for pg14
-- vacuum (process_toast false) should not be vacuuming toast tables (default is true)
--select reltoastrelid from pg_class where relname='local_vacuum_table'
--\gset
--SELECT relfrozenxid AS frozenxid FROM pg_class WHERE oid=:reltoastrelid::regclass
--\gset
--VACUUM (FREEZE, PROCESS_TOAST true) local_vacuum_table;
--SELECT relfrozenxid::text::integer > :frozenxid AS frozen_performed FROM pg_class
--WHERE oid=:reltoastrelid::regclass;
--SELECT relfrozenxid AS frozenxid FROM pg_class WHERE oid=:reltoastrelid::regclass
--\gset
--VACUUM (FREEZE, PROCESS_TOAST false) local_vacuum_table;
--SELECT relfrozenxid::text::integer = :frozenxid AS frozen_not_performed FROM pg_class
--WHERE oid=:reltoastrelid::regclass;
---------------------------------------------------------------------
-- vacuum (truncate false) should not attempt to truncate off any empty pages at the end of the table (default is true)
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
vacuum (TRUNCATE false) local_vacuum_table;
SELECT pg_total_relation_size('local_vacuum_table') as size1 \gset
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
vacuum (TRUNCATE true) local_vacuum_table;
SELECT pg_total_relation_size('local_vacuum_table') as size2 \gset
SELECT :size1 > :size2 as truncate_less_size;
truncate_less_size
---------------------------------------------------------------------
t
(1 row)
-- vacuum (analyze) should be analyzing the table to generate statistics after vacuuming
select analyze_count from pg_stat_all_tables where relname = 'local_vacuum_table' or relname = 'reference_vacuum_table';
analyze_count
---------------------------------------------------------------------
0
0
(2 rows)
vacuum (analyze) local_vacuum_table, reference_vacuum_table;
NOTICE: issuing VACUUM (ANALYZE) public.reference_vacuum_table_970000
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing VACUUM (ANALYZE) public.reference_vacuum_table_970000
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- give enough time for stats to be updated (they are flushed every 500ms by default)
select pg_sleep(1);
pg_sleep
---------------------------------------------------------------------
(1 row)
select analyze_count from pg_stat_all_tables where relname = 'local_vacuum_table' or relname = 'reference_vacuum_table';
analyze_count
---------------------------------------------------------------------
1
1
(2 rows)
-- should not propagate because ddl propagation is disabled
SET citus.enable_ddl_propagation TO OFF;
VACUUM distributed_vacuum_table;
SET citus.enable_ddl_propagation TO ON;
SET citus.log_remote_commands TO OFF;
-- ANALYZE tests
CREATE TABLE local_analyze_table(id int);
CREATE TABLE reference_analyze_table(id int);
SELECT create_reference_table('reference_analyze_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE distributed_analyze_table(id int);
SELECT create_distributed_table('distributed_analyze_table', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE loc (a INT, b INT);
CREATE TABLE dist (a INT);
SELECT create_distributed_table ('dist', 'a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SET citus.log_remote_commands TO ON;
-- should propagate to all workers because no table is specified
ANALYZE;
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing SET citus.enable_ddl_propagation TO 'off'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing SET citus.enable_ddl_propagation TO 'off'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing ANALYZE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing ANALYZE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing SET citus.enable_ddl_propagation TO 'on'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing SET citus.enable_ddl_propagation TO 'on'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- should not propagate because no distributed table is specified
ANALYZE local_analyze_table;
-- should propagate to all workers because table is reference table
ANALYZE reference_analyze_table;
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing ANALYZE public.reference_analyze_table_970002
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing ANALYZE public.reference_analyze_table_970002
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- should propagate to all workers because table is distributed table
ANALYZE distributed_analyze_table;
NOTICE: issuing ANALYZE public.distributed_analyze_table_970003
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- only distributed_analyze_table and reference_analyze_table should propagate
ANALYZE distributed_analyze_table, local_analyze_table, reference_analyze_table;
NOTICE: issuing ANALYZE public.distributed_analyze_table_970003
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing ANALYZE public.reference_analyze_table_970002
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing ANALYZE public.reference_analyze_table_970002
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- only reference_analyze_table should propagate
ANALYZE local_analyze_table, reference_analyze_table;
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing ANALYZE public.reference_analyze_table_970002
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing ANALYZE public.reference_analyze_table_970002
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-- should not propagate because ddl propagation is disabled
SET citus.enable_ddl_propagation TO OFF;
ANALYZE distributed_analyze_table;
SET citus.enable_ddl_propagation TO ON;
-- analyze only specified columns for corresponding tables
ANALYZE loc(b), dist(a);
NOTICE: issuing ANALYZE public.dist_970004 (a)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx


@ -27,7 +27,7 @@ test: isolation_dml_vs_repair isolation_copy_placement_vs_copy_placement
test: isolation_concurrent_dml isolation_data_migration
test: isolation_drop_shards isolation_copy_placement_vs_modification
test: isolation_insert_vs_vacuum isolation_transaction_recovery
test: isolation_insert_vs_vacuum isolation_transaction_recovery isolation_vacuum_skip_locked
test: isolation_progress_monitoring
test: isolation_dump_local_wait_edges


@ -0,0 +1,61 @@
// Test for SKIP_LOCKED option of VACUUM and ANALYZE commands.
//
// This also verifies that log messages are not emitted for skipped relations
// that were not specified in the VACUUM or ANALYZE command.
setup
{
CREATE TABLE parted (a INT) PARTITION BY LIST (a);
CREATE TABLE part1 PARTITION OF parted FOR VALUES IN (1);
ALTER TABLE part1 SET (autovacuum_enabled = false);
CREATE TABLE part2 PARTITION OF parted FOR VALUES IN (2);
ALTER TABLE part2 SET (autovacuum_enabled = false);
}
teardown
{
DROP TABLE IF EXISTS parted;
}
session s1
step lock_share
{
BEGIN;
LOCK part1 IN SHARE MODE;
}
step lock_access_exclusive
{
BEGIN;
LOCK part1 IN ACCESS EXCLUSIVE MODE;
}
step commit
{
COMMIT;
}
session s2
step vac_specified { VACUUM (SKIP_LOCKED) part1, part2; }
step vac_all_parts { VACUUM (SKIP_LOCKED) parted; }
step analyze_specified { ANALYZE (SKIP_LOCKED) part1, part2; }
step analyze_all_parts { ANALYZE (SKIP_LOCKED) parted; }
step vac_analyze_specified { VACUUM (ANALYZE, SKIP_LOCKED) part1, part2; }
step vac_analyze_all_parts { VACUUM (ANALYZE, SKIP_LOCKED) parted; }
step vac_full_specified { VACUUM (SKIP_LOCKED, FULL) part1, part2; }
step vac_full_all_parts { VACUUM (SKIP_LOCKED, FULL) parted; }
permutation lock_share vac_specified commit
permutation lock_share vac_all_parts commit
permutation lock_share analyze_specified commit
permutation lock_share analyze_all_parts commit
permutation lock_share vac_analyze_specified commit
permutation lock_share vac_analyze_all_parts commit
permutation lock_share vac_full_specified commit
permutation lock_share vac_full_all_parts commit
permutation lock_access_exclusive vac_specified commit
permutation lock_access_exclusive vac_all_parts commit
permutation lock_access_exclusive analyze_specified commit
permutation lock_access_exclusive analyze_all_parts commit
permutation lock_access_exclusive vac_analyze_specified commit
permutation lock_access_exclusive vac_analyze_all_parts commit
permutation lock_access_exclusive vac_full_specified commit
permutation lock_access_exclusive vac_full_all_parts commit


@ -170,13 +170,10 @@ WHERE tablename = 'dustbunnies_990002' ORDER BY attname;
\c - - :master_host :master_port
SET citus.log_remote_commands TO ON;
-- verify warning for unqualified VACUUM
VACUUM;
-- check for multiple table vacuum
VACUUM dustbunnies, second_dustbunnies;
-- and warning when using targeted VACUUM without DDL propagation
-- and do not propagate when using targeted VACUUM without DDL propagation
SET citus.enable_ddl_propagation to false;
VACUUM dustbunnies;
ANALYZE dustbunnies;
@ -198,3 +195,147 @@ SELECT worker_create_or_alter_role(NULL, 'create role dontcrash', NULL);
-- confirm that citus_create_restore_point works
SELECT 1 FROM citus_create_restore_point('regression-test');
SET citus.shard_count TO 1;
SET citus.shard_replication_factor TO 1;
SET citus.next_shard_id TO 970000;
SET citus.log_remote_commands TO OFF;
CREATE TABLE local_vacuum_table(id int primary key, b text);
CREATE TABLE reference_vacuum_table(id int);
SELECT create_reference_table('reference_vacuum_table');
CREATE TABLE distributed_vacuum_table(id int);
SELECT create_distributed_table('distributed_vacuum_table', 'id');
SET citus.log_remote_commands TO ON;
-- should propagate to all workers because no table is specified
VACUUM;
-- should not propagate because no distributed table is specified
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
VACUUM local_vacuum_table;
SELECT pg_size_pretty( pg_total_relation_size('local_vacuum_table') );
-- vacuum full deallocates pages of dead tuples whereas normal vacuum only marks dead tuples on visibility map
VACUUM FULL local_vacuum_table;
SELECT pg_size_pretty( pg_total_relation_size('local_vacuum_table') );
-- should propagate to all workers because table is reference table
VACUUM reference_vacuum_table;
-- should propagate to all workers because table is distributed table
VACUUM distributed_vacuum_table;
-- only distributed_vacuum_table and reference_vacuum_table should propagate
VACUUM distributed_vacuum_table, local_vacuum_table, reference_vacuum_table;
-- only reference_vacuum_table should propagate
VACUUM local_vacuum_table, reference_vacuum_table;
-- vacuum (disable_page_skipping) aggressively processes all pages of the relation; it does not respect the visibility map
VACUUM (DISABLE_PAGE_SKIPPING true) local_vacuum_table;
VACUUM (DISABLE_PAGE_SKIPPING false) local_vacuum_table;
-- vacuum (index_cleanup on, parallel 1) should execute index vacuuming and index cleanup phases in parallel
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
VACUUM (INDEX_CLEANUP OFF, PARALLEL 1) local_vacuum_table;
SELECT pg_size_pretty( pg_total_relation_size('local_vacuum_table') );
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
VACUUM (INDEX_CLEANUP ON, PARALLEL 1) local_vacuum_table;
SELECT pg_size_pretty( pg_total_relation_size('local_vacuum_table') );
----------------- PROCESS_TOAST is only available for pg14
-- vacuum (process_toast false) should not be vacuuming toast tables (default is true)
--select reltoastrelid from pg_class where relname='local_vacuum_table'
--\gset
--SELECT relfrozenxid AS frozenxid FROM pg_class WHERE oid=:reltoastrelid::regclass
--\gset
--VACUUM (FREEZE, PROCESS_TOAST true) local_vacuum_table;
--SELECT relfrozenxid::text::integer > :frozenxid AS frozen_performed FROM pg_class
--WHERE oid=:reltoastrelid::regclass;
--SELECT relfrozenxid AS frozenxid FROM pg_class WHERE oid=:reltoastrelid::regclass
--\gset
--VACUUM (FREEZE, PROCESS_TOAST false) local_vacuum_table;
--SELECT relfrozenxid::text::integer = :frozenxid AS frozen_not_performed FROM pg_class
--WHERE oid=:reltoastrelid::regclass;
---------------
-- vacuum (truncate false) should not attempt to truncate off any empty pages at the end of the table (default is true)
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
vacuum (TRUNCATE false) local_vacuum_table;
SELECT pg_total_relation_size('local_vacuum_table') as size1 \gset
insert into local_vacuum_table select i from generate_series(1,1000000) i;
delete from local_vacuum_table;
vacuum (TRUNCATE true) local_vacuum_table;
SELECT pg_total_relation_size('local_vacuum_table') as size2 \gset
SELECT :size1 > :size2 as truncate_less_size;
-- vacuum (analyze) should be analyzing the table to generate statistics after vacuuming
select analyze_count from pg_stat_all_tables where relname = 'local_vacuum_table' or relname = 'reference_vacuum_table';
vacuum (analyze) local_vacuum_table, reference_vacuum_table;
-- give enough time for stats to be updated (they are flushed every 500ms by default)
select pg_sleep(1);
select analyze_count from pg_stat_all_tables where relname = 'local_vacuum_table' or relname = 'reference_vacuum_table';
-- should not propagate because ddl propagation is disabled
SET citus.enable_ddl_propagation TO OFF;
VACUUM distributed_vacuum_table;
SET citus.enable_ddl_propagation TO ON;
SET citus.log_remote_commands TO OFF;
-- ANALYZE tests
CREATE TABLE local_analyze_table(id int);
CREATE TABLE reference_analyze_table(id int);
SELECT create_reference_table('reference_analyze_table');
CREATE TABLE distributed_analyze_table(id int);
SELECT create_distributed_table('distributed_analyze_table', 'id');
CREATE TABLE loc (a INT, b INT);
CREATE TABLE dist (a INT);
SELECT create_distributed_table ('dist', 'a');
SET citus.log_remote_commands TO ON;
-- should propagate to all workers because no table is specified
ANALYZE;
-- should not propagate because no distributed table is specified
ANALYZE local_analyze_table;
-- should propagate to all workers because table is reference table
ANALYZE reference_analyze_table;
-- should propagate to all workers because table is distributed table
ANALYZE distributed_analyze_table;
-- only distributed_analyze_table and reference_analyze_table should propagate
ANALYZE distributed_analyze_table, local_analyze_table, reference_analyze_table;
-- only reference_analyze_table should propagate
ANALYZE local_analyze_table, reference_analyze_table;
-- should not propagate because ddl propagation is disabled
SET citus.enable_ddl_propagation TO OFF;
ANALYZE distributed_analyze_table;
SET citus.enable_ddl_propagation TO ON;
-- analyze only specified columns for corresponding tables
ANALYZE loc(b), dist(a);