From 73696a03e4bc52af8850826f7449d873b203f24c Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Tue, 2 Oct 2018 15:31:21 +0300 Subject: [PATCH] Make sure not to leak intermediate result folders on the workers --- .../transaction/transaction_management.c | 20 +++++++++++++++++++ .../ensure_no_intermediate_data_leak.out | 20 +++++++++++++++++++ src/test/regress/multi_schedule | 8 ++++++++ .../sql/ensure_no_intermediate_data_leak.sql | 12 +++++++++++ 4 files changed, 60 insertions(+) create mode 100644 src/test/regress/expected/ensure_no_intermediate_data_leak.out create mode 100644 src/test/regress/sql/ensure_no_intermediate_data_leak.sql diff --git a/src/backend/distributed/transaction/transaction_management.c b/src/backend/distributed/transaction/transaction_management.c index 944e9f043..70ddfc31e 100644 --- a/src/backend/distributed/transaction/transaction_management.c +++ b/src/backend/distributed/transaction/transaction_management.c @@ -281,12 +281,32 @@ CoordinatedTransactionCallback(XactEvent event, void *arg) case XACT_EVENT_PREPARE: { + /* + * This callback is only relevant for worker queries since + * distributed queries cannot be executed with 2PC, see + * XACT_EVENT_PRE_PREPARE. + * + * We should remove the intermediate results before unsetting the + * distributed transaction id. That is necessary, otherwise Citus + * would try to remove a non-existing folder and leak some of the + * existing folders that are associated with distributed transaction + * ids on the worker nodes. + */ + RemoveIntermediateResultsDirectory(); + UnSetDistributedTransactionId(); break; } case XACT_EVENT_PRE_COMMIT: { + /* + * If the distributed query involves 2PC, we already removed + * the intermediate result directory on XACT_EVENT_PREPARE. However, + * if not, we should remove it here on the COMMIT. Since + * RemoveIntermediateResultsDirectory() is idempotent, we're safe + * to call it here again even if the transaction involves 2PC. + */ RemoveIntermediateResultsDirectory(); /* nothing further to do if there's no managed remote xacts */ diff --git a/src/test/regress/expected/ensure_no_intermediate_data_leak.out b/src/test/regress/expected/ensure_no_intermediate_data_leak.out new file mode 100644 index 000000000..4b9fb7a3a --- /dev/null +++ b/src/test/regress/expected/ensure_no_intermediate_data_leak.out @@ -0,0 +1,20 @@ +------ +-- THIS TEST SHOULD IDEALLY BE EXECUTED AT THE END OF +-- THE REGRESSION TEST SUITE TO MAKE SURE THAT WE +-- CLEAR ALL INTERMEDIATE RESULTS ON BOTH THE COORDINATOR +-- AND ON THE WORKERS. HOWEVER, WE HAVE SOME ISSUES AROUND +-- WINDOWS SUPPORT, FAILURES IN TASK-TRACKER EXECUTOR +-- SO WE DISABLE THIS TEST ON WINDOWS +------ +SELECT pg_ls_dir('base/pgsql_job_cache') WHERE citus_version() NOT ILIKE '%windows%'; + pg_ls_dir +----------- +(0 rows) + +SELECT run_command_on_workers($$SELECT pg_ls_dir('base/pgsql_job_cache') WHERE citus_version() NOT ILIKE '%windows%'$$); + run_command_on_workers +------------------------ + (localhost,57637,t,"") + (localhost,57638,t,"") +(2 rows) + diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 68b3ccb8b..d33f96ac2 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -34,6 +34,13 @@ test: multi_create_table_constraints multi_master_protocol multi_load_data multi test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select test: multi_insert_select_window multi_shard_update_delete window_functions dml_recursive recursive_dml_with_different_planners_executors +# --------- +# at the end of the regression tests regaring recursively planned modifications +# ensure that we don't leak any intermediate results +# This test should not run in parallel with any other tests +# --------- +test: ensure_no_intermediate_data_leak + # ---------- # Tests for partitioning support # ---------- @@ -260,3 +267,4 @@ test: multi_cache_invalidation # multi_task_string_size tests task string size checks # --------- test: multi_task_string_size + diff --git a/src/test/regress/sql/ensure_no_intermediate_data_leak.sql b/src/test/regress/sql/ensure_no_intermediate_data_leak.sql new file mode 100644 index 000000000..c8dcf2634 --- /dev/null +++ b/src/test/regress/sql/ensure_no_intermediate_data_leak.sql @@ -0,0 +1,12 @@ + +------ +-- THIS TEST SHOULD IDEALLY BE EXECUTED AT THE END OF +-- THE REGRESSION TEST SUITE TO MAKE SURE THAT WE +-- CLEAR ALL INTERMEDIATE RESULTS ON BOTH THE COORDINATOR +-- AND ON THE WORKERS. HOWEVER, WE HAVE SOME ISSUES AROUND +-- WINDOWS SUPPORT, FAILURES IN TASK-TRACKER EXECUTOR +-- SO WE DISABLE THIS TEST ON WINDOWS +------ + +SELECT pg_ls_dir('base/pgsql_job_cache') WHERE citus_version() NOT ILIKE '%windows%'; +SELECT run_command_on_workers($$SELECT pg_ls_dir('base/pgsql_job_cache') WHERE citus_version() NOT ILIKE '%windows%'$$);