diff --git a/src/backend/distributed/executor/multi_server_executor.c b/src/backend/distributed/executor/multi_server_executor.c index 2fd75ab48..127daff20 100644 --- a/src/backend/distributed/executor/multi_server_executor.c +++ b/src/backend/distributed/executor/multi_server_executor.c @@ -29,6 +29,7 @@ int RemoteTaskCheckInterval = 100; /* per cycle sleep interval in millisecs */ int TaskExecutorType = MULTI_EXECUTOR_REAL_TIME; /* distributed executor type */ bool BinaryMasterCopyFormat = false; /* copy data from workers in binary format */ +int MultiTaskQueryLogLevel = MULTI_TASK_QUERY_INFO_OFF; /* multi-task query log level */ /* @@ -58,6 +59,15 @@ JobExecutorType(MultiPlan *multiPlan) return MULTI_EXECUTOR_ROUTER; } + /* if it is not a router executable plan, inform user according to the log level */ + if (MultiTaskQueryLogLevel != MULTI_TASK_QUERY_INFO_OFF) + { + ereport(MultiTaskQueryLogLevel, (errmsg("multi-task query about to be executed"), + errhint("Queries are split to multiple tasks " + "if they have to be split into several" + " queries on the workers."))); + } + if (executorType == MULTI_EXECUTOR_REAL_TIME) { double reasonableConnectionCount = 0; diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index f912ecdf8..52bb3d1b5 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -34,6 +34,7 @@ #include "distributed/multi_logical_optimizer.h" #include "distributed/multi_physical_planner.h" #include "distributed/multi_router_planner.h" +#include "distributed/multi_server_executor.h" #include "distributed/listutils.h" #include "distributed/citus_ruleutils.h" #include "distributed/relation_restriction_equivalence.h" @@ -343,6 +344,15 @@ CreateInsertSelectRouterPlan(Query *originalQuery, ++taskIdIndex; } + if (MultiTaskQueryLogLevel != MULTI_TASK_QUERY_INFO_OFF && + list_length(sqlTaskList) > 1) + { + ereport(MultiTaskQueryLogLevel, (errmsg("multi-task query about to be executed"), + errhint("Queries are split to multiple tasks " + "if they have to be split into several" + " queries on the workers."))); + } + /* Create the worker job */ workerJob = CitusMakeNode(Job); workerJob->taskList = sqlTaskList; diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 5a924bfe8..181242e8b 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -92,6 +92,16 @@ static const struct config_enum_entry multi_shard_commit_protocol_options[] = { { NULL, 0, false } }; +static const struct config_enum_entry multi_task_query_log_level_options[] = { + { "off", MULTI_TASK_QUERY_INFO_OFF, false }, + { "debug", DEBUG2, false }, + { "log", LOG, false }, + { "notice", NOTICE, false }, + { "warning", WARNING, false }, + { "error", ERROR, false }, + { NULL, 0, false } +}; + /* *INDENT-ON* */ @@ -614,6 +624,16 @@ RegisterCitusConfigVariables(void) 0, NULL, NULL, NULL); + DefineCustomEnumVariable( + "citus.multi_task_query_log_level", + gettext_noop("Sets the level of multi task query execution log messages"), + NULL, + &MultiTaskQueryLogLevel, + MULTI_TASK_QUERY_INFO_OFF, multi_task_query_log_level_options, + PGC_USERSET, + 0, + NULL, NULL, NULL); + DefineCustomStringVariable( "citus.version", gettext_noop("Shows the Citus library version"), diff --git a/src/include/distributed/multi_server_executor.h b/src/include/distributed/multi_server_executor.h index 5f3b5a8e8..20421860d 100644 --- a/src/include/distributed/multi_server_executor.h +++ b/src/include/distributed/multi_server_executor.h @@ -36,6 +36,8 @@ #define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT ")" #define JOB_CLEANUP_TASK_ID INT_MAX +#define MULTI_TASK_QUERY_INFO_OFF 0 /* do not log multi-task queries */ + /* Enumeration to track one task's execution status */ typedef enum @@ -184,6 +186,7 @@ extern int RemoteTaskCheckInterval; extern int MaxAssignTaskBatchSize; extern int TaskExecutorType; extern bool BinaryMasterCopyFormat; +extern int MultiTaskQueryLogLevel; /* Function declarations for distributed execution */ diff --git a/src/test/regress/expected/multi_cross_shard.out b/src/test/regress/expected/multi_cross_shard.out new file mode 100644 index 000000000..d015a8104 --- /dev/null +++ b/src/test/regress/expected/multi_cross_shard.out @@ -0,0 +1,191 @@ +-- +-- MULTI_CROSS_SHARD +-- +-- Tests to log cross shard queries according to error log level +-- +-- Create a distributed table and add data to it +CREATE TABLE multi_task_table +( + id int, + name varchar(20) +); +SELECT create_distributed_table('multi_task_table', 'id'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO multi_task_table VALUES(1, 'elem_1'); +INSERT INTO multi_task_table VALUES(2, 'elem_2'); +INSERT INTO multi_task_table VALUES(3, 'elem_3'); +-- Shouldn't log anything when the log level is 'off' +SHOW citus.multi_task_query_log_level; + citus.multi_task_query_log_level +---------------------------------- + off +(1 row) + +SELECT * FROM multi_task_table; + id | name +----+-------- + 1 | elem_1 + 3 | elem_3 + 2 | elem_2 +(3 rows) + +-- Get messages with the log level 'notice' +SET citus.multi_task_query_log_level TO notice; +SELECT * FROM multi_task_table; +NOTICE: multi-task query about to be executed +HINT: Queries are split to multiple tasks if they have to be split into several queries on the workers. + id | name +----+-------- + 1 | elem_1 + 3 | elem_3 + 2 | elem_2 +(3 rows) + +SELECT AVG(id) AS avg_id FROM multi_task_table; +NOTICE: multi-task query about to be executed +HINT: Queries are split to multiple tasks if they have to be split into several queries on the workers. + avg_id +-------------------- + 2.0000000000000000 +(1 row) + +-- Get messages with the log level 'error' +SET citus.multi_task_query_log_level TO error; +SELECT * FROM multi_task_table; +ERROR: multi-task query about to be executed +HINT: Queries are split to multiple tasks if they have to be split into several queries on the workers. +-- Check the log message with INSERT INTO ... SELECT +CREATE TABLE raw_table +( + id int, + order_count int +); +CREATE TABLE summary_table +( + id int, + order_sum BIGINT +); +SELECT create_distributed_table('raw_table', 'id'); + create_distributed_table +-------------------------- + +(1 row) + +SELECT create_distributed_table('summary_table', 'id'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO raw_table VALUES(1, '15'); +INSERT INTO raw_table VALUES(2, '15'); +INSERT INTO raw_table VALUES(3, '15'); +INSERT INTO raw_table VALUES(1, '20'); +INSERT INTO raw_table VALUES(2, '25'); +INSERT INTO raw_table VALUES(3, '35'); +-- Should notice user that the query is multi-task one +SET citus.multi_task_query_log_level TO notice; +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table GROUP BY id; +NOTICE: multi-task query about to be executed +HINT: Queries are split to multiple tasks if they have to be split into several queries on the workers. +-- Should error out since the query is multi-task one +SET citus.multi_task_query_log_level TO error; +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table GROUP BY id; +ERROR: multi-task query about to be executed +HINT: Queries are split to multiple tasks if they have to be split into several queries on the workers. +-- Shouldn't error out since it is a single task insert-into select query +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table WHERE id = 1 GROUP BY id; +-- Should have four rows (three rows from the query without where and the one from with where) +SET citus.multi_task_query_log_level to DEFAULT; +SELECT * FROM summary_table; + id | order_sum +----+----------- + 1 | 35 + 1 | 35 + 3 | 50 + 2 | 40 +(4 rows) + +-- Set log-level to different levels inside the transaction +BEGIN; +-- Should notice user that the query is multi-task one +SET citus.multi_task_query_log_level TO notice; +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table GROUP BY id; +NOTICE: multi-task query about to be executed +HINT: Queries are split to multiple tasks if they have to be split into several queries on the workers. +-- Should error out since the query is multi-task one +SET citus.multi_task_query_log_level TO error; +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table GROUP BY id; +ERROR: multi-task query about to be executed +HINT: Queries are split to multiple tasks if they have to be split into several queries on the workers. +ROLLBACK; +-- Should have only four rows since the transaction is rollbacked. +SET citus.multi_task_query_log_level to DEFAULT; +SELECT * FROM summary_table; + id | order_sum +----+----------- + 1 | 35 + 1 | 35 + 3 | 50 + 2 | 40 +(4 rows) + +-- Test router-select query +SET citus.multi_task_query_log_level TO notice; +-- Shouldn't log since it is a router select query +SELECT * FROM raw_table WHERE ID = 1; + id | order_count +----+------------- + 1 | 15 + 1 | 20 +(2 rows) + +-- Task tracker query test +CREATE TABLE tt1 +( + id int, + name varchar(20) +); +CREATE TABLE tt2 +( + id int, + name varchar(20), + count bigint +); +SELECT create_distributed_table('tt1', 'id'); + create_distributed_table +-------------------------- + +(1 row) + +SELECT create_distributed_table('tt2', 'name'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO tt1 VALUES(1, 'Ahmet'); +INSERT INTO tt1 VALUES(2, 'Mehmet'); +INSERT INTO tt2 VALUES(1, 'Ahmet', 5); +INSERT INTO tt2 VALUES(2, 'Mehmet', 15); +-- Should notice since it is a task-tracker query +SET citus.task_executor_type to "task-tracker"; +SELECT tt1.id, tt2.count from tt1,tt2 where tt1.id = tt2.id; +NOTICE: multi-task query about to be executed +HINT: Queries are split to multiple tasks if they have to be split into several queries on the workers. + id | count +----+------- + 1 | 5 + 2 | 15 +(2 rows) + +SET citus.task_executor_type to DEFAULT; +DROP TABLE tt2; +DROP TABLE tt1; +DROP TABLE multi_task_table; +DROP TABLE raw_table; +DROP TABLE summary_table; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 3ffb820be..ac9c206d8 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -52,7 +52,7 @@ test: multi_partition_pruning test: multi_join_pruning multi_hash_pruning test: multi_null_minmax_value_pruning test: multi_query_directory_cleanup -test: multi_task_assignment_policy +test: multi_task_assignment_policy multi_cross_shard test: multi_utility_statements test: multi_dropped_column_aliases test: multi_binary_master_copy_format diff --git a/src/test/regress/sql/multi_cross_shard.sql b/src/test/regress/sql/multi_cross_shard.sql new file mode 100644 index 000000000..7f0e51708 --- /dev/null +++ b/src/test/regress/sql/multi_cross_shard.sql @@ -0,0 +1,124 @@ +-- +-- MULTI_CROSS_SHARD +-- +-- Tests to log cross shard queries according to error log level +-- + +-- Create a distributed table and add data to it +CREATE TABLE multi_task_table +( + id int, + name varchar(20) +); +SELECT create_distributed_table('multi_task_table', 'id'); + +INSERT INTO multi_task_table VALUES(1, 'elem_1'); +INSERT INTO multi_task_table VALUES(2, 'elem_2'); +INSERT INTO multi_task_table VALUES(3, 'elem_3'); + +-- Shouldn't log anything when the log level is 'off' +SHOW citus.multi_task_query_log_level; +SELECT * FROM multi_task_table; + +-- Get messages with the log level 'notice' +SET citus.multi_task_query_log_level TO notice; +SELECT * FROM multi_task_table; +SELECT AVG(id) AS avg_id FROM multi_task_table; + +-- Get messages with the log level 'error' +SET citus.multi_task_query_log_level TO error; +SELECT * FROM multi_task_table; + +-- Check the log message with INSERT INTO ... SELECT +CREATE TABLE raw_table +( + id int, + order_count int +); + +CREATE TABLE summary_table +( + id int, + order_sum BIGINT +); + +SELECT create_distributed_table('raw_table', 'id'); +SELECT create_distributed_table('summary_table', 'id'); + +INSERT INTO raw_table VALUES(1, '15'); +INSERT INTO raw_table VALUES(2, '15'); +INSERT INTO raw_table VALUES(3, '15'); +INSERT INTO raw_table VALUES(1, '20'); +INSERT INTO raw_table VALUES(2, '25'); +INSERT INTO raw_table VALUES(3, '35'); + +-- Should notice user that the query is multi-task one +SET citus.multi_task_query_log_level TO notice; +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table GROUP BY id; + +-- Should error out since the query is multi-task one +SET citus.multi_task_query_log_level TO error; +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table GROUP BY id; + +-- Shouldn't error out since it is a single task insert-into select query +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table WHERE id = 1 GROUP BY id; + +-- Should have four rows (three rows from the query without where and the one from with where) +SET citus.multi_task_query_log_level to DEFAULT; +SELECT * FROM summary_table; + +-- Set log-level to different levels inside the transaction +BEGIN; +-- Should notice user that the query is multi-task one +SET citus.multi_task_query_log_level TO notice; +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table GROUP BY id; + +-- Should error out since the query is multi-task one +SET citus.multi_task_query_log_level TO error; +INSERT INTO summary_table SELECT id, SUM(order_count) FROM raw_table GROUP BY id; +ROLLBACK; + +-- Should have only four rows since the transaction is rollbacked. +SET citus.multi_task_query_log_level to DEFAULT; +SELECT * FROM summary_table; + +-- Test router-select query +SET citus.multi_task_query_log_level TO notice; + +-- Shouldn't log since it is a router select query +SELECT * FROM raw_table WHERE ID = 1; + +-- Task tracker query test +CREATE TABLE tt1 +( + id int, + name varchar(20) +); + +CREATE TABLE tt2 +( + id int, + name varchar(20), + count bigint +); + +SELECT create_distributed_table('tt1', 'id'); +SELECT create_distributed_table('tt2', 'name'); + +INSERT INTO tt1 VALUES(1, 'Ahmet'); +INSERT INTO tt1 VALUES(2, 'Mehmet'); + +INSERT INTO tt2 VALUES(1, 'Ahmet', 5); +INSERT INTO tt2 VALUES(2, 'Mehmet', 15); + +-- Should notice since it is a task-tracker query +SET citus.task_executor_type to "task-tracker"; +SELECT tt1.id, tt2.count from tt1,tt2 where tt1.id = tt2.id; + +SET citus.task_executor_type to DEFAULT; + +DROP TABLE tt2; +DROP TABLE tt1; +DROP TABLE multi_task_table; +DROP TABLE raw_table; +DROP TABLE summary_table;