From cb9e510e4096f34e4ad5b312da0cd20d0f0261ed Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Fri, 1 Apr 2022 12:45:49 +0200 Subject: [PATCH] Add TABLESAMPLE support --- .../planner/multi_logical_planner.c | 32 +------- .../planner/multi_physical_planner.c | 1 + .../distributed/multi_logical_planner.h | 3 + .../regress/expected/multi_simple_queries.out | 77 +++++++++++++++++++ src/test/regress/sql/multi_simple_queries.sql | 20 +++++ 5 files changed, 102 insertions(+), 31 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 3bab7d1bc..1535beb1e 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -75,7 +75,6 @@ static Oid NodeTryGetRteRelid(Node *node); static bool FullCompositeFieldList(List *compositeFieldList); static bool HasUnsupportedJoinWalker(Node *node, void *context); static bool ErrorHintRequired(const char *errorHint, Query *queryTree); -static bool HasTablesample(Query *queryTree); static bool HasComplexRangeTableType(Query *queryTree); static bool IsReadIntermediateResultFunction(Node *node); static bool IsReadIntermediateResultArrayFunction(Node *node); @@ -899,14 +898,6 @@ DeferErrorIfQueryNotSupported(Query *queryTree) errorHint = filterHint; } - bool hasTablesample = HasTablesample(queryTree); - if (hasTablesample) - { - preconditionsSatisfied = false; - errorMessage = "could not run distributed query which use TABLESAMPLE"; - errorHint = filterHint; - } - bool hasUnsupportedJoin = HasUnsupportedJoinWalker((Node *) queryTree->jointree, NULL); if (hasUnsupportedJoin) @@ -960,28 +951,6 @@ DeferErrorIfQueryNotSupported(Query *queryTree) } -/* HasTablesample returns tree if the query contains tablesample */ -static bool -HasTablesample(Query *queryTree) -{ - List *rangeTableList = queryTree->rtable; - ListCell *rangeTableEntryCell = NULL; - bool hasTablesample = false; - - foreach(rangeTableEntryCell, rangeTableList) - { - RangeTblEntry *rangeTableEntry = lfirst(rangeTableEntryCell); - if (rangeTableEntry->tablesample) - { - hasTablesample = true; - break; - } - } - - return hasTablesample; -} - - /* * HasUnsupportedJoinWalker returns tree if the query contains an unsupported * join type. We currently support inner, left, right, full and anti joins. @@ -1541,6 +1510,7 @@ MultiTableNodeList(List *tableEntryList, List *rangeTableList) tableNode->alias = rangeTableEntry->alias; tableNode->referenceNames = rangeTableEntry->eref; tableNode->includePartitions = GetOriginalInh(rangeTableEntry); + tableNode->tablesample = rangeTableEntry->tablesample; tableNodeList = lappend(tableNodeList, tableNode); } diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 3d9c78bf8..a26bf158d 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -735,6 +735,7 @@ BaseRangeTableList(MultiNode *multiNode) rangeTableEntry->alias = multiTable->alias; rangeTableEntry->relid = multiTable->relationId; rangeTableEntry->inh = multiTable->includePartitions; + rangeTableEntry->tablesample = multiTable->tablesample; SetRangeTblExtraData(rangeTableEntry, CITUS_RTE_RELATION, NULL, NULL, list_make1_int(multiTable->rangeTableId), diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index 41d259885..69da17aca 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -90,6 +90,9 @@ typedef struct MultiTable Alias *referenceNames; Query *subquery; /* this field is only valid for non-relation subquery types */ bool includePartitions; + + /* FROM .. TABLESAMPLE clause */ + TableSampleClause *tablesample; } MultiTable; diff --git a/src/test/regress/expected/multi_simple_queries.out b/src/test/regress/expected/multi_simple_queries.out index 7cb4072e6..a3a1b0c71 100644 --- a/src/test/regress/expected/multi_simple_queries.out +++ b/src/test/regress/expected/multi_simple_queries.out @@ -664,6 +664,83 @@ DEBUG: query has a single distribution column value: 1 41 | 1 | aznavour | 11814 (5 rows) +SELECT count(*) FROM articles TABLESAMPLE SYSTEM (0); +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (0); +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- We use REPEATABLE to get deterministic results in terms of which pages are returned. +-- That does mean we get the same page indexes on each shard, which in this case means +-- they all return the first page, meaning all rows. +SELECT count(*) FROM articles TABLESAMPLE SYSTEM (90) REPEATABLE (200); +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 50 +(1 row) + +SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (90) REPEATABLE (200); +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 43 +(1 row) + +SELECT count(*) FROM articles TABLESAMPLE SYSTEM (100); +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 50 +(1 row) + +SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (100); +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 50 +(1 row) + +SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE SYSTEM (0)) a; +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE BERNOULLI (0)) a; +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE SYSTEM (100)) a; +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 50 +(1 row) + +SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE BERNOULLI (100)) a; +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 50 +(1 row) + +DELETE FROM articles a USING articles b TABLESAMPLE bernoulli(0) WHERE a.author_id = b.author_id; +DEBUG: Creating router plan +UPDATE articles a SET title = '' FROM articles b TABLESAMPLE bernoulli(0) WHERE a.author_id = b.author_id; +DEBUG: Creating router plan -- test tablesample with fast path as well SET citus.enable_fast_path_router_planner TO true; SELECT * FROM articles TABLESAMPLE SYSTEM (0) WHERE author_id = 1; diff --git a/src/test/regress/sql/multi_simple_queries.sql b/src/test/regress/sql/multi_simple_queries.sql index f987518e1..ac976600e 100644 --- a/src/test/regress/sql/multi_simple_queries.sql +++ b/src/test/regress/sql/multi_simple_queries.sql @@ -319,6 +319,26 @@ SELECT * FROM articles TABLESAMPLE BERNOULLI (0) WHERE author_id = 1; SELECT * FROM articles TABLESAMPLE SYSTEM (100) WHERE author_id = 1 ORDER BY id; SELECT * FROM articles TABLESAMPLE BERNOULLI (100) WHERE author_id = 1 ORDER BY id; +SELECT count(*) FROM articles TABLESAMPLE SYSTEM (0); +SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (0); + +-- We use REPEATABLE to get deterministic results in terms of which pages are returned. +-- That does mean we get the same page indexes on each shard, which in this case means +-- they all return the first page, meaning all rows. +SELECT count(*) FROM articles TABLESAMPLE SYSTEM (90) REPEATABLE (200); +SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (90) REPEATABLE (200); + +SELECT count(*) FROM articles TABLESAMPLE SYSTEM (100); +SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (100); + +SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE SYSTEM (0)) a; +SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE BERNOULLI (0)) a; +SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE SYSTEM (100)) a; +SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE BERNOULLI (100)) a; + +DELETE FROM articles a USING articles b TABLESAMPLE bernoulli(0) WHERE a.author_id = b.author_id; +UPDATE articles a SET title = '' FROM articles b TABLESAMPLE bernoulli(0) WHERE a.author_id = b.author_id; + -- test tablesample with fast path as well SET citus.enable_fast_path_router_planner TO true; SELECT * FROM articles TABLESAMPLE SYSTEM (0) WHERE author_id = 1;