Merge pull request #5875 from citusdata/marcocitus/tablesample

pull/5878/head
Marco Slot 2022-04-01 16:47:03 +02:00 committed by GitHub
commit b511e28e80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 102 additions and 31 deletions

View File

@ -75,7 +75,6 @@ static Oid NodeTryGetRteRelid(Node *node);
static bool FullCompositeFieldList(List *compositeFieldList); static bool FullCompositeFieldList(List *compositeFieldList);
static bool HasUnsupportedJoinWalker(Node *node, void *context); static bool HasUnsupportedJoinWalker(Node *node, void *context);
static bool ErrorHintRequired(const char *errorHint, Query *queryTree); static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
static bool HasTablesample(Query *queryTree);
static bool HasComplexRangeTableType(Query *queryTree); static bool HasComplexRangeTableType(Query *queryTree);
static bool IsReadIntermediateResultFunction(Node *node); static bool IsReadIntermediateResultFunction(Node *node);
static bool IsReadIntermediateResultArrayFunction(Node *node); static bool IsReadIntermediateResultArrayFunction(Node *node);
@ -899,14 +898,6 @@ DeferErrorIfQueryNotSupported(Query *queryTree)
errorHint = filterHint; errorHint = filterHint;
} }
bool hasTablesample = HasTablesample(queryTree);
if (hasTablesample)
{
preconditionsSatisfied = false;
errorMessage = "could not run distributed query which use TABLESAMPLE";
errorHint = filterHint;
}
bool hasUnsupportedJoin = HasUnsupportedJoinWalker((Node *) queryTree->jointree, bool hasUnsupportedJoin = HasUnsupportedJoinWalker((Node *) queryTree->jointree,
NULL); NULL);
if (hasUnsupportedJoin) if (hasUnsupportedJoin)
@ -960,28 +951,6 @@ DeferErrorIfQueryNotSupported(Query *queryTree)
} }
/*
 * HasTablesample returns true if any entry in the query's range table
 * has a TABLESAMPLE clause attached, and false otherwise.
 */
static bool
HasTablesample(Query *queryTree)
{
	ListCell *rteCell = NULL;

	foreach(rteCell, queryTree->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rteCell);

		/* the first sampled relation settles the answer */
		if (rte->tablesample != NULL)
		{
			return true;
		}
	}

	return false;
}
/* /*
* HasUnsupportedJoinWalker returns true if the query contains an unsupported * HasUnsupportedJoinWalker returns true if the query contains an unsupported
* join type. We currently support inner, left, right, full and anti joins. * join type. We currently support inner, left, right, full and anti joins.
@ -1541,6 +1510,7 @@ MultiTableNodeList(List *tableEntryList, List *rangeTableList)
tableNode->alias = rangeTableEntry->alias; tableNode->alias = rangeTableEntry->alias;
tableNode->referenceNames = rangeTableEntry->eref; tableNode->referenceNames = rangeTableEntry->eref;
tableNode->includePartitions = GetOriginalInh(rangeTableEntry); tableNode->includePartitions = GetOriginalInh(rangeTableEntry);
tableNode->tablesample = rangeTableEntry->tablesample;
tableNodeList = lappend(tableNodeList, tableNode); tableNodeList = lappend(tableNodeList, tableNode);
} }

View File

@ -735,6 +735,7 @@ BaseRangeTableList(MultiNode *multiNode)
rangeTableEntry->alias = multiTable->alias; rangeTableEntry->alias = multiTable->alias;
rangeTableEntry->relid = multiTable->relationId; rangeTableEntry->relid = multiTable->relationId;
rangeTableEntry->inh = multiTable->includePartitions; rangeTableEntry->inh = multiTable->includePartitions;
rangeTableEntry->tablesample = multiTable->tablesample;
SetRangeTblExtraData(rangeTableEntry, CITUS_RTE_RELATION, NULL, NULL, SetRangeTblExtraData(rangeTableEntry, CITUS_RTE_RELATION, NULL, NULL,
list_make1_int(multiTable->rangeTableId), list_make1_int(multiTable->rangeTableId),

View File

@ -90,6 +90,9 @@ typedef struct MultiTable
Alias *referenceNames; Alias *referenceNames;
Query *subquery; /* this field is only valid for non-relation subquery types */ Query *subquery; /* this field is only valid for non-relation subquery types */
bool includePartitions; bool includePartitions;
/* FROM .. TABLESAMPLE clause */
TableSampleClause *tablesample;
} MultiTable; } MultiTable;

View File

@ -664,6 +664,83 @@ DEBUG: query has a single distribution column value: 1
41 | 1 | aznavour | 11814 41 | 1 | aznavour | 11814
(5 rows) (5 rows)
SELECT count(*) FROM articles TABLESAMPLE SYSTEM (0);
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (0);
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
0
(1 row)
-- We use REPEATABLE to get deterministic results in terms of which pages are returned.
-- That does mean we get the same page indexes on each shard, which in this case means
-- they all return the first page, meaning all rows.
SELECT count(*) FROM articles TABLESAMPLE SYSTEM (90) REPEATABLE (200);
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
50
(1 row)
SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (90) REPEATABLE (200);
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
43
(1 row)
SELECT count(*) FROM articles TABLESAMPLE SYSTEM (100);
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
50
(1 row)
SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (100);
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
50
(1 row)
SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE SYSTEM (0)) a;
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE BERNOULLI (0)) a;
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE SYSTEM (100)) a;
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
50
(1 row)
SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE BERNOULLI (100)) a;
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
50
(1 row)
DELETE FROM articles a USING articles b TABLESAMPLE bernoulli(0) WHERE a.author_id = b.author_id;
DEBUG: Creating router plan
UPDATE articles a SET title = '' FROM articles b TABLESAMPLE bernoulli(0) WHERE a.author_id = b.author_id;
DEBUG: Creating router plan
-- test tablesample with fast path as well -- test tablesample with fast path as well
SET citus.enable_fast_path_router_planner TO true; SET citus.enable_fast_path_router_planner TO true;
SELECT * FROM articles TABLESAMPLE SYSTEM (0) WHERE author_id = 1; SELECT * FROM articles TABLESAMPLE SYSTEM (0) WHERE author_id = 1;

View File

@ -319,6 +319,26 @@ SELECT * FROM articles TABLESAMPLE BERNOULLI (0) WHERE author_id = 1;
SELECT * FROM articles TABLESAMPLE SYSTEM (100) WHERE author_id = 1 ORDER BY id; SELECT * FROM articles TABLESAMPLE SYSTEM (100) WHERE author_id = 1 ORDER BY id;
SELECT * FROM articles TABLESAMPLE BERNOULLI (100) WHERE author_id = 1 ORDER BY id; SELECT * FROM articles TABLESAMPLE BERNOULLI (100) WHERE author_id = 1 ORDER BY id;
SELECT count(*) FROM articles TABLESAMPLE SYSTEM (0);
SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (0);
-- We use REPEATABLE to get deterministic results in terms of which pages are returned.
-- That does mean we get the same page indexes on each shard, which in this case means
-- they all return the first page, meaning all rows.
SELECT count(*) FROM articles TABLESAMPLE SYSTEM (90) REPEATABLE (200);
SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (90) REPEATABLE (200);
SELECT count(*) FROM articles TABLESAMPLE SYSTEM (100);
SELECT count(*) FROM articles TABLESAMPLE BERNOULLI (100);
SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE SYSTEM (0)) a;
SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE BERNOULLI (0)) a;
SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE SYSTEM (100)) a;
SELECT count(*) FROM (SELECT random() FROM articles TABLESAMPLE BERNOULLI (100)) a;
DELETE FROM articles a USING articles b TABLESAMPLE bernoulli(0) WHERE a.author_id = b.author_id;
UPDATE articles a SET title = '' FROM articles b TABLESAMPLE bernoulli(0) WHERE a.author_id = b.author_id;
-- test tablesample with fast path as well -- test tablesample with fast path as well
SET citus.enable_fast_path_router_planner TO true; SET citus.enable_fast_path_router_planner TO true;
SELECT * FROM articles TABLESAMPLE SYSTEM (0) WHERE author_id = 1; SELECT * FROM articles TABLESAMPLE SYSTEM (0) WHERE author_id = 1;