Merge pull request #4253 from citusdata/improve_perf_for_queries

Improve the relation restriction counters
pull/4103/head
Önder Kalacı 2020-10-19 09:22:03 +02:00 committed by GitHub
commit 25e43a4aa6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 124 additions and 20 deletions

View File

@ -534,9 +534,8 @@ ContainsMultipleDistributedRelations(PlannerRestrictionContext *
RelationRestrictionContext *restrictionContext = RelationRestrictionContext *restrictionContext =
plannerRestrictionContext->relationRestrictionContext; plannerRestrictionContext->relationRestrictionContext;
uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext); uint32 distributedRelationCount =
uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList); UniqueRelationCount(restrictionContext, DISTRIBUTED_TABLE);
uint32 nonReferenceRelationCount = totalRelationCount - referenceRelationCount;
/* /*
* If the query includes a single relation which is not a reference table, * If the query includes a single relation which is not a reference table,
@ -551,7 +550,7 @@ ContainsMultipleDistributedRelations(PlannerRestrictionContext *
* tasks that are going to be created should not need data from other tasks. In both * tasks that are going to be created should not need data from other tasks. In both
* cases mentioned above, the necessary data per task would be available. * cases mentioned above, the necessary data per task would be available.
*/ */
if (nonReferenceRelationCount <= 1) if (distributedRelationCount <= 1)
{ {
return false; return false;
} }
@ -591,29 +590,39 @@ GenerateAllAttributeEquivalences(PlannerRestrictionContext *plannerRestrictionCo
/* /*
* ReferenceRelationCount iterates over the relations and returns the reference table * UniqueRelationCount iterates over the relations and returns the
* relation count. * unique relation count. We use RTEIdentity as the identifiers, so if
* the same relation appears twice in the restrictionContext, we count
* it as a single item.
*/ */
uint32 uint32
ReferenceRelationCount(RelationRestrictionContext *restrictionContext) UniqueRelationCount(RelationRestrictionContext *restrictionContext, CitusTableType
tableType)
{ {
ListCell *relationRestrictionCell = NULL; ListCell *relationRestrictionCell = NULL;
uint32 referenceRelationCount = 0; List *rteIdentityList = NIL;
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList) foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
{ {
RelationRestriction *relationRestriction = RelationRestriction *relationRestriction =
(RelationRestriction *) lfirst(relationRestrictionCell); (RelationRestriction *) lfirst(relationRestrictionCell);
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry( Oid relationId = relationRestriction->relationId;
relationRestriction->relationId);
if (IsCitusTableTypeCacheEntry(cacheEntry, REFERENCE_TABLE)) CitusTableCacheEntry *cacheEntry = LookupCitusTableCacheEntry(relationId);
if (cacheEntry == NULL)
{ {
referenceRelationCount++; /* we don't expect non-distributed tables here, but there is no harm in skipping them */
continue;
}
if (IsCitusTableTypeCacheEntry(cacheEntry, tableType))
{
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
rteIdentityList = list_append_unique_int(rteIdentityList, rteIdentity);
} }
} }
return referenceRelationCount; return list_length(rteIdentityList);
} }
@ -1805,10 +1814,10 @@ FilterPlannerRestrictionForQuery(PlannerRestrictionContext *plannerRestrictionCo
filteredPlannerRestrictionContext->memoryContext = filteredPlannerRestrictionContext->memoryContext =
plannerRestrictionContext->memoryContext; plannerRestrictionContext->memoryContext;
int totalRelationCount = list_length( int totalRelationCount = UniqueRelationCount(
filteredRelationRestrictionContext->relationRestrictionList); filteredRelationRestrictionContext, ANY_CITUS_TABLE_TYPE);
int referenceRelationCount = ReferenceRelationCount( int referenceRelationCount = UniqueRelationCount(
filteredRelationRestrictionContext); filteredRelationRestrictionContext, REFERENCE_TABLE);
filteredRelationRestrictionContext->allReferenceTables = filteredRelationRestrictionContext->allReferenceTables =
(totalRelationCount == referenceRelationCount); (totalRelationCount == referenceRelationCount);

View File

@ -13,7 +13,7 @@
#define RELATION_RESTRICTION_EQUIVALENCE_H #define RELATION_RESTRICTION_EQUIVALENCE_H
#include "distributed/distributed_planner.h" #include "distributed/distributed_planner.h"
#include "distributed/metadata_cache.h"
extern bool AllDistributionKeysInQueryAreEqual(Query *originalQuery, extern bool AllDistributionKeysInQueryAreEqual(Query *originalQuery,
PlannerRestrictionContext * PlannerRestrictionContext *
@ -29,7 +29,8 @@ bool RestrictionEquivalenceForPartitionKeysViaEquivalences(PlannerRestrictionCon
allAttributeEquivalenceList); allAttributeEquivalenceList);
extern List * GenerateAllAttributeEquivalences(PlannerRestrictionContext * extern List * GenerateAllAttributeEquivalences(PlannerRestrictionContext *
plannerRestrictionContext); plannerRestrictionContext);
extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext); extern uint32 UniqueRelationCount(RelationRestrictionContext *restrictionContext,
CitusTableType tableType);
extern List * DistributedRelationIdList(Query *query); extern List * DistributedRelationIdList(Query *query);
extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery(

View File

@ -2,6 +2,65 @@
-- test recursive planning functionality -- test recursive planning functionality
-- =================================================================== -- ===================================================================
SET client_min_messages TO DEBUG1; SET client_min_messages TO DEBUG1;
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
user_id, value_1
FROM
(SELECT user_id, value_1 FROM users_table) as foo
ORDER BY 1 DESC, 2 DESC LIMIT 3;
DEBUG: push down of limit count: 3
user_id | value_1
---------------------------------------------------------------------
6 | 5
6 | 5
6 | 3
(3 rows)
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
sum(sel_val_1), sum(sel_val_2)
FROM
(SELECT max(value_1) as sel_val_1, min(value_2) as sel_val_2 FROM users_table GROUP BY user_id) as foo;
sum | sum
---------------------------------------------------------------------
29 | 1
(1 row)
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
min(user_id), max(value_1)
FROM
(SELECT user_id, value_1 FROM users_table) as foo;
min | max
---------------------------------------------------------------------
1 | 5
(1 row)
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
min(user_id)
FROM
(SELECT user_id, value_1 FROM users_table GROUP BY user_id, value_1) as bar;
min
---------------------------------------------------------------------
1
(1 row)
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
min(user_id), sum(max_value_1)
FROM
(SELECT user_id, max(value_1) as max_value_1 FROM users_table GROUP BY user_id) as bar;
min | sum
---------------------------------------------------------------------
1 | 29
(1 row)
-- subqueries in FROM clause with LIMIT should be recursively planned -- subqueries in FROM clause with LIMIT should be recursively planned
SELECT SELECT
user_id user_id

View File

@ -1,9 +1,44 @@
-- =================================================================== -- ===================================================================
-- test recursive planning functionality -- test recursive planning functionality
-- =================================================================== -- ===================================================================
SET client_min_messages TO DEBUG1; SET client_min_messages TO DEBUG1;
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
user_id, value_1
FROM
(SELECT user_id, value_1 FROM users_table) as foo
ORDER BY 1 DESC, 2 DESC LIMIT 3;
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
sum(sel_val_1), sum(sel_val_2)
FROM
(SELECT max(value_1) as sel_val_1, min(value_2) as sel_val_2 FROM users_table GROUP BY user_id) as foo;
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
min(user_id), max(value_1)
FROM
(SELECT user_id, value_1 FROM users_table) as foo;
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
min(user_id)
FROM
(SELECT user_id, value_1 FROM users_table GROUP BY user_id, value_1) as bar;
-- the subquery is safe to pushdown, should not
-- recursively plan
SELECT
min(user_id), sum(max_value_1)
FROM
(SELECT user_id, max(value_1) as max_value_1 FROM users_table GROUP BY user_id) as bar;
-- subqueries in FROM clause with LIMIT should be recursively planned -- subqueries in FROM clause with LIMIT should be recursively planned
SELECT SELECT
user_id user_id