From bbedfca761a7acb8f2b61af1eeaba012e2e9ae4d Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Fri, 16 Oct 2020 15:16:55 +0200 Subject: [PATCH] Improve the relation restriction counters It seems like Postgres could call set_rel_pathlist() for the same relation multiple times. This breaks the logic where we assume relationCount eqauls to the number of entries in relationRestrictionList. In summary, relationRestrictionList may contain duplicate entries. --- .../relation_restriction_equivalence.c | 43 ++++++++------ .../relation_restriction_equivalence.h | 5 +- src/test/regress/expected/subquery_basics.out | 59 +++++++++++++++++++ src/test/regress/sql/subquery_basics.sql | 37 +++++++++++- 4 files changed, 124 insertions(+), 20 deletions(-) diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 7b80b94e1..199a6cd8a 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -534,9 +534,8 @@ ContainsMultipleDistributedRelations(PlannerRestrictionContext * RelationRestrictionContext *restrictionContext = plannerRestrictionContext->relationRestrictionContext; - uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext); - uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList); - uint32 nonReferenceRelationCount = totalRelationCount - referenceRelationCount; + uint32 distributedRelationCount = + UniqueRelationCount(restrictionContext, DISTRIBUTED_TABLE); /* * If the query includes a single relation which is not a reference table, @@ -551,7 +550,7 @@ ContainsMultipleDistributedRelations(PlannerRestrictionContext * * tasks that are going to be created should not need data from other tasks. In both * cases mentioned above, the necessary data per task would be on available. */ - if (nonReferenceRelationCount <= 1) + if (distributedRelationCount <= 1) { return false; } @@ -591,29 +590,39 @@ GenerateAllAttributeEquivalences(PlannerRestrictionContext *plannerRestrictionCo /* - * ReferenceRelationCount iterates over the relations and returns the reference table - * relation count. + * UniqueRelationCount iterates over the relations and returns the + * unique relation count. We use RTEIdentity as the identifiers, so if + * the same relation appears twice in the restrictionContext, we count + * it as a single item. */ uint32 -ReferenceRelationCount(RelationRestrictionContext *restrictionContext) +UniqueRelationCount(RelationRestrictionContext *restrictionContext, CitusTableType + tableType) { ListCell *relationRestrictionCell = NULL; - uint32 referenceRelationCount = 0; + List *rteIdentityList = NIL; foreach(relationRestrictionCell, restrictionContext->relationRestrictionList) { RelationRestriction *relationRestriction = (RelationRestriction *) lfirst(relationRestrictionCell); - CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry( - relationRestriction->relationId); + Oid relationId = relationRestriction->relationId; - if (IsCitusTableTypeCacheEntry(cacheEntry, REFERENCE_TABLE)) + CitusTableCacheEntry *cacheEntry = LookupCitusTableCacheEntry(relationId); + if (cacheEntry == NULL) { - referenceRelationCount++; + /* we don't expect non-distributed tables, still be no harm to skip */ + continue; + } + + if (IsCitusTableTypeCacheEntry(cacheEntry, tableType)) + { + int rteIdentity = GetRTEIdentity(relationRestriction->rte); + rteIdentityList = list_append_unique_int(rteIdentityList, rteIdentity); } } - return referenceRelationCount; + return list_length(rteIdentityList); } @@ -1805,10 +1814,10 @@ FilterPlannerRestrictionForQuery(PlannerRestrictionContext *plannerRestrictionCo filteredPlannerRestrictionContext->memoryContext = plannerRestrictionContext->memoryContext; - int totalRelationCount = list_length( - filteredRelationRestrictionContext->relationRestrictionList); - int referenceRelationCount = ReferenceRelationCount( - filteredRelationRestrictionContext); + int totalRelationCount = UniqueRelationCount( + filteredRelationRestrictionContext, ANY_CITUS_TABLE_TYPE); + int referenceRelationCount = UniqueRelationCount( + filteredRelationRestrictionContext, REFERENCE_TABLE); filteredRelationRestrictionContext->allReferenceTables = (totalRelationCount == referenceRelationCount); diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index 197944e49..1a32bdbcc 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -13,7 +13,7 @@ #define RELATION_RESTRICTION_EQUIVALENCE_H #include "distributed/distributed_planner.h" - +#include "distributed/metadata_cache.h" extern bool AllDistributionKeysInQueryAreEqual(Query *originalQuery, PlannerRestrictionContext * @@ -29,7 +29,8 @@ bool RestrictionEquivalenceForPartitionKeysViaEquivalences(PlannerRestrictionCon allAttributeEquivalenceList); extern List * GenerateAllAttributeEquivalences(PlannerRestrictionContext * plannerRestrictionContext); -extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext); +extern uint32 UniqueRelationCount(RelationRestrictionContext *restrictionContext, + CitusTableType tableType); extern List * DistributedRelationIdList(Query *query); extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( diff --git a/src/test/regress/expected/subquery_basics.out b/src/test/regress/expected/subquery_basics.out index d43a48f0b..8b96008d2 100644 --- a/src/test/regress/expected/subquery_basics.out +++ b/src/test/regress/expected/subquery_basics.out @@ -2,6 +2,65 @@ -- test recursive planning functionality -- =================================================================== SET client_min_messages TO DEBUG1; +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + user_id, value_1 +FROM + (SELECT user_id, value_1 FROM users_table) as foo +ORDER BY 1 DESC, 2 DESC LIMIT 3; +DEBUG: push down of limit count: 3 + user_id | value_1 +--------------------------------------------------------------------- + 6 | 5 + 6 | 5 + 6 | 3 +(3 rows) + +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + sum(sel_val_1), sum(sel_val_2) +FROM + (SELECT max(value_1) as sel_val_1, min(value_2) as sel_val_2 FROM users_table GROUP BY user_id) as foo; + sum | sum +--------------------------------------------------------------------- + 29 | 1 +(1 row) + +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + min(user_id), max(value_1) +FROM + (SELECT user_id, value_1 FROM users_table) as foo; + min | max +--------------------------------------------------------------------- + 1 | 5 +(1 row) + +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + min(user_id) +FROM + (SELECT user_id, value_1 FROM users_table GROUP BY user_id, value_1) as bar; + min +--------------------------------------------------------------------- + 1 +(1 row) + +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + min(user_id), sum(max_value_1) +FROM + (SELECT user_id, max(value_1) as max_value_1 FROM users_table GROUP BY user_id) as bar; + min | sum +--------------------------------------------------------------------- + 1 | 29 +(1 row) + -- subqueries in FROM clause with LIMIT should be recursively planned SELECT user_id diff --git a/src/test/regress/sql/subquery_basics.sql b/src/test/regress/sql/subquery_basics.sql index f5dd17020..082604bf4 100644 --- a/src/test/regress/sql/subquery_basics.sql +++ b/src/test/regress/sql/subquery_basics.sql @@ -1,9 +1,44 @@ -- =================================================================== -- test recursive planning functionality -- =================================================================== - SET client_min_messages TO DEBUG1; +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + user_id, value_1 +FROM + (SELECT user_id, value_1 FROM users_table) as foo +ORDER BY 1 DESC, 2 DESC LIMIT 3; + +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + sum(sel_val_1), sum(sel_val_2) +FROM + (SELECT max(value_1) as sel_val_1, min(value_2) as sel_val_2 FROM users_table GROUP BY user_id) as foo; + +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + min(user_id), max(value_1) +FROM + (SELECT user_id, value_1 FROM users_table) as foo; + +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + min(user_id) +FROM + (SELECT user_id, value_1 FROM users_table GROUP BY user_id, value_1) as bar; + +-- the subquery is safe to pushdown, should not +-- recursively plan +SELECT + min(user_id), sum(max_value_1) +FROM + (SELECT user_id, max(value_1) as max_value_1 FROM users_table GROUP BY user_id) as bar; + -- subqueries in FROM clause with LIMIT should be recursively planned SELECT user_id