mirror of https://github.com/citusdata/citus.git
Collations matter for hashing strings in pg12

See https://www.postgresql.org/docs/12/collation.html#COLLATION-NONDETERMINISTIC

branch pull/2844/head
parent fe10ca453d
commit be3285828f
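
Note on the motivation (commentary added here, not text from the commit): under PostgreSQL 12's nondeterministic collations, strings that compare equal can differ byte-wise, so a hash computed without knowing the collation can disagree with comparison. PG12's string hash functions therefore take a collation OID and raise an error when none is supplied, which is why the calls below switch to the *Coll variants. A minimal sketch of that pattern, assuming PostgreSQL 12 backend headers; the helper name is hypothetical:

/*
 * Minimal sketch (assumption, not from the commit): hash a text datum under
 * an explicit collation.  In PostgreSQL 12, hashtext() errors out with
 * "could not determine which collation to use for string hashing" when the
 * collation is left unset.
 */
#include "postgres.h"
#include "fmgr.h"
#include "utils/fmgrprotos.h"

static uint32
HashTextDatumWithCollation(Datum textDatum, Oid collationId)
{
	/* DirectFunctionCall1Coll() forwards collationId to hashtext() */
	Datum hashedDatum = DirectFunctionCall1Coll(hashtext, collationId, textDatum);

	return DatumGetUInt32(hashedDatum);
}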
@@ -84,7 +84,7 @@ worker_hash(PG_FUNCTION_ARGS)
 	fmgr_info_copy(hashFunction, &(typeEntry->hash_proc_finfo), CurrentMemoryContext);
 
 	/* calculate hash value */
-	hashedValueDatum = FunctionCall1(hashFunction, valueDatum);
+	hashedValueDatum = FunctionCall1Coll(hashFunction, PG_GET_COLLATION(), valueDatum);
 
 	PG_RETURN_INT32(hashedValueDatum);
 }

@@ -21,6 +21,7 @@
 #include "distributed/multi_physical_planner.h"
 #include "distributed/distributed_planner.h"
 #include "distributed/multi_server_executor.h"
+#include "distributed/version_compat.h"
 #include "distributed/worker_protocol.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"

@@ -51,6 +52,8 @@ static bool UseGroupAggregateWithHLL(Query *masterQuery);
 static bool QueryContainsAggregateWithHLL(Query *query);
 static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
 static List * PrepareTargetListForNextPlan(List *targetList);
+static Agg * makeAggNode(List *groupClauseList, List *havingQual,
+						 AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan);
 
 
 /*
@@ -267,7 +270,6 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 	Agg *aggregatePlan = NULL;
 	AggStrategy aggregateStrategy = AGG_PLAIN;
 	AggClauseCosts aggregateCosts;
-	AttrNumber *groupColumnIdArray = NULL;
 	List *aggregateTargetList = NIL;
 	List *groupColumnList = NIL;
 	List *aggregateColumnList = NIL;

@@ -275,9 +277,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 	List *columnList = NIL;
 	ListCell *columnCell = NULL;
 	Node *havingQual = NULL;
-	Oid *groupColumnOpArray = NULL;
 	uint32 groupColumnCount = 0;
-	const long rowEstimate = 10;
 
 	/* assert that we need to build an aggregate plan */
 	Assert(masterQuery->hasAggs || masterQuery->groupClause);
@@ -353,17 +353,11 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 		{
 			aggregateStrategy = AGG_HASHED;
 		}
-
-		/* get column indexes that are being grouped */
-		groupColumnIdArray = extract_grouping_cols(groupColumnList, subPlan->targetlist);
-		groupColumnOpArray = extract_grouping_ops(groupColumnList);
 	}
 
 	/* finally create the plan */
-	aggregatePlan = make_agg(aggregateTargetList, (List *) havingQual, aggregateStrategy,
-							 AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
-							 groupColumnOpArray, NIL, NIL,
-							 rowEstimate, subPlan);
+	aggregatePlan = makeAggNode(groupColumnList, (List *) havingQual,
+								aggregateStrategy, aggregateTargetList, subPlan);
 
 	/* just for reproducible costs between different PostgreSQL versions */
 	aggregatePlan->plan.startup_cost = 0;
@@ -527,17 +521,8 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
 
 	if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate)
 	{
-		const long rowEstimate = 10; /* using the same value as BuildAggregatePlan() */
-		AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList,
-																  subPlan->targetlist);
-		Oid *distinctColumnOpArray = extract_grouping_ops(distinctClauseList);
-		uint32 distinctClauseCount = list_length(distinctClauseList);
-
-		distinctPlan = (Plan *) make_agg(targetList, NIL, AGG_HASHED,
-										 AGGSPLIT_SIMPLE, distinctClauseCount,
-										 distinctColumnIdArray,
-										 distinctColumnOpArray, NIL, NIL,
-										 rowEstimate, subPlan);
+		distinctPlan = (Plan *) makeAggNode(distinctClauseList, NIL, AGG_HASHED,
+											targetList, subPlan);
 	}
 	else
 	{
@@ -581,3 +566,36 @@ PrepareTargetListForNextPlan(List *targetList)
 
 	return newtargetList;
 }
+
+
+/*
+ * makeAggNode creates a "Agg" plan node. groupClauseList is a list of
+ * SortGroupClause's.
+ */
+static Agg *
+makeAggNode(List *groupClauseList, List *havingQual, AggStrategy aggrStrategy,
+			List *queryTargetList, Plan *subPlan)
+{
+	Agg *aggNode = NULL;
+	int groupColumnCount = list_length(groupClauseList);
+	AttrNumber *groupColumnIdArray =
+		extract_grouping_cols(groupClauseList, subPlan->targetlist);
+	Oid *groupColumnOpArray = extract_grouping_ops(groupClauseList);
+	const int rowEstimate = 10;
+
+#if (PG_VERSION_NUM >= 120000)
+	aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
+					   AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
+					   groupColumnOpArray,
+					   extract_grouping_collations(groupClauseList,
+												   subPlan->targetlist),
+					   NIL, NIL, rowEstimate, subPlan);
+#else
+	aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
+					   AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
+					   groupColumnOpArray,
+					   NIL, NIL, rowEstimate, subPlan);
+#endif
+
+	return aggNode;
+}
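
Commentary (not from the commit): the makeAggNode() helper added above exists mainly so the PostgreSQL 12 change to make_agg() — the extra grouping-collations argument — is handled in one place instead of at every call site. A hypothetical caller, assuming the helper and the planner types already used in this file; BuildGroupByPlanSketch is not a function in the commit:

#include "postgres.h"
#include "nodes/parsenodes.h"
#include "nodes/plannodes.h"

/* hypothetical caller: build a HashAggregate over the master query's
 * GROUP BY clauses on top of the worker results represented by subPlan */
static Plan *
BuildGroupByPlanSketch(Query *masterQuery, Plan *subPlan)
{
	Agg *aggNode = makeAggNode(masterQuery->groupClause,
							   (List *) masterQuery->havingQual,
							   AGG_HASHED,
							   masterQuery->targetList,
							   subPlan);

	return (Plan *) aggNode;
}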
@@ -15,15 +15,77 @@
 
 #include "distributed/multi_master_planner.h"
 #include "nodes/plannodes.h"
+#if PG_VERSION_NUM >= 120000
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#else
 #include "optimizer/tlist.h"
+#endif
 
 /*
  * make_unique_from_sortclauses creates and returns a unique node
  * from provided distinct clause list.
  * The functions is copied from postgresql from
  * src/backend/optimizer/plan/createplan.c.
- *
+ */
+
+#if PG_VERSION_NUM >= 120000
+
+/*
+ * distinctList is a list of SortGroupClauses, identifying the targetlist items
+ * that should be considered by the Unique filter. The input path must
+ * already be sorted accordingly.
+ */
+Unique *
+make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
+{
+	Unique *node = makeNode(Unique);
+	Plan *plan = &node->plan;
+	int numCols = list_length(distinctList);
+	int keyno = 0;
+	AttrNumber *uniqColIdx;
+	Oid *uniqOperators;
+	Oid *uniqCollations;
+	ListCell *slitem;
+
+	plan->targetlist = lefttree->targetlist;
+	plan->qual = NIL;
+	plan->lefttree = lefttree;
+	plan->righttree = NULL;
+
+	/*
+	 * convert SortGroupClause list into arrays of attr indexes and equality
+	 * operators, as wanted by executor
+	 */
+	Assert(numCols > 0);
+	uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+	uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+	uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+
+	foreach(slitem, distinctList)
+	{
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
+		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
+
+		uniqColIdx[keyno] = tle->resno;
+		uniqOperators[keyno] = sortcl->eqop;
+		uniqCollations[keyno] = exprCollation((Node *) tle->expr);
+		Assert(OidIsValid(uniqOperators[keyno]));
+		keyno++;
+	}
+
+	node->numCols = numCols;
+	node->uniqColIdx = uniqColIdx;
+	node->uniqOperators = uniqOperators;
+	node->uniqCollations = uniqCollations;
+
+	return node;
+}
+
+
+#else
+
+/*
 * distinctList is a list of SortGroupClauses, identifying the targetlist items
 * that should be considered by the Unique filter. The input path must
 * already be sorted accordingly.

@@ -69,3 +131,6 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
 
 	return node;
 }
+
+
+#endif

@@ -254,7 +254,9 @@ FindShardInterval(Datum partitionColumnValue, DistTableCacheEntry *cacheEntry)
 
 	if (cacheEntry->partitionMethod == DISTRIBUTE_BY_HASH)
 	{
-		searchedValue = FunctionCall1(cacheEntry->hashFunction, partitionColumnValue);
+		searchedValue = FunctionCall1Coll(cacheEntry->hashFunction,
+										  cacheEntry->partitionColumn->varcollid,
+										  partitionColumnValue);
 	}
 
 	shardIndex = FindShardIntervalIndex(searchedValue, cacheEntry);

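Commentary (not from the commit): shard pruning now hashes the incoming value with the distribution column's collation, taken from the column Var's varcollid, so the coordinator's routing agrees with how the same value hashes on the workers. A small sketch of that pairing, assuming only PostgreSQL headers; the helper name is hypothetical:

#include "postgres.h"
#include "fmgr.h"
#include "nodes/primnodes.h"

/*
 * Sketch (assumption, not from the commit): hash a value for shard pruning
 * under the collation the distribution column was declared with; varcollid
 * carries that collation on the column's Var node.
 */
static Datum
HashUnderColumnCollation(FmgrInfo *hashFunction, Var *partitionColumn, Datum value)
{
	return FunctionCall1Coll(hashFunction, partitionColumn->varcollid, value);
}
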
@@ -20,6 +20,7 @@
 #include "nodes/parsenodes.h"
 #include "storage/fd.h"
 #include "utils/array.h"
+#include "distributed/version_compat.h"
 
 
 /* Number of rows to prefetch when reading data with a cursor */

@@ -79,6 +80,7 @@ typedef struct HashPartitionContext
 	FmgrInfo *comparisonFunction;
 	ShardInterval **syntheticShardIntervalArray;
 	uint32 partitionCount;
+	Oid collation;
 	bool hasUniformHashDistribution;
 } HashPartitionContext;
 
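Commentary (not from the commit): the new collation member lets the worker's hash repartitioning hash each tuple's partition column under the same collation the coordinator uses, keeping bucket assignment consistent with shard pruning. A sketch under stated assumptions — the hash FmgrInfo is passed in separately here, the helper name is hypothetical, and the HashPartitionContext definition from the hunk above is assumed to be in scope:

#include "postgres.h"
#include "fmgr.h"

/*
 * Hypothetical helper (assumption): compute the hash used to pick a
 * repartition bucket, honoring the collation stored in the context.
 */
static uint32
HashPartitionKeySketch(HashPartitionContext *partitionContext,
					   FmgrInfo *hashFunction, Datum partitionKeyValue)
{
	Datum hashedDatum = FunctionCall1Coll(hashFunction, partitionContext->collation,
										  partitionKeyValue);

	return DatumGetUInt32(hashedDatum);
}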