Collations matter for hashing strings in pg12

See https://www.postgresql.org/docs/12/collation.html#COLLATION-NONDETERMINISTIC
2019-08-14 21:29:48 +00:00 · 2019-08-14 21:29:48 +00:00 · be3285828f
parent fe10ca453d
commit be3285828f
5 changed files with 113 additions and 26 deletions
--- a/src/backend/distributed/master/master_split_shards.c
+++ b/src/backend/distributed/master/master_split_shards.c
@ -84,7 +84,7 @@ worker_hash(PG_FUNCTION_ARGS)
 	fmgr_info_copy(hashFunction, &(typeEntry->hash_proc_finfo), CurrentMemoryContext);

 	/* calculate hash value */
-	hashedValueDatum = FunctionCall1(hashFunction, valueDatum);
+	hashedValueDatum = FunctionCall1Coll(hashFunction, PG_GET_COLLATION(), valueDatum);

 	PG_RETURN_INT32(hashedValueDatum);
 }
--- a/src/backend/distributed/planner/multi_master_planner.c
+++ b/src/backend/distributed/planner/multi_master_planner.c
@ -21,6 +21,7 @@
 #include "distributed/multi_physical_planner.h"
 #include "distributed/distributed_planner.h"
 #include "distributed/multi_server_executor.h"
+#include "distributed/version_compat.h"
 #include "distributed/worker_protocol.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@ -51,6 +52,8 @@ static bool UseGroupAggregateWithHLL(Query *masterQuery);
 static bool QueryContainsAggregateWithHLL(Query *query);
 static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
 static List * PrepareTargetListForNextPlan(List *targetList);
+static Agg * makeAggNode(List *groupClauseList, List *havingQual,
+						 AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan);


 /*
@ -267,7 +270,6 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 	Agg *aggregatePlan = NULL;
 	AggStrategy aggregateStrategy = AGG_PLAIN;
 	AggClauseCosts aggregateCosts;
-	AttrNumber *groupColumnIdArray = NULL;
 	List *aggregateTargetList = NIL;
 	List *groupColumnList = NIL;
 	List *aggregateColumnList = NIL;
@ -275,9 +277,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 	List *columnList = NIL;
 	ListCell *columnCell = NULL;
 	Node *havingQual = NULL;
-	Oid *groupColumnOpArray = NULL;
 	uint32 groupColumnCount = 0;
-	const long rowEstimate = 10;

 	/* assert that we need to build an aggregate plan */
 	Assert(masterQuery->hasAggs || masterQuery->groupClause);
@ -353,17 +353,11 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 		{
 			aggregateStrategy = AGG_HASHED;
 		}
-
-		/* get column indexes that are being grouped */
-		groupColumnIdArray = extract_grouping_cols(groupColumnList, subPlan->targetlist);
-		groupColumnOpArray = extract_grouping_ops(groupColumnList);
 	}

 	/* finally create the plan */
-	aggregatePlan = make_agg(aggregateTargetList, (List *) havingQual, aggregateStrategy,
-							 AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
-							 groupColumnOpArray, NIL, NIL,
-							 rowEstimate, subPlan);
+	aggregatePlan = makeAggNode(groupColumnList, (List *) havingQual,
+								aggregateStrategy, aggregateTargetList, subPlan);

 	/* just for reproducible costs between different PostgreSQL versions */
 	aggregatePlan->plan.startup_cost = 0;
@ -527,17 +521,8 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)

 	if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate)
 	{
-		const long rowEstimate = 10;  /* using the same value as BuildAggregatePlan() */
-		AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList,
-																  subPlan->targetlist);
-		Oid *distinctColumnOpArray = extract_grouping_ops(distinctClauseList);
-		uint32 distinctClauseCount = list_length(distinctClauseList);
-
-		distinctPlan = (Plan *) make_agg(targetList, NIL, AGG_HASHED,
-										 AGGSPLIT_SIMPLE, distinctClauseCount,
-										 distinctColumnIdArray,
-										 distinctColumnOpArray, NIL, NIL,
-										 rowEstimate, subPlan);
+		distinctPlan = (Plan *) makeAggNode(distinctClauseList, NIL, AGG_HASHED,
+											targetList, subPlan);
 	}
 	else
 	{
@ -581,3 +566,36 @@ PrepareTargetListForNextPlan(List *targetList)

 	return newtargetList;
 }
+
+
+/*
+ * makeAggNode creates a "Agg" plan node. groupClauseList is a list of
+ * SortGroupClause's.
+ */
+static Agg *
+makeAggNode(List *groupClauseList, List *havingQual, AggStrategy aggrStrategy,
+			List *queryTargetList, Plan *subPlan)
+{
+	Agg *aggNode = NULL;
+	int groupColumnCount = list_length(groupClauseList);
+	AttrNumber *groupColumnIdArray =
+		extract_grouping_cols(groupClauseList, subPlan->targetlist);
+	Oid *groupColumnOpArray = extract_grouping_ops(groupClauseList);
+	const int rowEstimate = 10;
+
+#if (PG_VERSION_NUM >= 120000)
+	aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
+					   AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
+					   groupColumnOpArray,
+					   extract_grouping_collations(groupClauseList,
+												   subPlan->targetlist),
+					   NIL, NIL, rowEstimate, subPlan);
+#else
+	aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
+					   AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
+					   groupColumnOpArray,
+					   NIL, NIL, rowEstimate, subPlan);
+#endif
+
+	return aggNode;
+}
--- a/src/backend/distributed/planner/postgres_planning_functions.c
+++ b/src/backend/distributed/planner/postgres_planning_functions.c
@ -15,15 +15,77 @@

 #include "distributed/multi_master_planner.h"
 #include "nodes/plannodes.h"
+#if PG_VERSION_NUM >= 120000
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#else
 #include "optimizer/tlist.h"
-
+#endif

 /*
 * make_unique_from_sortclauses creates and returns a unique node
 * from provided distinct clause list.
 * The functions is copied from postgresql from
 * src/backend/optimizer/plan/createplan.c.
- *
+ */
+
+#if PG_VERSION_NUM >= 120000
+
+/*
+ * distinctList is a list of SortGroupClauses, identifying the targetlist items
+ * that should be considered by the Unique filter.  The input path must
+ * already be sorted accordingly.
+ */
+Unique *
+make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
+{
+	Unique *node = makeNode(Unique);
+	Plan *plan = &node->plan;
+	int numCols = list_length(distinctList);
+	int keyno = 0;
+	AttrNumber *uniqColIdx;
+	Oid *uniqOperators;
+	Oid *uniqCollations;
+	ListCell *slitem;
+
+	plan->targetlist = lefttree->targetlist;
+	plan->qual = NIL;
+	plan->lefttree = lefttree;
+	plan->righttree = NULL;
+
+	/*
+	 * convert SortGroupClause list into arrays of attr indexes and equality
+	 * operators, as wanted by executor
+	 */
+	Assert(numCols > 0);
+	uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+	uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+	uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+
+	foreach(slitem, distinctList)
+	{
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
+		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
+
+		uniqColIdx[keyno] = tle->resno;
+		uniqOperators[keyno] = sortcl->eqop;
+		uniqCollations[keyno] = exprCollation((Node *) tle->expr);
+		Assert(OidIsValid(uniqOperators[keyno]));
+		keyno++;
+	}
+
+	node->numCols = numCols;
+	node->uniqColIdx = uniqColIdx;
+	node->uniqOperators = uniqOperators;
+	node->uniqCollations = uniqCollations;
+
+	return node;
+}
+
+
+#else
+
+/*
 * distinctList is a list of SortGroupClauses, identifying the targetlist items
 * that should be considered by the Unique filter.  The input path must
 * already be sorted accordingly.
@ -69,3 +131,6 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)

 	return node;
 }
+
+
+#endif
--- a/src/backend/distributed/utils/shardinterval_utils.c
+++ b/src/backend/distributed/utils/shardinterval_utils.c
@ -254,7 +254,9 @@ FindShardInterval(Datum partitionColumnValue, DistTableCacheEntry *cacheEntry)

 	if (cacheEntry->partitionMethod == DISTRIBUTE_BY_HASH)
 	{
-		searchedValue = FunctionCall1(cacheEntry->hashFunction, partitionColumnValue);
+		searchedValue = FunctionCall1Coll(cacheEntry->hashFunction,
+										  cacheEntry->partitionColumn->varcollid,
+										  partitionColumnValue);
 	}

 	shardIndex = FindShardIntervalIndex(searchedValue, cacheEntry);
--- a/src/include/distributed/worker_protocol.h
+++ b/src/include/distributed/worker_protocol.h
@ -20,6 +20,7 @@
 #include "nodes/parsenodes.h"
 #include "storage/fd.h"
 #include "utils/array.h"
+#include "distributed/version_compat.h"


 /* Number of rows to prefetch when reading data with a cursor */
@ -79,6 +80,7 @@ typedef struct HashPartitionContext
 	FmgrInfo *comparisonFunction;
 	ShardInterval **syntheticShardIntervalArray;
 	uint32 partitionCount;
+	Oid collation;
 	bool hasUniformHashDistribution;
 } HashPartitionContext;