Collations matter for hashing strings in pg12

See https://www.postgresql.org/docs/12/collation.html#COLLATION-NONDETERMINISTIC
pull/2844/head
Philip Dubé 2019-08-14 21:29:48 +00:00
parent fe10ca453d
commit be3285828f
5 changed files with 113 additions and 26 deletions

View File

@ -84,7 +84,7 @@ worker_hash(PG_FUNCTION_ARGS)
fmgr_info_copy(hashFunction, &(typeEntry->hash_proc_finfo), CurrentMemoryContext);
/* calculate hash value */
hashedValueDatum = FunctionCall1(hashFunction, valueDatum);
hashedValueDatum = FunctionCall1Coll(hashFunction, PG_GET_COLLATION(), valueDatum);
PG_RETURN_INT32(hashedValueDatum);
}

View File

@ -21,6 +21,7 @@
#include "distributed/multi_physical_planner.h"
#include "distributed/distributed_planner.h"
#include "distributed/multi_server_executor.h"
#include "distributed/version_compat.h"
#include "distributed/worker_protocol.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
@ -51,6 +52,8 @@ static bool UseGroupAggregateWithHLL(Query *masterQuery);
static bool QueryContainsAggregateWithHLL(Query *query);
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
static List * PrepareTargetListForNextPlan(List *targetList);
static Agg * makeAggNode(List *groupClauseList, List *havingQual,
AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan);
/*
@ -267,7 +270,6 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
Agg *aggregatePlan = NULL;
AggStrategy aggregateStrategy = AGG_PLAIN;
AggClauseCosts aggregateCosts;
AttrNumber *groupColumnIdArray = NULL;
List *aggregateTargetList = NIL;
List *groupColumnList = NIL;
List *aggregateColumnList = NIL;
@ -275,9 +277,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
List *columnList = NIL;
ListCell *columnCell = NULL;
Node *havingQual = NULL;
Oid *groupColumnOpArray = NULL;
uint32 groupColumnCount = 0;
const long rowEstimate = 10;
/* assert that we need to build an aggregate plan */
Assert(masterQuery->hasAggs || masterQuery->groupClause);
@ -353,17 +353,11 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
{
aggregateStrategy = AGG_HASHED;
}
/* get column indexes that are being grouped */
groupColumnIdArray = extract_grouping_cols(groupColumnList, subPlan->targetlist);
groupColumnOpArray = extract_grouping_ops(groupColumnList);
}
/* finally create the plan */
aggregatePlan = make_agg(aggregateTargetList, (List *) havingQual, aggregateStrategy,
AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
groupColumnOpArray, NIL, NIL,
rowEstimate, subPlan);
aggregatePlan = makeAggNode(groupColumnList, (List *) havingQual,
aggregateStrategy, aggregateTargetList, subPlan);
/* just for reproducible costs between different PostgreSQL versions */
aggregatePlan->plan.startup_cost = 0;
@ -527,17 +521,8 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate)
{
const long rowEstimate = 10; /* using the same value as BuildAggregatePlan() */
AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList,
subPlan->targetlist);
Oid *distinctColumnOpArray = extract_grouping_ops(distinctClauseList);
uint32 distinctClauseCount = list_length(distinctClauseList);
distinctPlan = (Plan *) make_agg(targetList, NIL, AGG_HASHED,
AGGSPLIT_SIMPLE, distinctClauseCount,
distinctColumnIdArray,
distinctColumnOpArray, NIL, NIL,
rowEstimate, subPlan);
distinctPlan = (Plan *) makeAggNode(distinctClauseList, NIL, AGG_HASHED,
targetList, subPlan);
}
else
{
@ -581,3 +566,36 @@ PrepareTargetListForNextPlan(List *targetList)
return newtargetList;
}
/*
* makeAggNode creates a "Agg" plan node. groupClauseList is a list of
* SortGroupClause's.
*/
static Agg *
makeAggNode(List *groupClauseList, List *havingQual, AggStrategy aggrStrategy,
List *queryTargetList, Plan *subPlan)
{
Agg *aggNode = NULL;
int groupColumnCount = list_length(groupClauseList);
AttrNumber *groupColumnIdArray =
extract_grouping_cols(groupClauseList, subPlan->targetlist);
Oid *groupColumnOpArray = extract_grouping_ops(groupClauseList);
const int rowEstimate = 10;
#if (PG_VERSION_NUM >= 120000)
aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
groupColumnOpArray,
extract_grouping_collations(groupClauseList,
subPlan->targetlist),
NIL, NIL, rowEstimate, subPlan);
#else
aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
groupColumnOpArray,
NIL, NIL, rowEstimate, subPlan);
#endif
return aggNode;
}

View File

@ -15,15 +15,77 @@
#include "distributed/multi_master_planner.h"
#include "nodes/plannodes.h"
#if PG_VERSION_NUM >= 120000
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#else
#include "optimizer/tlist.h"
#endif
/*
* make_unique_from_sortclauses creates and returns a unique node
* from provided distinct clause list.
* The functions is copied from postgresql from
* src/backend/optimizer/plan/createplan.c.
*
*/
#if PG_VERSION_NUM >= 120000
/*
* distinctList is a list of SortGroupClauses, identifying the targetlist items
* that should be considered by the Unique filter. The input path must
* already be sorted accordingly.
*/
Unique *
make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
{
Unique *node = makeNode(Unique);
Plan *plan = &node->plan;
int numCols = list_length(distinctList);
int keyno = 0;
AttrNumber *uniqColIdx;
Oid *uniqOperators;
Oid *uniqCollations;
ListCell *slitem;
plan->targetlist = lefttree->targetlist;
plan->qual = NIL;
plan->lefttree = lefttree;
plan->righttree = NULL;
/*
* convert SortGroupClause list into arrays of attr indexes and equality
* operators, as wanted by executor
*/
Assert(numCols > 0);
uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(slitem, distinctList)
{
SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
uniqColIdx[keyno] = tle->resno;
uniqOperators[keyno] = sortcl->eqop;
uniqCollations[keyno] = exprCollation((Node *) tle->expr);
Assert(OidIsValid(uniqOperators[keyno]));
keyno++;
}
node->numCols = numCols;
node->uniqColIdx = uniqColIdx;
node->uniqOperators = uniqOperators;
node->uniqCollations = uniqCollations;
return node;
}
#else
/*
* distinctList is a list of SortGroupClauses, identifying the targetlist items
* that should be considered by the Unique filter. The input path must
* already be sorted accordingly.
@ -69,3 +131,6 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
return node;
}
#endif

View File

@ -254,7 +254,9 @@ FindShardInterval(Datum partitionColumnValue, DistTableCacheEntry *cacheEntry)
if (cacheEntry->partitionMethod == DISTRIBUTE_BY_HASH)
{
searchedValue = FunctionCall1(cacheEntry->hashFunction, partitionColumnValue);
searchedValue = FunctionCall1Coll(cacheEntry->hashFunction,
cacheEntry->partitionColumn->varcollid,
partitionColumnValue);
}
shardIndex = FindShardIntervalIndex(searchedValue, cacheEntry);

View File

@ -20,6 +20,7 @@
#include "nodes/parsenodes.h"
#include "storage/fd.h"
#include "utils/array.h"
#include "distributed/version_compat.h"
/* Number of rows to prefetch when reading data with a cursor */
@ -79,6 +80,7 @@ typedef struct HashPartitionContext
FmgrInfo *comparisonFunction;
ShardInterval **syntheticShardIntervalArray;
uint32 partitionCount;
Oid collation;
bool hasUniformHashDistribution;
} HashPartitionContext;