mirror of https://github.com/citusdata/citus.git
Collations matter for hashing strings in pg12
See https://www.postgresql.org/docs/12/collation.html#COLLATION-NONDETERMINISTIC (pull/2844/head)
parent
fe10ca453d
commit
be3285828f
|
@ -84,7 +84,7 @@ worker_hash(PG_FUNCTION_ARGS)
|
|||
fmgr_info_copy(hashFunction, &(typeEntry->hash_proc_finfo), CurrentMemoryContext);
|
||||
|
||||
/* calculate hash value */
|
||||
hashedValueDatum = FunctionCall1(hashFunction, valueDatum);
|
||||
hashedValueDatum = FunctionCall1Coll(hashFunction, PG_GET_COLLATION(), valueDatum);
|
||||
|
||||
PG_RETURN_INT32(hashedValueDatum);
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "distributed/multi_physical_planner.h"
|
||||
#include "distributed/distributed_planner.h"
|
||||
#include "distributed/multi_server_executor.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "distributed/worker_protocol.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
|
@ -51,6 +52,8 @@ static bool UseGroupAggregateWithHLL(Query *masterQuery);
|
|||
static bool QueryContainsAggregateWithHLL(Query *query);
|
||||
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
|
||||
static List * PrepareTargetListForNextPlan(List *targetList);
|
||||
static Agg * makeAggNode(List *groupClauseList, List *havingQual,
|
||||
AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -267,7 +270,6 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
|
|||
Agg *aggregatePlan = NULL;
|
||||
AggStrategy aggregateStrategy = AGG_PLAIN;
|
||||
AggClauseCosts aggregateCosts;
|
||||
AttrNumber *groupColumnIdArray = NULL;
|
||||
List *aggregateTargetList = NIL;
|
||||
List *groupColumnList = NIL;
|
||||
List *aggregateColumnList = NIL;
|
||||
|
@ -275,9 +277,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
|
|||
List *columnList = NIL;
|
||||
ListCell *columnCell = NULL;
|
||||
Node *havingQual = NULL;
|
||||
Oid *groupColumnOpArray = NULL;
|
||||
uint32 groupColumnCount = 0;
|
||||
const long rowEstimate = 10;
|
||||
|
||||
/* assert that we need to build an aggregate plan */
|
||||
Assert(masterQuery->hasAggs || masterQuery->groupClause);
|
||||
|
@ -353,17 +353,11 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
|
|||
{
|
||||
aggregateStrategy = AGG_HASHED;
|
||||
}
|
||||
|
||||
/* get column indexes that are being grouped */
|
||||
groupColumnIdArray = extract_grouping_cols(groupColumnList, subPlan->targetlist);
|
||||
groupColumnOpArray = extract_grouping_ops(groupColumnList);
|
||||
}
|
||||
|
||||
/* finally create the plan */
|
||||
aggregatePlan = make_agg(aggregateTargetList, (List *) havingQual, aggregateStrategy,
|
||||
AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
|
||||
groupColumnOpArray, NIL, NIL,
|
||||
rowEstimate, subPlan);
|
||||
aggregatePlan = makeAggNode(groupColumnList, (List *) havingQual,
|
||||
aggregateStrategy, aggregateTargetList, subPlan);
|
||||
|
||||
/* just for reproducible costs between different PostgreSQL versions */
|
||||
aggregatePlan->plan.startup_cost = 0;
|
||||
|
@ -527,17 +521,8 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
|
|||
|
||||
if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate)
|
||||
{
|
||||
const long rowEstimate = 10; /* using the same value as BuildAggregatePlan() */
|
||||
AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList,
|
||||
subPlan->targetlist);
|
||||
Oid *distinctColumnOpArray = extract_grouping_ops(distinctClauseList);
|
||||
uint32 distinctClauseCount = list_length(distinctClauseList);
|
||||
|
||||
distinctPlan = (Plan *) make_agg(targetList, NIL, AGG_HASHED,
|
||||
AGGSPLIT_SIMPLE, distinctClauseCount,
|
||||
distinctColumnIdArray,
|
||||
distinctColumnOpArray, NIL, NIL,
|
||||
rowEstimate, subPlan);
|
||||
distinctPlan = (Plan *) makeAggNode(distinctClauseList, NIL, AGG_HASHED,
|
||||
targetList, subPlan);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -581,3 +566,36 @@ PrepareTargetListForNextPlan(List *targetList)
|
|||
|
||||
return newtargetList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* makeAggNode creates a "Agg" plan node. groupClauseList is a list of
|
||||
* SortGroupClause's.
|
||||
*/
|
||||
static Agg *
|
||||
makeAggNode(List *groupClauseList, List *havingQual, AggStrategy aggrStrategy,
|
||||
List *queryTargetList, Plan *subPlan)
|
||||
{
|
||||
Agg *aggNode = NULL;
|
||||
int groupColumnCount = list_length(groupClauseList);
|
||||
AttrNumber *groupColumnIdArray =
|
||||
extract_grouping_cols(groupClauseList, subPlan->targetlist);
|
||||
Oid *groupColumnOpArray = extract_grouping_ops(groupClauseList);
|
||||
const int rowEstimate = 10;
|
||||
|
||||
#if (PG_VERSION_NUM >= 120000)
|
||||
aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
|
||||
AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
|
||||
groupColumnOpArray,
|
||||
extract_grouping_collations(groupClauseList,
|
||||
subPlan->targetlist),
|
||||
NIL, NIL, rowEstimate, subPlan);
|
||||
#else
|
||||
aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
|
||||
AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
|
||||
groupColumnOpArray,
|
||||
NIL, NIL, rowEstimate, subPlan);
|
||||
#endif
|
||||
|
||||
return aggNode;
|
||||
}
|
||||
|
|
|
@ -15,15 +15,77 @@
|
|||
|
||||
#include "distributed/multi_master_planner.h"
|
||||
#include "nodes/plannodes.h"
|
||||
#if PG_VERSION_NUM >= 120000
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "optimizer/optimizer.h"
|
||||
#else
|
||||
#include "optimizer/tlist.h"
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * make_unique_from_sortclauses creates and returns a Unique plan node
 * from the provided distinct clause list.
 *
 * The function is copied from PostgreSQL's
 * src/backend/optimizer/plan/createplan.c; keep the body in sync with
 * upstream rather than restyling it.
 */

#if PG_VERSION_NUM >= 120000

/*
 * distinctList is a list of SortGroupClauses, identifying the targetlist items
 * that should be considered by the Unique filter. The input path must
 * already be sorted accordingly.
 */
Unique *
make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
{
	Unique *node = makeNode(Unique);
	Plan *plan = &node->plan;
	int numCols = list_length(distinctList);
	int keyno = 0;
	AttrNumber *uniqColIdx;
	Oid *uniqOperators;
	Oid *uniqCollations;
	ListCell *slitem;

	/* Unique keeps the child's target list and adds no quals of its own */
	plan->targetlist = lefttree->targetlist;
	plan->qual = NIL;
	plan->lefttree = lefttree;
	plan->righttree = NULL;

	/*
	 * convert SortGroupClause list into arrays of attr indexes and equality
	 * operators, as wanted by executor
	 */
	Assert(numCols > 0);
	uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
	uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
	uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);

	foreach(slitem, distinctList)
	{
		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);

		uniqColIdx[keyno] = tle->resno;
		uniqOperators[keyno] = sortcl->eqop;

		/*
		 * PG12: record each key's collation, since nondeterministic
		 * collations affect equality comparisons.
		 */
		uniqCollations[keyno] = exprCollation((Node *) tle->expr);
		Assert(OidIsValid(uniqOperators[keyno]));
		keyno++;
	}

	node->numCols = numCols;
	node->uniqColIdx = uniqColIdx;
	node->uniqOperators = uniqOperators;
	node->uniqCollations = uniqCollations;

	return node;
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* distinctList is a list of SortGroupClauses, identifying the targetlist items
|
||||
* that should be considered by the Unique filter. The input path must
|
||||
* already be sorted accordingly.
|
||||
|
@ -69,3 +131,6 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
|
|||
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -254,7 +254,9 @@ FindShardInterval(Datum partitionColumnValue, DistTableCacheEntry *cacheEntry)
|
|||
|
||||
if (cacheEntry->partitionMethod == DISTRIBUTE_BY_HASH)
|
||||
{
|
||||
searchedValue = FunctionCall1(cacheEntry->hashFunction, partitionColumnValue);
|
||||
searchedValue = FunctionCall1Coll(cacheEntry->hashFunction,
|
||||
cacheEntry->partitionColumn->varcollid,
|
||||
partitionColumnValue);
|
||||
}
|
||||
|
||||
shardIndex = FindShardIntervalIndex(searchedValue, cacheEntry);
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "nodes/parsenodes.h"
|
||||
#include "storage/fd.h"
|
||||
#include "utils/array.h"
|
||||
#include "distributed/version_compat.h"
|
||||
|
||||
|
||||
/* Number of rows to prefetch when reading data with a cursor */
|
||||
|
@ -79,6 +80,7 @@ typedef struct HashPartitionContext
|
|||
FmgrInfo *comparisonFunction;
|
||||
ShardInterval **syntheticShardIntervalArray;
|
||||
uint32 partitionCount;
|
||||
Oid collation;
|
||||
bool hasUniformHashDistribution;
|
||||
} HashPartitionContext;
|
||||
|
||||
|
|
Loading…
Reference in New Issue