mirror of https://github.com/citusdata/citus.git
Collations matter for hashing strings in pg12

See https://www.postgresql.org/docs/12/collation.html#COLLATION-NONDETERMINISTIC

branch pull/2844/head
parent fe10ca453d
commit be3285828f
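
Note on the motivation (commentary added here, not text from the commit): under PostgreSQL 12's nondeterministic collations, strings that compare equal can differ byte-wise, so a hash computed without knowing the collation can disagree with comparison. PG12's string hash functions therefore take a collation OID and raise an error when none is supplied, which is why the calls below switch to the *Coll variants. A minimal sketch of that pattern, assuming PostgreSQL 12 backend headers; the helper name is hypothetical:

/*
 * Minimal sketch (assumption, not from the commit): hash a text datum under
 * an explicit collation.  In PostgreSQL 12, hashtext() errors out with
 * "could not determine which collation to use for string hashing" when the
 * collation is left unset.
 */
#include "postgres.h"
#include "fmgr.h"
#include "utils/fmgrprotos.h"

static uint32
HashTextDatumWithCollation(Datum textDatum, Oid collationId)
{
	/* DirectFunctionCall1Coll() forwards collationId to hashtext() */
	Datum hashedDatum = DirectFunctionCall1Coll(hashtext, collationId, textDatum);

	return DatumGetUInt32(hashedDatum);
}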
@@ -84,7 +84,7 @@ worker_hash(PG_FUNCTION_ARGS)
 	fmgr_info_copy(hashFunction, &(typeEntry->hash_proc_finfo), CurrentMemoryContext);
 
 	/* calculate hash value */
-	hashedValueDatum = FunctionCall1(hashFunction, valueDatum);
+	hashedValueDatum = FunctionCall1Coll(hashFunction, PG_GET_COLLATION(), valueDatum);
 
 	PG_RETURN_INT32(hashedValueDatum);
 }

@@ -21,6 +21,7 @@
 #include "distributed/multi_physical_planner.h"
 #include "distributed/distributed_planner.h"
 #include "distributed/multi_server_executor.h"
+#include "distributed/version_compat.h"
 #include "distributed/worker_protocol.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"

@@ -51,6 +52,8 @@ static bool UseGroupAggregateWithHLL(Query *masterQuery);
 static bool QueryContainsAggregateWithHLL(Query *query);
 static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
 static List * PrepareTargetListForNextPlan(List *targetList);
+static Agg * makeAggNode(List *groupClauseList, List *havingQual,
+						 AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan);
 
 
 /*
@@ -267,7 +270,6 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 	Agg *aggregatePlan = NULL;
 	AggStrategy aggregateStrategy = AGG_PLAIN;
 	AggClauseCosts aggregateCosts;
-	AttrNumber *groupColumnIdArray = NULL;
 	List *aggregateTargetList = NIL;
 	List *groupColumnList = NIL;
 	List *aggregateColumnList = NIL;

@@ -275,9 +277,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 	List *columnList = NIL;
 	ListCell *columnCell = NULL;
 	Node *havingQual = NULL;
-	Oid *groupColumnOpArray = NULL;
 	uint32 groupColumnCount = 0;
-	const long rowEstimate = 10;
 
 	/* assert that we need to build an aggregate plan */
 	Assert(masterQuery->hasAggs || masterQuery->groupClause);
@@ -353,17 +353,11 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 		{
 			aggregateStrategy = AGG_HASHED;
 		}
-
-		/* get column indexes that are being grouped */
-		groupColumnIdArray = extract_grouping_cols(groupColumnList, subPlan->targetlist);
-		groupColumnOpArray = extract_grouping_ops(groupColumnList);
 	}
 
 	/* finally create the plan */
-	aggregatePlan = make_agg(aggregateTargetList, (List *) havingQual, aggregateStrategy,
-							 AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
-							 groupColumnOpArray, NIL, NIL,
-							 rowEstimate, subPlan);
+	aggregatePlan = makeAggNode(groupColumnList, (List *) havingQual,
+								aggregateStrategy, aggregateTargetList, subPlan);
 
 	/* just for reproducible costs between different PostgreSQL versions */
 	aggregatePlan->plan.startup_cost = 0;
@@ -527,17 +521,8 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
 
 	if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate)
 	{
-		const long rowEstimate = 10; /* using the same value as BuildAggregatePlan() */
-		AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList,
-																  subPlan->targetlist);
-		Oid *distinctColumnOpArray = extract_grouping_ops(distinctClauseList);
-		uint32 distinctClauseCount = list_length(distinctClauseList);
-
-		distinctPlan = (Plan *) make_agg(targetList, NIL, AGG_HASHED,
-										 AGGSPLIT_SIMPLE, distinctClauseCount,
-										 distinctColumnIdArray,
-										 distinctColumnOpArray, NIL, NIL,
-										 rowEstimate, subPlan);
+		distinctPlan = (Plan *) makeAggNode(distinctClauseList, NIL, AGG_HASHED,
+											targetList, subPlan);
 	}
 	else
 	{
@@ -581,3 +566,36 @@ PrepareTargetListForNextPlan(List *targetList)
 
 	return newtargetList;
 }
+
+
+/*
+ * makeAggNode creates a "Agg" plan node. groupClauseList is a list of
+ * SortGroupClause's.
+ */
+static Agg *
+makeAggNode(List *groupClauseList, List *havingQual, AggStrategy aggrStrategy,
+			List *queryTargetList, Plan *subPlan)
+{
+	Agg *aggNode = NULL;
+	int groupColumnCount = list_length(groupClauseList);
+	AttrNumber *groupColumnIdArray =
+		extract_grouping_cols(groupClauseList, subPlan->targetlist);
+	Oid *groupColumnOpArray = extract_grouping_ops(groupClauseList);
+	const int rowEstimate = 10;
+
+#if (PG_VERSION_NUM >= 120000)
+	aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
+					   AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
+					   groupColumnOpArray,
+					   extract_grouping_collations(groupClauseList,
+												   subPlan->targetlist),
+					   NIL, NIL, rowEstimate, subPlan);
+#else
+	aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
+					   AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
+					   groupColumnOpArray,
+					   NIL, NIL, rowEstimate, subPlan);
+#endif
+
+	return aggNode;
+}
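
Commentary (not from the commit): the makeAggNode() helper added above exists mainly so the PostgreSQL 12 change to make_agg() — the extra grouping-collations argument — is handled in one place instead of at every call site. A hypothetical caller, assuming the helper and the planner types already used in this file; BuildGroupByPlanSketch is not a function in the commit:

#include "postgres.h"
#include "nodes/parsenodes.h"
#include "nodes/plannodes.h"

/* hypothetical caller: build a HashAggregate over the master query's
 * GROUP BY clauses on top of the worker results represented by subPlan */
static Plan *
BuildGroupByPlanSketch(Query *masterQuery, Plan *subPlan)
{
	Agg *aggNode = makeAggNode(masterQuery->groupClause,
							   (List *) masterQuery->havingQual,
							   AGG_HASHED,
							   masterQuery->targetList,
							   subPlan);

	return (Plan *) aggNode;
}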
@@ -15,15 +15,77 @@
 
 #include "distributed/multi_master_planner.h"
 #include "nodes/plannodes.h"
+#if PG_VERSION_NUM >= 120000
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#else
 #include "optimizer/tlist.h"
+#endif
 
 /*
  * make_unique_from_sortclauses creates and returns a unique node
  * from provided distinct clause list.
  * The functions is copied from postgresql from
  * src/backend/optimizer/plan/createplan.c.
- *
+ */
+
+#if PG_VERSION_NUM >= 120000
+
+/*
+ * distinctList is a list of SortGroupClauses, identifying the targetlist items
+ * that should be considered by the Unique filter. The input path must
+ * already be sorted accordingly.
+ */
+Unique *
+make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
+{
+	Unique *node = makeNode(Unique);
+	Plan *plan = &node->plan;
+	int numCols = list_length(distinctList);
+	int keyno = 0;
+	AttrNumber *uniqColIdx;
+	Oid *uniqOperators;
+	Oid *uniqCollations;
+	ListCell *slitem;
+
+	plan->targetlist = lefttree->targetlist;
+	plan->qual = NIL;
+	plan->lefttree = lefttree;
+	plan->righttree = NULL;
+
+	/*
+	 * convert SortGroupClause list into arrays of attr indexes and equality
+	 * operators, as wanted by executor
+	 */
+	Assert(numCols > 0);
+	uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+	uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+	uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+
+	foreach(slitem, distinctList)
+	{
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
+		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
+
+		uniqColIdx[keyno] = tle->resno;
+		uniqOperators[keyno] = sortcl->eqop;
+		uniqCollations[keyno] = exprCollation((Node *) tle->expr);
+		Assert(OidIsValid(uniqOperators[keyno]));
+		keyno++;
+	}
+
+	node->numCols = numCols;
+	node->uniqColIdx = uniqColIdx;
+	node->uniqOperators = uniqOperators;
+	node->uniqCollations = uniqCollations;
+
+	return node;
+}
+
+
+#else
+
+/*
 * distinctList is a list of SortGroupClauses, identifying the targetlist items
 * that should be considered by the Unique filter. The input path must
 * already be sorted accordingly.

@@ -69,3 +131,6 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
 
 	return node;
 }
+
+
+#endif

@@ -254,7 +254,9 @@ FindShardInterval(Datum partitionColumnValue, DistTableCacheEntry *cacheEntry)
 
 	if (cacheEntry->partitionMethod == DISTRIBUTE_BY_HASH)
 	{
-		searchedValue = FunctionCall1(cacheEntry->hashFunction, partitionColumnValue);
+		searchedValue = FunctionCall1Coll(cacheEntry->hashFunction,
+										  cacheEntry->partitionColumn->varcollid,
+										  partitionColumnValue);
 	}
 
 	shardIndex = FindShardIntervalIndex(searchedValue, cacheEntry);

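Commentary (not from the commit): shard pruning now hashes the incoming value with the distribution column's collation, taken from the column Var's varcollid, so the coordinator's routing agrees with how the same value hashes on the workers. A small sketch of that pairing, assuming only PostgreSQL headers; the helper name is hypothetical:

#include "postgres.h"
#include "fmgr.h"
#include "nodes/primnodes.h"

/*
 * Sketch (assumption, not from the commit): hash a value for shard pruning
 * under the collation the distribution column was declared with; varcollid
 * carries that collation on the column's Var node.
 */
static Datum
HashUnderColumnCollation(FmgrInfo *hashFunction, Var *partitionColumn, Datum value)
{
	return FunctionCall1Coll(hashFunction, partitionColumn->varcollid, value);
}
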
@@ -20,6 +20,7 @@
 #include "nodes/parsenodes.h"
 #include "storage/fd.h"
 #include "utils/array.h"
+#include "distributed/version_compat.h"
 
 
 /* Number of rows to prefetch when reading data with a cursor */

@@ -79,6 +80,7 @@ typedef struct HashPartitionContext
 	FmgrInfo *comparisonFunction;
 	ShardInterval **syntheticShardIntervalArray;
 	uint32 partitionCount;
+	Oid collation;
 	bool hasUniformHashDistribution;
 } HashPartitionContext;
 
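Commentary (not from the commit): the new collation member lets the worker's hash repartitioning hash each tuple's partition column under the same collation the coordinator uses, keeping bucket assignment consistent with shard pruning. A sketch under stated assumptions — the hash FmgrInfo is passed in separately here, the helper name is hypothetical, and the HashPartitionContext definition from the hunk above is assumed to be in scope:

#include "postgres.h"
#include "fmgr.h"

/*
 * Hypothetical helper (assumption): compute the hash used to pick a
 * repartition bucket, honoring the collation stored in the context.
 */
static uint32
HashPartitionKeySketch(HashPartitionContext *partitionContext,
					   FmgrInfo *hashFunction, Datum partitionKeyValue)
{
	Datum hashedDatum = FunctionCall1Coll(hashFunction, partitionContext->collation,
										  partitionKeyValue);

	return DatumGetUInt32(hashedDatum);
}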