Use CreateSimpleHash (and variants) whenever possible (#6177)

This is a refactoring PR that starts using our new hash table creation
helper function. It adds a few more macros for ease of use, because C
doesn't have default arguments. It also adds a macro to check if a
struct contains automatic padding bytes. No struct that is hashed using
tag_hash should have automatic padding bytes, because those bytes are
undefined and thus using them to create a hash will result in undefined
behaviour (usually a random hash).
pull/6180/head
Jelte Fennema 2022-08-17 12:01:59 +02:00 committed by GitHub
parent 52efe08642
commit 3f6ce889eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 140 additions and 251 deletions

View File

@ -43,6 +43,7 @@
#include "distributed/coordinator_protocol.h"
#include "distributed/deparser.h"
#include "distributed/distribution_column.h"
#include "distributed/hash_helpers.h"
#include "distributed/listutils.h"
#include "distributed/local_executor.h"
#include "distributed/metadata/dependency.h"
@ -1276,14 +1277,7 @@ CreateCitusTableLike(TableConversionState *con)
static void
ErrorIfUnsupportedCascadeObjects(Oid relationId)
{
HASHCTL info;
memset(&info, 0, sizeof(info));
info.keysize = sizeof(Oid);
info.entrysize = sizeof(Oid);
info.hash = oid_hash;
info.hcxt = CurrentMemoryContext;
uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
HTAB *nodeMap = hash_create("object dependency map (oid)", 64, &info, hashFlags);
HTAB *nodeMap = CreateSimpleHashSetWithName(Oid, "object dependency map (oid)");
bool unsupportedObjectInDepGraph =
DoesCascadeDropUnsupportedObject(RelationRelationId, relationId, nodeMap);

View File

@ -26,6 +26,12 @@ typedef struct TaskHashKey
{
uint64 jobId;
uint32 taskId;
/*
* The padding field is needed to make sure the struct contains no
* automatic padding, which is not allowed for hashmap keys.
*/
uint32 padding;
}TaskHashKey;
typedef struct TaskHashEntry
@ -34,14 +40,10 @@ typedef struct TaskHashEntry
Task *task;
}TaskHashEntry;
static HASHCTL InitHashTableInfo(void);
static HTAB * CreateTaskHashTable(void);
static bool IsAllDependencyCompleted(Task *task, HTAB *completedTasks);
static void AddCompletedTasks(List *curCompletedTasks, HTAB *completedTasks);
static List * FindExecutableTasks(List *allTasks, HTAB *completedTasks);
static List * RemoveMergeTasks(List *taskList);
static int TaskHashCompare(const void *key1, const void *key2, Size keysize);
static uint32 TaskHash(const void *key, Size keysize);
static bool IsTaskAlreadyCompleted(Task *task, HTAB *completedTasks);
/*
@ -53,7 +55,8 @@ static bool IsTaskAlreadyCompleted(Task *task, HTAB *completedTasks);
void
ExecuteTasksInDependencyOrder(List *allTasks, List *excludedTasks, List *jobIds)
{
HTAB *completedTasks = CreateTaskHashTable();
assert_valid_hash_key3(TaskHashKey, jobId, taskId, padding);
HTAB *completedTasks = CreateSimpleHash(TaskHashKey, TaskHashEntry);
/* We only execute depended jobs' tasks, therefore to not execute */
/* top level tasks, we add them to the completedTasks. */
@ -143,19 +146,6 @@ AddCompletedTasks(List *curCompletedTasks, HTAB *completedTasks)
}
/*
* CreateTaskHashTable creates a HTAB with the necessary initialization.
*/
static HTAB *
CreateTaskHashTable()
{
uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT | HASH_COMPARE);
HASHCTL info = InitHashTableInfo();
return hash_create("citus task completed list (jobId, taskId)",
64, &info, hashFlags);
}
/*
* IsTaskAlreadyCompleted returns true if the given task
* is found in the completedTasks HTAB.
@ -193,54 +183,3 @@ IsAllDependencyCompleted(Task *targetTask, HTAB *completedTasks)
}
return true;
}
/*
* InitHashTableInfo returns hash table info, the hash table is
* configured to be created in the CurrentMemoryContext so that
* it will be cleaned when this memory context gets freed/reset.
*/
static HASHCTL
InitHashTableInfo()
{
HASHCTL info;
memset(&info, 0, sizeof(info));
info.keysize = sizeof(TaskHashKey);
info.entrysize = sizeof(TaskHashEntry);
info.hash = TaskHash;
info.match = TaskHashCompare;
info.hcxt = CurrentMemoryContext;
return info;
}
static uint32
TaskHash(const void *key, Size keysize)
{
TaskHashKey *taskKey = (TaskHashKey *) key;
uint32 hash = 0;
hash = hash_combine(hash, hash_any((unsigned char *) &taskKey->jobId,
sizeof(int64)));
hash = hash_combine(hash, hash_uint32(taskKey->taskId));
return hash;
}
static int
TaskHashCompare(const void *key1, const void *key2, Size keysize)
{
TaskHashKey *taskKey1 = (TaskHashKey *) key1;
TaskHashKey *taskKey2 = (TaskHashKey *) key2;
if (taskKey1->jobId != taskKey2->jobId || taskKey1->taskId != taskKey2->taskId)
{
return 1;
}
else
{
return 0;
}
}

View File

@ -21,6 +21,7 @@
#include "catalog/pg_authid.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/function_utils.h"
#include "distributed/hash_helpers.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_server_executor.h"
#include "distributed/version_compat.h"
@ -131,8 +132,6 @@ static void CitusQueryStatsEntryDealloc(void);
static void CitusQueryStatsEntryReset(void);
static uint32 CitusQuerysStatsHashFn(const void *key, Size keysize);
static int CitusQuerysStatsMatchFn(const void *key1, const void *key2, Size keysize);
static uint32 ExistingStatsHashFn(const void *key, Size keysize);
static int ExistingStatsMatchFn(const void *key1, const void *key2, Size keysize);
static HTAB * BuildExistingQueryIdHash(void);
static int GetPGStatStatementsMax(void);
@ -649,42 +648,6 @@ CitusQuerysStatsMatchFn(const void *key1, const void *key2, Size keysize)
}
/*
* ExistingStatsHashFn calculates and returns hash value for ExistingStatsHashKey
*/
static uint32
ExistingStatsHashFn(const void *key, Size keysize)
{
const ExistingStatsHashKey *k = (const ExistingStatsHashKey *) key;
return hash_uint32((uint32) k->userid) ^
hash_uint32((uint32) k->dbid) ^
hash_any((const unsigned char *) &(k->queryid), sizeof(uint64));
}
/*
* ExistingStatsMatchFn compares two keys of type ExistingStatsHashKey - zero
* means match. See definition of HashCompareFunc in hsearch.h for more info.
*/
static int
ExistingStatsMatchFn(const void *key1, const void *key2, Size keysize)
{
const ExistingStatsHashKey *k1 = (const ExistingStatsHashKey *) key1;
const ExistingStatsHashKey *k2 = (const ExistingStatsHashKey *) key2;
if (k1->userid == k2->userid &&
k1->dbid == k2->dbid &&
k1->queryid == k2->queryid)
{
return 0;
}
return 1;
}
/*
* Reset statistics.
*/
@ -840,7 +803,6 @@ BuildExistingQueryIdHash(void)
const int queryIdAttributeNumber = 3;
#endif
Datum commandTypeDatum = (Datum) 0;
HASHCTL info;
bool missingOK = true;
Oid pgStatStatementsOid = FunctionOidExtended("public", "pg_stat_statements", 1,
@ -872,20 +834,15 @@ BuildExistingQueryIdHash(void)
statStatementsReturnSet->setDesc,
&TTSOpsMinimalTuple);
info.keysize = sizeof(ExistingStatsHashKey);
info.entrysize = sizeof(ExistingStatsHashKey);
info.hcxt = CurrentMemoryContext;
info.hash = ExistingStatsHashFn;
info.match = ExistingStatsMatchFn;
int hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE);
/*
* Allocate more hash slots (twice as much) than necessary to minimize
* collisions.
*/
HTAB *queryIdHashTable = hash_create("pg_stats_statements queryId hash",
pgStatStatementsMax * 2, &info, hashFlags);
assert_valid_hash_key3(ExistingStatsHashKey, userid, dbid, queryid);
HTAB *queryIdHashTable = CreateSimpleHashSetWithNameAndSize(
ExistingStatsHashKey,
"pg_stats_statements queryId hash",
pgStatStatementsMax * 2);
/* iterate over tuples in tuple store, and add queryIds to hash table */
while (true)

View File

@ -39,6 +39,7 @@
#include "distributed/citus_depended_object.h"
#include "distributed/commands.h"
#include "distributed/commands/utility_hook.h"
#include "distributed/hash_helpers.h"
#include "distributed/listutils.h"
#include "distributed/metadata/dependency.h"
#include "distributed/metadata/distobject.h"
@ -531,18 +532,13 @@ DependencyDefinitionFromPgShDepend(ObjectAddress target)
static void
InitObjectAddressCollector(ObjectAddressCollector *collector)
{
HASHCTL info;
memset(&info, 0, sizeof(info));
info.keysize = sizeof(ObjectAddress);
info.entrysize = sizeof(ObjectAddress);
info.hcxt = CurrentMemoryContext;
int hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);
collector->dependencySet = hash_create("dependency set", 128, &info, hashFlags);
assert_valid_hash_key3(ObjectAddress, classId, objectId, objectSubId);
collector->dependencySet = CreateSimpleHashSetWithName(ObjectAddress,
"dependency set");
collector->dependencyList = NULL;
collector->visitedObjects = hash_create("visited object set", 128, &info, hashFlags);
collector->visitedObjects = CreateSimpleHashSetWithName(ObjectAddress,
"visited object set");
}
@ -2094,14 +2090,8 @@ GetPgDependTuplesForDependingObjects(Oid targetObjectClassId, Oid targetObjectId
List *
GetDependingViews(Oid relationId)
{
HASHCTL info;
memset(&info, 0, sizeof(info));
info.keysize = sizeof(Oid);
info.entrysize = sizeof(ViewDependencyNode);
info.hash = oid_hash;
info.hcxt = CurrentMemoryContext;
uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
HTAB *nodeMap = hash_create("view dependency map (oid)", 32, &info, hashFlags);
HTAB *nodeMap = CreateSimpleHashWithName(Oid, ViewDependencyNode,
"view dependency map (oid)");
ViewDependencyNode *tableNode = BuildViewDependencyGraph(relationId, nodeMap);

View File

@ -33,6 +33,7 @@
#include "distributed/deparse_shard_query.h"
#include "distributed/distributed_planner.h"
#include "distributed/foreign_key_relationship.h"
#include "distributed/hash_helpers.h"
#include "distributed/listutils.h"
#include "distributed/lock_graph.h"
#include "distributed/multi_client_executor.h"
@ -768,7 +769,8 @@ ReceiveAndUpdateShardsSizes(List *connectionList)
* all the placements. We use a hash table to remember already visited shard ids
* since we update all the different placements of a shard id at once.
*/
HTAB *alreadyVisitedShardPlacements = CreateOidVisitedHashSet();
HTAB *alreadyVisitedShardPlacements = CreateSimpleHashSetWithName(Oid,
"oid visited hash set");
MultiConnection *connection = NULL;
foreach_ptr(connection, connectionList)

View File

@ -20,6 +20,7 @@
#include "access/table.h"
#include "catalog/pg_constraint.h"
#include "distributed/commands.h"
#include "distributed/hash_helpers.h"
#include "distributed/foreign_key_relationship.h"
#include "distributed/hash_helpers.h"
#include "distributed/listutils.h"
@ -176,7 +177,7 @@ static List *
GetRelationshipNodesForFKeyConnectedRelations(
ForeignConstraintRelationshipNode *relationshipNode)
{
HTAB *oidVisitedMap = CreateOidVisitedHashSet();
HTAB *oidVisitedMap = CreateSimpleHashSetWithName(Oid, "oid visited hash set");
VisitOid(oidVisitedMap, relationshipNode->relationId);
List *relationshipNodeList = list_make1(relationshipNode);
@ -314,8 +315,6 @@ GetRelationshipNodeForRelationId(Oid relationId, bool *isFound)
static void
CreateForeignConstraintRelationshipGraph()
{
HASHCTL info;
/* if we have already created the graph, use it */
if (IsForeignConstraintRelationshipGraphValid())
{
@ -338,17 +337,8 @@ CreateForeignConstraintRelationshipGraph()
sizeof(ForeignConstraintRelationshipGraph));
fConstraintRelationshipGraph->isValid = false;
/* create (oid) -> [ForeignConstraintRelationshipNode] hash */
memset(&info, 0, sizeof(info));
info.keysize = sizeof(Oid);
info.entrysize = sizeof(ForeignConstraintRelationshipNode);
info.hash = oid_hash;
info.hcxt = CurrentMemoryContext;
uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
fConstraintRelationshipGraph->nodeMap = hash_create(
"foreign key relationship map (oid)",
32, &info, hashFlags);
fConstraintRelationshipGraph->nodeMap = CreateSimpleHash(Oid,
ForeignConstraintRelationshipNode);
PopulateAdjacencyLists();
@ -400,7 +390,7 @@ SetForeignConstraintRelationshipGraphInvalid()
static List *
GetConnectedListHelper(ForeignConstraintRelationshipNode *node, bool isReferencing)
{
HTAB *oidVisitedMap = CreateOidVisitedHashSet();
HTAB *oidVisitedMap = CreateSimpleHashSetWithName(Oid, "oid visited hash set");
List *connectedNodeList = NIL;
@ -444,31 +434,6 @@ GetConnectedListHelper(ForeignConstraintRelationshipNode *node, bool isReferenci
}
/*
* CreateOidVisitedHashSet creates and returns an hash-set object in
* CurrentMemoryContext to store visited oid's.
* As hash_create allocates memory in heap, callers are responsible to call
* hash_destroy when appropriate.
*/
HTAB *
CreateOidVisitedHashSet(void)
{
HASHCTL info = { 0 };
info.keysize = sizeof(Oid);
info.hash = oid_hash;
info.hcxt = CurrentMemoryContext;
/* we don't have value field as it's a set */
info.entrysize = info.keysize;
uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
HTAB *oidVisitedMap = hash_create("oid visited hash map", 32, &info, hashFlags);
return oidVisitedMap;
}
/*
* OidVisited returns true if given oid is visited according to given oid hash-set.
*/

View File

@ -37,12 +37,12 @@ hash_delete_all(HTAB *htab)
/*
* CreateSimpleHashWithName creates a hashmap that hashes its key using
* CreateSimpleHashWithNameAndSize creates a hashmap that hashes its key using
* tag_hash function and stores the entries in the current memory context.
*/
HTAB
*
CreateSimpleHashWithName(Size keySize, Size entrySize, char *name)
HTAB *
CreateSimpleHashWithNameAndSizeInternal(Size keySize, Size entrySize,
char *name, long nelem)
{
HASHCTL info;
memset_struct_0(info);
@ -50,32 +50,9 @@ CreateSimpleHashWithName(Size keySize, Size entrySize, char *name)
info.entrysize = entrySize;
info.hcxt = CurrentMemoryContext;
/*
* uint32_hash does the same as tag_hash for keys of 4 bytes, but it's
* faster.
*/
if (keySize == sizeof(uint32))
{
info.hash = uint32_hash;
}
else
{
info.hash = tag_hash;
}
int hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);
/*
* We use 32 as the initial number of elements that fit into this hash
* table. This value seems a reasonable tradeof between two issues:
* 1. An empty hashmap shouldn't take up a lot of space
* 2. Doing a few inserts shouldn't require growing the hashmap
*
* NOTE: No performance testing has been performed when choosing this
* value. If this ever turns out to be a problem, feel free to do some
* performance tests.
*/
HTAB *publicationInfoHash = hash_create(name, 32, &info, hashFlags);
HTAB *publicationInfoHash = hash_create(name, nelem, &info, hashFlags);
return publicationInfoHash;
}

View File

@ -16,6 +16,7 @@
#include "distributed/connection_management.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/distributed_execution_locks.h"
#include "distributed/hash_helpers.h"
#include "distributed/listutils.h"
#include "distributed/local_executor.h"
#include "distributed/metadata_cache.h"
@ -38,8 +39,8 @@
typedef struct TaskMapKey
{
TaskType taskType;
uint64 jobId;
uint32 taskId;
uint64 jobId;
} TaskMapKey;
@ -53,7 +54,6 @@ typedef struct TaskMapEntry
Task *task;
} TaskMapEntry;
static HTAB * TaskHashCreate(uint32 taskHashSize);
static Task * TaskHashEnter(HTAB *taskHash, Task *task);
static Task * TaskHashLookup(HTAB *trackerHash, TaskType taskType, uint64 jobId,
uint32 taskId);
@ -68,7 +68,8 @@ CreateTaskListForJobTree(List *jobTaskList)
List *taskList = NIL;
const int topLevelTaskHashSize = 32;
int taskHashSize = list_length(jobTaskList) * topLevelTaskHashSize;
HTAB *taskHash = TaskHashCreate(taskHashSize);
assert_valid_hash_key3(TaskMapKey, taskType, taskId, jobId);
HTAB *taskHash = CreateSimpleHashWithSize(TaskMapKey, TaskMapEntry, taskHashSize);
/*
* We walk over the task tree using breadth-first search. For the search, we
@ -131,38 +132,6 @@ CreateTaskListForJobTree(List *jobTaskList)
}
/*
* TaskHashCreate allocates memory for a task hash, initializes an
* empty hash, and returns this hash.
*/
static HTAB *
TaskHashCreate(uint32 taskHashSize)
{
HASHCTL info;
const char *taskHashName = "Task Hash";
/*
* Can't create a hashtable of size 0. Normally that shouldn't happen, but
* shard pruning currently can lead to this (Job with 0 Tasks). See #833.
*/
if (taskHashSize == 0)
{
taskHashSize = 2;
}
memset(&info, 0, sizeof(info));
info.keysize = sizeof(TaskMapKey);
info.entrysize = sizeof(TaskMapEntry);
info.hash = tag_hash;
info.hcxt = CurrentMemoryContext;
int hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
HTAB *taskHash = hash_create(taskHashName, taskHashSize, &info, hashFlags);
return taskHash;
}
/*
* TaskHashEnter creates a reference to the task entry in the given task
* hash. The function errors-out if the same key exists multiple times.

View File

@ -21,7 +21,6 @@ extern List * ReferencedRelationIdList(Oid relationId);
extern List * ReferencingRelationIdList(Oid relationId);
extern void SetForeignConstraintRelationshipGraphInvalid(void);
extern void ClearForeignConstraintRelationshipGraphContext(void);
extern HTAB * CreateOidVisitedHashSet(void);
extern bool OidVisited(HTAB *oidVisitedMap, Oid oid);
extern void VisitOid(HTAB *oidVisitedMap, Oid oid);

View File

@ -15,6 +15,27 @@
#include "utils/hsearch.h"
/*
* assert_valid_hash_key2 checks if a type that contains 2 fields contains no
* padding bytes. This is necessary to use a type as a hash key with tag_hash.
*/
#define assert_valid_hash_key2(type, field1, field2) \
StaticAssertDecl( \
sizeof(type) == sizeof(((type) { 0 }).field1) \
+ sizeof(((type) { 0 }).field2), \
# type " has padding bytes, but is used as a hash key in a simple hash");
/*
* assert_valid_hash_key3 checks if a type that contains 3 fields contains no
* padding bytes. This is necessary to use a type as a hash key with tag_hash.
*/
#define assert_valid_hash_key3(type, field1, field2, field3) \
StaticAssertDecl( \
sizeof(type) == sizeof(((type) { 0 }).field1) \
+ sizeof(((type) { 0 }).field2) \
+ sizeof(((type) { 0 }).field3), \
# type " has padding bytes, but is used as a hash key in a simple hash");
extern void hash_delete_all(HTAB *htab);
/*
@ -30,8 +51,82 @@ extern void hash_delete_all(HTAB *htab);
extern void foreach_htab_cleanup(void *var, HASH_SEQ_STATUS *status);
extern HTAB * CreateSimpleHashWithName(Size keysize, Size entrysize, char *name);
extern HTAB * CreateSimpleHashWithNameAndSizeInternal(Size keysize, Size entrysize,
char *name, long nelem);
/*
* CreatesSimpleHash creates a hash table that hash its key using the tag_hash
* and stores then entries in the CurrentMemoryContext.
*
* We use 32 as the initial number of elements that fit into this hash
* table. This value seems a reasonable tradeof between two issues:
* 1. An empty hashmap shouldn't take up a lot of space
* 2. Doing a few inserts shouldn't require growing the hashmap
*
* NOTE: No performance testing has been performed when choosing this
* value. If this ever turns out to be a problem, feel free to do some
* performance tests.
*
* IMPORTANT: Because this uses tag_hash it's required that the keyType
* contains no automatic padding bytes, because that will result in tag_hash
* returning undefined values. You can check this using assert_valid_hash_keyX.
*/
#define CreateSimpleHash(keyType, entryType) \
CreateSimpleHashWithName(sizeof(keyType), sizeof(entryType), # entryType "Hash")
CreateSimpleHashWithNameAndSize(keyType, entryType, \
# entryType "Hash", 32)
/*
* Same as CreateSimpleHash but allows specifying the name
*/
#define CreateSimpleHashWithName(keyType, entryType, name) \
CreateSimpleHashWithNameAndSize(keyType, entryType, \
name, 32)
/*
* CreateSimpleHashWithSize is the same as CreateSimpleHash, but allows
* configuring of the amount of elements that initially fit in the hash table.
*/
#define CreateSimpleHashWithSize(keyType, entryType, size) \
CreateSimpleHashWithNameAndSize(keyType, entryType, \
# entryType "Hash", size)
#define CreateSimpleHashWithNameAndSize(keyType, entryType, name, size) \
CreateSimpleHashWithNameAndSizeInternal(sizeof(keyType), \
sizeof(entryType), \
name, size)
/*
* CreatesSimpleHashSet creates a hash set that hashes its values using the
* tag_hash and stores the values in the CurrentMemoryContext.
*/
#define CreateSimpleHashSet(keyType) \
CreateSimpleHashWithName(keyType, keyType, \
# keyType "HashSet")
/*
* CreatesSimpleHashSetWithSize creates a hash set that hashes its values using
* the tag_hash and stores the values in the CurrentMemoryContext. It allows
* specifying its number of elements.
*/
#define CreateSimpleHashSetWithSize(keyType, size) \
CreateSimpleHashWithNameAndSize(keyType, keyType, # keyType "HashSet", size)
/*
* CreatesSimpleHashSetWithName creates a hash set that hashes its values using the
* tag_hash and stores the values in the CurrentMemoryContext. It allows
* specifying its name.
*/
#define CreateSimpleHashSetWithName(keyType, name) \
CreateSimpleHashWithName(keyType, keyType, name)
/*
* CreatesSimpleHashSetWithName creates a hash set that hashes its values using the
* tag_hash and stores the values in the CurrentMemoryContext. It allows
* specifying its name and number of elements.
*/
#define CreateSimpleHashSetWithNameAndSize(keyType, name, size) \
CreateSimpleHashWithNameAndSize(keyType, keyType, name, size)
#endif

View File

@ -16,6 +16,7 @@
#include "nodes/pg_list.h"
#include "distributed/connection_management.h"
#include "distributed/hash_helpers.h"
/* Config variables managed via guc.c */
@ -32,6 +33,7 @@ typedef struct NodeAndOwner
uint32_t nodeId;
Oid tableOwnerId;
} NodeAndOwner;
assert_valid_hash_key2(NodeAndOwner, nodeId, tableOwnerId);
/*