mirror of https://github.com/citusdata/citus.git
Use a hash table to de-duplicate column names in ruleutils.c.
52c707483ce4d0161127e4958d981d1b5655865e m3hm3t/pg18_dev_relation_oid_0
parent
9057c8778b
commit
8383666109
|
@ -0,0 +1 @@
|
|||
Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf
|
|
@ -235,6 +235,10 @@ typedef void (*rsv_callback) (Node *node, deparse_context *context,
|
|||
* of aliases to columns of the right input. Thus, positions in the printable
|
||||
* column alias list are not necessarily one-for-one with varattnos of the
|
||||
* JOIN, so we need a separate new_colnames[] array for printing purposes.
|
||||
*
|
||||
* Finally, when dealing with wide tables we risk O(N^2) costs in assigning
|
||||
* non-duplicate column names. We ameliorate that by using a hash table that
|
||||
* holds all the strings appearing in colnames, new_colnames, and parentUsing.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
|
@ -301,6 +305,15 @@ typedef struct
|
|||
int *leftattnos; /* left-child varattnos of join cols, or 0 */
|
||||
int *rightattnos; /* right-child varattnos of join cols, or 0 */
|
||||
List *usingNames; /* names assigned to merged columns */
|
||||
|
||||
/*
|
||||
* Hash table holding copies of all the strings appearing in this struct's
|
||||
* colnames, new_colnames, and parentUsing. We use a hash table only for
|
||||
* sufficiently wide relations, and only during the colname-assignment
|
||||
* functions set_relation_column_names and set_join_column_names;
|
||||
* otherwise, names_hash is NULL.
|
||||
*/
|
||||
HTAB *names_hash; /* entries are just strings */
|
||||
} deparse_columns;
|
||||
|
||||
/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
|
||||
|
@ -342,6 +355,9 @@ static bool colname_is_unique(const char *colname, deparse_namespace *dpns,
|
|||
static char *make_colname_unique(char *colname, deparse_namespace *dpns,
|
||||
deparse_columns *colinfo);
|
||||
static void expand_colnames_array_to(deparse_columns *colinfo, int n);
|
||||
static void build_colinfo_names_hash(deparse_columns *colinfo);
|
||||
static void add_to_names_hash(deparse_columns *colinfo, const char *name);
|
||||
static void destroy_colinfo_names_hash(deparse_columns *colinfo);
|
||||
static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
|
||||
deparse_columns *colinfo);
|
||||
static char *get_rtable_name(int rtindex, deparse_context *context);
|
||||
|
@ -988,6 +1004,10 @@ has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
|
|||
*
|
||||
* parentUsing is a list of all USING aliases assigned in parent joins of
|
||||
* the current jointree node. (The passed-in list must not be modified.)
|
||||
*
|
||||
* Note that we do not use per-deparse_columns hash tables in this function.
|
||||
* The number of names that need to be assigned should be small enough that
|
||||
* we don't need to trouble with that.
|
||||
*/
|
||||
static void
|
||||
set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing)
|
||||
|
@ -1265,6 +1285,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *));
|
||||
colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool));
|
||||
|
||||
/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
|
||||
build_colinfo_names_hash(colinfo);
|
||||
|
||||
/*
|
||||
* Scan the columns, select a unique alias for each one, and store it in
|
||||
* colinfo->colnames and colinfo->new_colnames. The former array has NULL
|
||||
|
@ -1301,6 +1324,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
colname = make_colname_unique(colname, dpns, colinfo);
|
||||
|
||||
colinfo->colnames[i] = colname;
|
||||
add_to_names_hash(colinfo, colname);
|
||||
}
|
||||
|
||||
/* Put names of non-dropped columns in new_colnames[] too */
|
||||
|
@ -1321,6 +1345,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
has_anonymous = true;
|
||||
}
|
||||
|
||||
/* We're now done needing the colinfo's names_hash */
|
||||
destroy_colinfo_names_hash(colinfo);
|
||||
|
||||
/*
|
||||
* Set correct length for new_colnames[] array. (Note: if columns have
|
||||
* been added, colinfo->num_cols includes them, which is not really quite
|
||||
|
@ -1391,6 +1418,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
expand_colnames_array_to(colinfo, noldcolumns);
|
||||
Assert(colinfo->num_cols == noldcolumns);
|
||||
|
||||
/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
|
||||
build_colinfo_names_hash(colinfo);
|
||||
|
||||
/*
|
||||
* Scan the join output columns, select an alias for each one, and store
|
||||
* it in colinfo->colnames. If there are USING columns, set_using_names()
|
||||
|
@ -1427,6 +1457,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
if (rte->alias == NULL)
|
||||
{
|
||||
colinfo->colnames[i] = real_colname;
|
||||
add_to_names_hash(colinfo, real_colname);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1443,6 +1474,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
colname = make_colname_unique(colname, dpns, colinfo);
|
||||
|
||||
colinfo->colnames[i] = colname;
|
||||
add_to_names_hash(colinfo, colname);
|
||||
}
|
||||
|
||||
/* Remember if any assigned aliases differ from "real" name */
|
||||
|
@ -1541,6 +1573,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
}
|
||||
else
|
||||
colinfo->new_colnames[j] = child_colname;
|
||||
add_to_names_hash(colinfo, colinfo->new_colnames[j]);
|
||||
}
|
||||
|
||||
colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
|
||||
|
@ -1590,6 +1623,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
}
|
||||
else
|
||||
colinfo->new_colnames[j] = child_colname;
|
||||
add_to_names_hash(colinfo, colinfo->new_colnames[j]);
|
||||
}
|
||||
|
||||
colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
|
||||
|
@ -1611,6 +1645,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
|||
Assert(j == nnewcolumns);
|
||||
#endif
|
||||
|
||||
/* We're now done needing the colinfo's names_hash */
|
||||
destroy_colinfo_names_hash(colinfo);
|
||||
|
||||
/*
|
||||
* For a named join, print column aliases if we changed any from the child
|
||||
* names. Unnamed joins cannot print aliases.
|
||||
|
@ -1633,28 +1670,58 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
|
|||
int i;
|
||||
ListCell *lc;
|
||||
|
||||
/* Check against already-assigned column aliases within RTE */
|
||||
for (i = 0; i < colinfo->num_cols; i++)
|
||||
/*
|
||||
* If we have a hash table, consult that instead of linearly scanning the
|
||||
* colinfo's strings.
|
||||
*/
|
||||
if (colinfo->names_hash)
|
||||
{
|
||||
char *oldname = colinfo->colnames[i];
|
||||
|
||||
if (oldname && strcmp(oldname, colname) == 0)
|
||||
if (hash_search(colinfo->names_hash,
|
||||
colname,
|
||||
HASH_FIND,
|
||||
NULL) != NULL)
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Check against already-assigned column aliases within RTE */
|
||||
for (i = 0; i < colinfo->num_cols; i++)
|
||||
{
|
||||
char *oldname = colinfo->colnames[i];
|
||||
|
||||
if (oldname && strcmp(oldname, colname) == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're building a new_colnames array, check that too (this will
|
||||
* be partially but not completely redundant with the previous checks)
|
||||
*/
|
||||
for (i = 0; i < colinfo->num_new_cols; i++)
|
||||
{
|
||||
char *oldname = colinfo->new_colnames[i];
|
||||
|
||||
if (oldname && strcmp(oldname, colname) == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Also check against names already assigned for parent-join USING
|
||||
* cols
|
||||
*/
|
||||
foreach(lc, colinfo->parentUsing)
|
||||
{
|
||||
char *oldname = (char *) lfirst(lc);
|
||||
|
||||
if (strcmp(oldname, colname) == 0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're building a new_colnames array, check that too (this will be
|
||||
* partially but not completely redundant with the previous checks)
|
||||
* Also check against USING-column names that must be globally unique.
|
||||
* These are not hashed, but there should be few of them.
|
||||
*/
|
||||
for (i = 0; i < colinfo->num_new_cols; i++)
|
||||
{
|
||||
char *oldname = colinfo->new_colnames[i];
|
||||
|
||||
if (oldname && strcmp(oldname, colname) == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Also check against USING-column names that must be globally unique */
|
||||
foreach(lc, dpns->using_names)
|
||||
{
|
||||
char *oldname = (char *) lfirst(lc);
|
||||
|
@ -1663,15 +1730,6 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Also check against names already assigned for parent-join USING cols */
|
||||
foreach(lc, colinfo->parentUsing)
|
||||
{
|
||||
char *oldname = (char *) lfirst(lc);
|
||||
|
||||
if (strcmp(oldname, colname) == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1734,6 +1792,90 @@ expand_colnames_array_to(deparse_columns *colinfo, int n)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* build_colinfo_names_hash: optionally construct a hash table for colinfo
|
||||
*/
|
||||
static void
|
||||
build_colinfo_names_hash(deparse_columns *colinfo)
|
||||
{
|
||||
HASHCTL hash_ctl;
|
||||
int i;
|
||||
ListCell *lc;
|
||||
|
||||
/*
|
||||
* Use a hash table only for RTEs with at least 32 columns. (The cutoff
|
||||
* is somewhat arbitrary, but let's choose it so that this code does get
|
||||
* exercised in the regression tests.)
|
||||
*/
|
||||
if (colinfo->num_cols < 32)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Set up the hash table. The entries are just strings with no other
|
||||
* payload.
|
||||
*/
|
||||
hash_ctl.keysize = NAMEDATALEN;
|
||||
hash_ctl.entrysize = NAMEDATALEN;
|
||||
hash_ctl.hcxt = CurrentMemoryContext;
|
||||
colinfo->names_hash = hash_create("deparse_columns names",
|
||||
colinfo->num_cols + colinfo->num_new_cols,
|
||||
&hash_ctl,
|
||||
HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
|
||||
|
||||
/*
|
||||
* Preload the hash table with any names already present (these would have
|
||||
* come from set_using_names).
|
||||
*/
|
||||
for (i = 0; i < colinfo->num_cols; i++)
|
||||
{
|
||||
char *oldname = colinfo->colnames[i];
|
||||
|
||||
if (oldname)
|
||||
add_to_names_hash(colinfo, oldname);
|
||||
}
|
||||
|
||||
for (i = 0; i < colinfo->num_new_cols; i++)
|
||||
{
|
||||
char *oldname = colinfo->new_colnames[i];
|
||||
|
||||
if (oldname)
|
||||
add_to_names_hash(colinfo, oldname);
|
||||
}
|
||||
|
||||
foreach(lc, colinfo->parentUsing)
|
||||
{
|
||||
char *oldname = (char *) lfirst(lc);
|
||||
|
||||
add_to_names_hash(colinfo, oldname);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* add_to_names_hash: add a string to the names_hash, if we're using one
|
||||
*/
|
||||
static void
|
||||
add_to_names_hash(deparse_columns *colinfo, const char *name)
|
||||
{
|
||||
if (colinfo->names_hash)
|
||||
(void) hash_search(colinfo->names_hash,
|
||||
name,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* destroy_colinfo_names_hash: destroy hash table when done with it
|
||||
*/
|
||||
static void
|
||||
destroy_colinfo_names_hash(deparse_columns *colinfo)
|
||||
{
|
||||
if (colinfo->names_hash)
|
||||
{
|
||||
hash_destroy(colinfo->names_hash);
|
||||
colinfo->names_hash = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* identify_join_columns: figure out where columns of a join come from
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue