mirror of https://github.com/citusdata/citus.git
Use a hash table to de-duplicate column names in ruleutils.c.
52c707483ce4d0161127e4958d981d1b5655865e m3hm3t/pg18_dev_relation_oid_0
parent
9057c8778b
commit
8383666109
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf
|
|
@ -235,6 +235,10 @@ typedef void (*rsv_callback) (Node *node, deparse_context *context,
|
||||||
* of aliases to columns of the right input. Thus, positions in the printable
|
* of aliases to columns of the right input. Thus, positions in the printable
|
||||||
* column alias list are not necessarily one-for-one with varattnos of the
|
* column alias list are not necessarily one-for-one with varattnos of the
|
||||||
* JOIN, so we need a separate new_colnames[] array for printing purposes.
|
* JOIN, so we need a separate new_colnames[] array for printing purposes.
|
||||||
|
*
|
||||||
|
* Finally, when dealing with wide tables we risk O(N^2) costs in assigning
|
||||||
|
* non-duplicate column names. We ameliorate that by using a hash table that
|
||||||
|
* holds all the strings appearing in colnames, new_colnames, and parentUsing.
|
||||||
*/
|
*/
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
@ -301,6 +305,15 @@ typedef struct
|
||||||
int *leftattnos; /* left-child varattnos of join cols, or 0 */
|
int *leftattnos; /* left-child varattnos of join cols, or 0 */
|
||||||
int *rightattnos; /* right-child varattnos of join cols, or 0 */
|
int *rightattnos; /* right-child varattnos of join cols, or 0 */
|
||||||
List *usingNames; /* names assigned to merged columns */
|
List *usingNames; /* names assigned to merged columns */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hash table holding copies of all the strings appearing in this struct's
|
||||||
|
* colnames, new_colnames, and parentUsing. We use a hash table only for
|
||||||
|
* sufficiently wide relations, and only during the colname-assignment
|
||||||
|
* functions set_relation_column_names and set_join_column_names;
|
||||||
|
* otherwise, names_hash is NULL.
|
||||||
|
*/
|
||||||
|
HTAB *names_hash; /* entries are just strings */
|
||||||
} deparse_columns;
|
} deparse_columns;
|
||||||
|
|
||||||
/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
|
/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
|
||||||
|
@ -342,6 +355,9 @@ static bool colname_is_unique(const char *colname, deparse_namespace *dpns,
|
||||||
static char *make_colname_unique(char *colname, deparse_namespace *dpns,
|
static char *make_colname_unique(char *colname, deparse_namespace *dpns,
|
||||||
deparse_columns *colinfo);
|
deparse_columns *colinfo);
|
||||||
static void expand_colnames_array_to(deparse_columns *colinfo, int n);
|
static void expand_colnames_array_to(deparse_columns *colinfo, int n);
|
||||||
|
static void build_colinfo_names_hash(deparse_columns *colinfo);
|
||||||
|
static void add_to_names_hash(deparse_columns *colinfo, const char *name);
|
||||||
|
static void destroy_colinfo_names_hash(deparse_columns *colinfo);
|
||||||
static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
|
static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
|
||||||
deparse_columns *colinfo);
|
deparse_columns *colinfo);
|
||||||
static char *get_rtable_name(int rtindex, deparse_context *context);
|
static char *get_rtable_name(int rtindex, deparse_context *context);
|
||||||
|
@ -988,6 +1004,10 @@ has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
|
||||||
*
|
*
|
||||||
* parentUsing is a list of all USING aliases assigned in parent joins of
|
* parentUsing is a list of all USING aliases assigned in parent joins of
|
||||||
* the current jointree node. (The passed-in list must not be modified.)
|
* the current jointree node. (The passed-in list must not be modified.)
|
||||||
|
*
|
||||||
|
* Note that we do not use per-deparse_columns hash tables in this function.
|
||||||
|
* The number of names that need to be assigned should be small enough that
|
||||||
|
* we don't need to trouble with that.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing)
|
set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing)
|
||||||
|
@ -1265,6 +1285,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *));
|
colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *));
|
||||||
colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool));
|
colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool));
|
||||||
|
|
||||||
|
/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
|
||||||
|
build_colinfo_names_hash(colinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Scan the columns, select a unique alias for each one, and store it in
|
* Scan the columns, select a unique alias for each one, and store it in
|
||||||
* colinfo->colnames and colinfo->new_colnames. The former array has NULL
|
* colinfo->colnames and colinfo->new_colnames. The former array has NULL
|
||||||
|
@ -1301,6 +1324,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
colname = make_colname_unique(colname, dpns, colinfo);
|
colname = make_colname_unique(colname, dpns, colinfo);
|
||||||
|
|
||||||
colinfo->colnames[i] = colname;
|
colinfo->colnames[i] = colname;
|
||||||
|
add_to_names_hash(colinfo, colname);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Put names of non-dropped columns in new_colnames[] too */
|
/* Put names of non-dropped columns in new_colnames[] too */
|
||||||
|
@ -1321,6 +1345,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
has_anonymous = true;
|
has_anonymous = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We're now done needing the colinfo's names_hash */
|
||||||
|
destroy_colinfo_names_hash(colinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set correct length for new_colnames[] array. (Note: if columns have
|
* Set correct length for new_colnames[] array. (Note: if columns have
|
||||||
* been added, colinfo->num_cols includes them, which is not really quite
|
* been added, colinfo->num_cols includes them, which is not really quite
|
||||||
|
@ -1391,6 +1418,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
expand_colnames_array_to(colinfo, noldcolumns);
|
expand_colnames_array_to(colinfo, noldcolumns);
|
||||||
Assert(colinfo->num_cols == noldcolumns);
|
Assert(colinfo->num_cols == noldcolumns);
|
||||||
|
|
||||||
|
/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
|
||||||
|
build_colinfo_names_hash(colinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Scan the join output columns, select an alias for each one, and store
|
* Scan the join output columns, select an alias for each one, and store
|
||||||
* it in colinfo->colnames. If there are USING columns, set_using_names()
|
* it in colinfo->colnames. If there are USING columns, set_using_names()
|
||||||
|
@ -1427,6 +1457,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
if (rte->alias == NULL)
|
if (rte->alias == NULL)
|
||||||
{
|
{
|
||||||
colinfo->colnames[i] = real_colname;
|
colinfo->colnames[i] = real_colname;
|
||||||
|
add_to_names_hash(colinfo, real_colname);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1443,6 +1474,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
colname = make_colname_unique(colname, dpns, colinfo);
|
colname = make_colname_unique(colname, dpns, colinfo);
|
||||||
|
|
||||||
colinfo->colnames[i] = colname;
|
colinfo->colnames[i] = colname;
|
||||||
|
add_to_names_hash(colinfo, colname);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remember if any assigned aliases differ from "real" name */
|
/* Remember if any assigned aliases differ from "real" name */
|
||||||
|
@ -1541,6 +1573,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
colinfo->new_colnames[j] = child_colname;
|
colinfo->new_colnames[j] = child_colname;
|
||||||
|
add_to_names_hash(colinfo, colinfo->new_colnames[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
|
colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
|
||||||
|
@ -1590,6 +1623,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
colinfo->new_colnames[j] = child_colname;
|
colinfo->new_colnames[j] = child_colname;
|
||||||
|
add_to_names_hash(colinfo, colinfo->new_colnames[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
|
colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
|
||||||
|
@ -1611,6 +1645,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
Assert(j == nnewcolumns);
|
Assert(j == nnewcolumns);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* We're now done needing the colinfo's names_hash */
|
||||||
|
destroy_colinfo_names_hash(colinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For a named join, print column aliases if we changed any from the child
|
* For a named join, print column aliases if we changed any from the child
|
||||||
* names. Unnamed joins cannot print aliases.
|
* names. Unnamed joins cannot print aliases.
|
||||||
|
@ -1633,6 +1670,20 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
|
||||||
int i;
|
int i;
|
||||||
ListCell *lc;
|
ListCell *lc;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we have a hash table, consult that instead of linearly scanning the
|
||||||
|
* colinfo's strings.
|
||||||
|
*/
|
||||||
|
if (colinfo->names_hash)
|
||||||
|
{
|
||||||
|
if (hash_search(colinfo->names_hash,
|
||||||
|
colname,
|
||||||
|
HASH_FIND,
|
||||||
|
NULL) != NULL)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
/* Check against already-assigned column aliases within RTE */
|
/* Check against already-assigned column aliases within RTE */
|
||||||
for (i = 0; i < colinfo->num_cols; i++)
|
for (i = 0; i < colinfo->num_cols; i++)
|
||||||
{
|
{
|
||||||
|
@ -1643,8 +1694,8 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're building a new_colnames array, check that too (this will be
|
* If we're building a new_colnames array, check that too (this will
|
||||||
* partially but not completely redundant with the previous checks)
|
* be partially but not completely redundant with the previous checks)
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < colinfo->num_new_cols; i++)
|
for (i = 0; i < colinfo->num_new_cols; i++)
|
||||||
{
|
{
|
||||||
|
@ -1654,17 +1705,24 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Also check against USING-column names that must be globally unique */
|
/*
|
||||||
foreach(lc, dpns->using_names)
|
* Also check against names already assigned for parent-join USING
|
||||||
|
* cols
|
||||||
|
*/
|
||||||
|
foreach(lc, colinfo->parentUsing)
|
||||||
{
|
{
|
||||||
char *oldname = (char *) lfirst(lc);
|
char *oldname = (char *) lfirst(lc);
|
||||||
|
|
||||||
if (strcmp(oldname, colname) == 0)
|
if (strcmp(oldname, colname) == 0)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Also check against names already assigned for parent-join USING cols */
|
/*
|
||||||
foreach(lc, colinfo->parentUsing)
|
* Also check against USING-column names that must be globally unique.
|
||||||
|
* These are not hashed, but there should be few of them.
|
||||||
|
*/
|
||||||
|
foreach(lc, dpns->using_names)
|
||||||
{
|
{
|
||||||
char *oldname = (char *) lfirst(lc);
|
char *oldname = (char *) lfirst(lc);
|
||||||
|
|
||||||
|
@ -1734,6 +1792,90 @@ expand_colnames_array_to(deparse_columns *colinfo, int n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* build_colinfo_names_hash: optionally construct a hash table for colinfo
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
build_colinfo_names_hash(deparse_columns *colinfo)
|
||||||
|
{
|
||||||
|
HASHCTL hash_ctl;
|
||||||
|
int i;
|
||||||
|
ListCell *lc;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use a hash table only for RTEs with at least 32 columns. (The cutoff
|
||||||
|
* is somewhat arbitrary, but let's choose it so that this code does get
|
||||||
|
* exercised in the regression tests.)
|
||||||
|
*/
|
||||||
|
if (colinfo->num_cols < 32)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set up the hash table. The entries are just strings with no other
|
||||||
|
* payload.
|
||||||
|
*/
|
||||||
|
hash_ctl.keysize = NAMEDATALEN;
|
||||||
|
hash_ctl.entrysize = NAMEDATALEN;
|
||||||
|
hash_ctl.hcxt = CurrentMemoryContext;
|
||||||
|
colinfo->names_hash = hash_create("deparse_columns names",
|
||||||
|
colinfo->num_cols + colinfo->num_new_cols,
|
||||||
|
&hash_ctl,
|
||||||
|
HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Preload the hash table with any names already present (these would have
|
||||||
|
* come from set_using_names).
|
||||||
|
*/
|
||||||
|
for (i = 0; i < colinfo->num_cols; i++)
|
||||||
|
{
|
||||||
|
char *oldname = colinfo->colnames[i];
|
||||||
|
|
||||||
|
if (oldname)
|
||||||
|
add_to_names_hash(colinfo, oldname);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < colinfo->num_new_cols; i++)
|
||||||
|
{
|
||||||
|
char *oldname = colinfo->new_colnames[i];
|
||||||
|
|
||||||
|
if (oldname)
|
||||||
|
add_to_names_hash(colinfo, oldname);
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach(lc, colinfo->parentUsing)
|
||||||
|
{
|
||||||
|
char *oldname = (char *) lfirst(lc);
|
||||||
|
|
||||||
|
add_to_names_hash(colinfo, oldname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* add_to_names_hash: add a string to the names_hash, if we're using one
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
add_to_names_hash(deparse_columns *colinfo, const char *name)
|
||||||
|
{
|
||||||
|
if (colinfo->names_hash)
|
||||||
|
(void) hash_search(colinfo->names_hash,
|
||||||
|
name,
|
||||||
|
HASH_ENTER,
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* destroy_colinfo_names_hash: destroy hash table when done with it
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
destroy_colinfo_names_hash(deparse_columns *colinfo)
|
||||||
|
{
|
||||||
|
if (colinfo->names_hash)
|
||||||
|
{
|
||||||
|
hash_destroy(colinfo->names_hash);
|
||||||
|
colinfo->names_hash = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* identify_join_columns: figure out where columns of a join come from
|
* identify_join_columns: figure out where columns of a join come from
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue