mirror of https://github.com/citusdata/citus.git
Fix PostgreSQL 18 compatibility issues related to "?column?" names in subqueries
- Updated various planner and deparser files to handle NULL or "?column?" as resname by generating meaningful column names.
- Ensured that generated names follow a consistent format to avoid parsing errors in complex queries.
- Added debug queries and tests to isolate and reproduce the issues related to intermediate results and subqueries.
- Created setup scripts for test tables and data to facilitate regression testing for PostgreSQL 18 compatibility.
pull/8090/head
parent
c183634207
commit
d653df74cf
|
|
@ -0,0 +1,32 @@
|
|||
-- Debug query for PostgreSQL 18 compatibility issue
|
||||
\c citus
|
||||
|
||||
SET citus.log_remote_commands = on;
|
||||
SET client_min_messages = DEBUG1;
|
||||
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
(
|
||||
SELECT users_table.user_id FROM users_table,
|
||||
(
|
||||
SELECT
|
||||
avg(event_type) as avg_val
|
||||
FROM
|
||||
(SELECT
|
||||
event_type, users_table.user_id
|
||||
FROM
|
||||
users_table, (SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 0) as foo
|
||||
WHERE
|
||||
foo.user_id = users_table.user_id) bar, users_table
|
||||
WHERE
|
||||
bar.user_id = users_table.user_id
|
||||
GROUP BY
|
||||
users_table.value_1
|
||||
) as baz
|
||||
WHERE
|
||||
baz.avg_val < users_table.user_id
|
||||
ORDER BY 1
|
||||
LIMIT 3
|
||||
) as sub1
|
||||
ORDER BY 1 DESC;
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
|
||||
-- Create a simple test to reproduce the issue
|
||||
CREATE SCHEMA debug_test;
|
||||
SET search_path TO debug_test, public;
|
||||
SET client_min_messages TO DEBUG1;
|
||||
|
||||
-- Simple query that should generate intermediate results
|
||||
SELECT DISTINCT user_id FROM (
|
||||
SELECT users_table.user_id
|
||||
FROM users_table
|
||||
WHERE user_id < 3
|
||||
ORDER BY 1
|
||||
LIMIT 2
|
||||
) as sub1 ORDER BY 1 DESC;
|
||||
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
# using postmaster on localhost, port 57636
|
||||
# diff command failed with status 512: diff "/workspaces/citus/expected/subqueries_deep.out" "/workspaces/citus/results/subqueries_deep.out" > "/workspaces/citus/results/subqueries_deep.out.diff"Bail out!
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
-- Setup test tables for PostgreSQL 18 compatibility testing
|
||||
\c citus
|
||||
|
||||
SET citus.shard_replication_factor = 1;
|
||||
SET citus.shard_count = 4;
|
||||
|
||||
-- Create tables if they don't exist
|
||||
CREATE TABLE IF NOT EXISTS users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
|
||||
CREATE TABLE IF NOT EXISTS events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
|
||||
|
||||
-- Create distributed tables
|
||||
SELECT create_distributed_table('users_table', 'user_id');
|
||||
SELECT create_distributed_table('events_table', 'user_id');
|
||||
|
||||
-- Insert some test data
|
||||
INSERT INTO users_table VALUES
|
||||
(1, now(), 10, 20, 1.5, 100),
|
||||
(2, now(), 30, 40, 2.5, 200),
|
||||
(3, now(), 50, 60, 3.5, 300);
|
||||
|
||||
INSERT INTO events_table VALUES
|
||||
(1, now(), 1, 1, 1.1, 10),
|
||||
(2, now(), 2, 2, 2.2, 20),
|
||||
(3, now(), 3, 3, 3.3, 30),
|
||||
(1, now(), 4, 4, 4.4, 40);
|
||||
|
|
@ -1875,9 +1875,16 @@ ExpandMergedSubscriptingRefEntries(List *targetEntryList)
|
|||
*/
|
||||
TargetEntry *newTargetEntry = copyObject(targetEntry);
|
||||
newTargetEntry->expr = expr;
|
||||
expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry);
|
||||
|
||||
/* now inspect the refexpr that SubscriptingRef at hand were holding */
|
||||
/*
|
||||
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
|
||||
*/
|
||||
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
newTargetEntry->resname = psprintf("expr_col_%d", newTargetEntry->resno);
|
||||
}
|
||||
|
||||
expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry); /* now inspect the refexpr that SubscriptingRef at hand were holding */
|
||||
expr = refexpr;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2722,7 +2722,16 @@ get_simple_values_rte(Query *query, TupleDesc resultDesc)
|
|||
if (resultDesc && colno <= resultDesc->natts)
|
||||
colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname);
|
||||
else
|
||||
{
|
||||
colname = tle->resname;
|
||||
/* PostgreSQL 18 compatibility: handle NULL or "?column?" resname */
|
||||
if (colname == NULL || strcmp(colname, "?column?") == 0)
|
||||
{
|
||||
static char generated_name[64];
|
||||
snprintf(generated_name, sizeof(generated_name), "intermediate_column_%d", colno);
|
||||
colname = generated_name;
|
||||
}
|
||||
}
|
||||
|
||||
/* does it match the VALUES RTE? */
|
||||
if (colname == NULL || strcmp(colname, cname) != 0)
|
||||
|
|
@ -2921,8 +2930,17 @@ get_target_list(List *targetList, deparse_context *context)
|
|||
* When colNamesVisible is true, we should always show the
|
||||
* assigned column name explicitly. Otherwise, show it only if
|
||||
* it's not FigureColname's fallback.
|
||||
*
|
||||
* PostgreSQL 18 fix: Instead of using "?column?" which causes issues
|
||||
* in complex subqueries, generate a meaningful column name.
|
||||
*/
|
||||
attname = context->colNamesVisible ? NULL : "?column?";
|
||||
if (context->colNamesVisible)
|
||||
attname = NULL;
|
||||
else
|
||||
{
|
||||
/* Generate a column name that won't cause parsing issues */
|
||||
attname = psprintf("expr_%d", colno);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -2935,8 +2953,24 @@ get_target_list(List *targetList, deparse_context *context)
|
|||
colname = NameStr(TupleDescAttr(context->resultDesc,
|
||||
colno - 1)->attname);
|
||||
else
|
||||
{
|
||||
colname = tle->resname;
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 fix: tle->resname can be NULL for intermediate expressions.
|
||||
* In that case, generate a meaningful column name instead of using "?column?".
|
||||
*/
|
||||
if (colname == NULL || strcmp(colname, "?column?") == 0)
|
||||
{
|
||||
/*
|
||||
* Generate a column name in the format "col_N" where N is the column number.
|
||||
* This provides a stable, predictable name that won't cause issues with
|
||||
* column resolution in complex subqueries.
|
||||
*/
|
||||
colname = psprintf("col_%d", colno);
|
||||
}
|
||||
}
|
||||
|
||||
/* Show AS unless the column's name is correct as-is */
|
||||
if (colname) /* resname could be NULL */
|
||||
{
|
||||
|
|
@ -4532,7 +4566,16 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context)
|
|||
colname = NameStr(TupleDescAttr(context->resultDesc,
|
||||
colno - 1)->attname);
|
||||
else
|
||||
{
|
||||
colname = tle->resname;
|
||||
/* PostgreSQL 18 compatibility: handle NULL or "?column?" resname */
|
||||
if (colname == NULL || strcmp(colname, "?column?") == 0)
|
||||
{
|
||||
static char generated_name[64];
|
||||
snprintf(generated_name, sizeof(generated_name), "intermediate_column_%d", colno);
|
||||
colname = generated_name;
|
||||
}
|
||||
}
|
||||
if (colname && strcmp(colname, attname) == 0 &&
|
||||
!equal(var, tle->expr))
|
||||
{
|
||||
|
|
|
|||
|
|
@ -110,6 +110,19 @@ RemoteScanTargetList(List *workerTargetList)
|
|||
* entry's sort and group clauses will *break* us here.
|
||||
*/
|
||||
TargetEntry *remoteScanTargetEntry = flatCopyTargetEntry(workerTargetEntry);
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
|
||||
* When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
|
||||
* we need to generate a proper column name to avoid parsing errors.
|
||||
*/
|
||||
if (remoteScanTargetEntry->resname == NULL || strcmp(remoteScanTargetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "remote_col_%d", workerTargetEntry->resno);
|
||||
remoteScanTargetEntry->resname = generatedName->data;
|
||||
}
|
||||
|
||||
remoteScanTargetEntry->expr = (Expr *) remoteScanColumn;
|
||||
remoteScanTargetList = lappend(remoteScanTargetList, remoteScanTargetEntry);
|
||||
}
|
||||
|
|
@ -272,7 +285,18 @@ BuildSelectStatementViaStdPlanner(Query *combineQuery, List *remoteScanTargetLis
|
|||
TargetEntry *targetEntry = NULL;
|
||||
foreach_declared_ptr(targetEntry, remoteScanTargetList)
|
||||
{
|
||||
columnNameList = lappend(columnNameList, makeString(targetEntry->resname));
|
||||
char *resname = targetEntry->resname;
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
|
||||
* by generating a proper column name for the remote scan RTE
|
||||
*/
|
||||
if (resname == NULL || strcmp(resname, "?column?") == 0)
|
||||
{
|
||||
resname = psprintf("remote_col_%d", targetEntry->resno);
|
||||
}
|
||||
|
||||
columnNameList = lappend(columnNameList, makeString(resname));
|
||||
}
|
||||
extradataContainerRTE->eref = makeAlias("remote_scan", columnNameList);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1545,7 +1545,18 @@ FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan)
|
|||
TargetEntry *targetEntry = NULL;
|
||||
foreach_declared_ptr(targetEntry, customScan->scan.plan.targetlist)
|
||||
{
|
||||
String *columnName = makeString(targetEntry->resname);
|
||||
char *resname = targetEntry->resname;
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
|
||||
* by generating a proper column name for the remote scan RTE
|
||||
*/
|
||||
if (resname == NULL || strcmp(resname, "?column?") == 0)
|
||||
{
|
||||
resname = psprintf("remote_col_%d", targetEntry->resno);
|
||||
}
|
||||
|
||||
String *columnName = makeString(resname);
|
||||
columnNameList = lappend(columnNameList, columnName);
|
||||
}
|
||||
|
||||
|
|
@ -1613,6 +1624,19 @@ makeCustomScanTargetlistFromExistingTargetList(List *existingTargetlist)
|
|||
}
|
||||
|
||||
TargetEntry *newTargetEntry = flatCopyTargetEntry(targetEntry);
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
|
||||
* When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
|
||||
* we need to generate a proper column name to avoid parsing errors.
|
||||
*/
|
||||
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "custom_col_%d", targetEntry->resno);
|
||||
newTargetEntry->resname = generatedName->data;
|
||||
}
|
||||
|
||||
newTargetEntry->expr = (Expr *) newVar;
|
||||
custom_scan_tlist = lappend(custom_scan_tlist, newTargetEntry);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -655,6 +655,18 @@ CreateTargetListForCombineQuery(List *targetList)
|
|||
{
|
||||
TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry);
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
|
||||
* When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
|
||||
* we need to generate a proper column name to avoid parsing errors.
|
||||
*/
|
||||
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "insert_col_%d", originalTargetEntry->resno);
|
||||
newTargetEntry->resname = generatedName->data;
|
||||
}
|
||||
|
||||
Var *column = makeVarFromTargetEntry(masterTableId, originalTargetEntry);
|
||||
column->varattno = columnId;
|
||||
column->varattnosyn = columnId;
|
||||
|
|
@ -1116,6 +1128,14 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
|
|||
|
||||
newSubqueryTargetEntry = copyObject(oldSubqueryTle);
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
|
||||
*/
|
||||
if (newSubqueryTargetEntry->resname == NULL || strcmp(newSubqueryTargetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
newSubqueryTargetEntry->resname = psprintf("expr_col_%d", newSubqueryTargetEntry->resno);
|
||||
}
|
||||
|
||||
newSubqueryTargetEntry->resno = resno;
|
||||
newSubqueryTargetlist = lappend(newSubqueryTargetlist,
|
||||
newSubqueryTargetEntry);
|
||||
|
|
@ -1130,7 +1150,17 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
|
|||
newSubqueryTargetEntry);
|
||||
}
|
||||
|
||||
String *columnName = makeString(newSubqueryTargetEntry->resname);
|
||||
/*
|
||||
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
|
||||
* before creating columnName for eref->colnames
|
||||
*/
|
||||
char *resname = newSubqueryTargetEntry->resname;
|
||||
if (resname == NULL || strcmp(resname, "?column?") == 0)
|
||||
{
|
||||
resname = psprintf("insert_col_%d", newSubqueryTargetEntry->resno);
|
||||
}
|
||||
|
||||
String *columnName = makeString(resname);
|
||||
columnNameList = lappend(columnNameList, columnName);
|
||||
|
||||
/*
|
||||
|
|
@ -1175,6 +1205,14 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
|
|||
|
||||
TargetEntry *newSubqueryTargetEntry = copyObject(oldSubqueryTle);
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
|
||||
*/
|
||||
if (newSubqueryTargetEntry->resname == NULL || strcmp(newSubqueryTargetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
newSubqueryTargetEntry->resname = psprintf("expr_col_%d", newSubqueryTargetEntry->resno);
|
||||
}
|
||||
|
||||
newSubqueryTargetEntry->resno = resno;
|
||||
newSubqueryTargetlist = lappend(newSubqueryTargetlist,
|
||||
newSubqueryTargetEntry);
|
||||
|
|
|
|||
|
|
@ -1431,6 +1431,18 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
|
|||
Expr *originalExpression = originalTargetEntry->expr;
|
||||
Expr *newExpression = NULL;
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
|
||||
* When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
|
||||
* we need to generate a proper column name to avoid parsing errors on workers.
|
||||
*/
|
||||
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "expr_col_%d", originalTargetEntry->resno);
|
||||
newTargetEntry->resname = generatedName->data;
|
||||
}
|
||||
|
||||
if (CanPushDownExpression((Node *) originalExpression, extendedOpNodeProperties))
|
||||
{
|
||||
/*
|
||||
|
|
@ -2947,7 +2959,7 @@ GenerateWorkerTargetEntry(TargetEntry *targetEntry, Expr *workerExpression,
|
|||
newTargetEntry = makeNode(TargetEntry);
|
||||
}
|
||||
|
||||
if (newTargetEntry->resname == NULL)
|
||||
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
newTargetEntry->resname = WorkerColumnName(targetProjectionNumber);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1436,6 +1436,29 @@ ExtractColumns(RangeTblEntry *callingRTE, int rangeTableId,
|
|||
columnNames,
|
||||
columnVars);
|
||||
#endif
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_18
|
||||
/*
|
||||
* PostgreSQL 18 compatibility: expandRTE might return NULL or "?column?" names
|
||||
* for intermediate expressions. Fix these column names to avoid errors.
|
||||
*/
|
||||
List *fixedColumnNames = NIL;
|
||||
ListCell *nameCell = NULL;
|
||||
int colIndex = 1;
|
||||
|
||||
foreach(nameCell, *columnNames)
|
||||
{
|
||||
char *colName = strVal(lfirst(nameCell));
|
||||
if (colName == NULL || strcmp(colName, "?column?") == 0)
|
||||
{
|
||||
colName = psprintf("expand_col_%d", colIndex);
|
||||
}
|
||||
fixedColumnNames = lappend(fixedColumnNames, makeString(colName));
|
||||
colIndex++;
|
||||
}
|
||||
|
||||
*columnNames = fixedColumnNames;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -305,6 +305,32 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
|
|||
/* descend into subqueries */
|
||||
query_tree_walker(query, RecursivelyPlanSubqueryWalker, context, 0);
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 may generate NULL or "?column?" as column names
|
||||
* for intermediate results. After subquery replacement, we need to fix
|
||||
* any remaining "?column?" references in the main query's target list.
|
||||
*/
|
||||
ListCell *targetEntryCell = NULL;
|
||||
int columnNumber = 1;
|
||||
foreach(targetEntryCell, query->targetList)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
|
||||
if (targetEntry->resjunk)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "main_query_column_%d", columnNumber);
|
||||
targetEntry->resname = generatedName->data;
|
||||
}
|
||||
|
||||
columnNumber++;
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point, all CTEs, leaf subqueries containing local tables and
|
||||
* non-pushdownable subqueries have been replaced. We now check for
|
||||
|
|
@ -1170,6 +1196,32 @@ RecursivelyPlanCTEs(Query *query, RecursivePlanningContext *planningContext)
|
|||
|
||||
uint32 subPlanId = list_length(planningContext->subPlanList) + 1;
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 may generate NULL or "?column?" as column names
|
||||
* for intermediate results. We need to fix these before the subquery
|
||||
* gets planned, as the intermediate result files will use these names.
|
||||
*/
|
||||
ListCell *targetEntryCell = NULL;
|
||||
int columnNumber = 1;
|
||||
foreach(targetEntryCell, subquery->targetList)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
|
||||
if (targetEntry->resjunk)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
|
||||
targetEntry->resname = generatedName->data;
|
||||
}
|
||||
|
||||
columnNumber++;
|
||||
}
|
||||
|
||||
if (IsLoggableLevel(DEBUG1))
|
||||
{
|
||||
StringInfo subPlanString = makeStringInfo();
|
||||
|
|
@ -1272,6 +1324,32 @@ RecursivelyPlanSubqueryWalker(Node *node, RecursivePlanningContext *context)
|
|||
{
|
||||
Query *query = (Query *) node;
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 may generate NULL or "?column?" as column names
|
||||
* for intermediate results. Fix these before processing any subqueries
|
||||
* that might reference this query's target list.
|
||||
*/
|
||||
ListCell *targetEntryCell = NULL;
|
||||
int columnNumber = 1;
|
||||
foreach(targetEntryCell, query->targetList)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
|
||||
if (targetEntry->resjunk)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "walker_query_column_%d_%d", context->level, columnNumber);
|
||||
targetEntry->resname = generatedName->data;
|
||||
}
|
||||
|
||||
columnNumber++;
|
||||
}
|
||||
|
||||
context->level += 1;
|
||||
|
||||
/*
|
||||
|
|
@ -1569,6 +1647,31 @@ RecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *planningConte
|
|||
debugQuery = copyObject(subquery);
|
||||
}
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 may generate NULL or "?column?" as column names
|
||||
* for intermediate results. We need to fix these before the subquery
|
||||
* gets planned, as the intermediate result files will use these names.
|
||||
*/
|
||||
ListCell *targetEntryCell = NULL;
|
||||
int columnNumber = 1;
|
||||
foreach(targetEntryCell, subquery->targetList)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
|
||||
if (targetEntry->resjunk)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
|
||||
targetEntry->resname = generatedName->data;
|
||||
}
|
||||
|
||||
columnNumber++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create the subplan and append it to the list in the planning context.
|
||||
|
|
@ -1937,9 +2040,16 @@ GenerateRequiredColNamesFromTargetList(List *targetList)
|
|||
* column names of the inner subquery should only contain the
|
||||
* required columns, as in if we choose 'b' from ('a','b') colnames
|
||||
* should be 'a' not ('a','b')
|
||||
*
|
||||
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
|
||||
*/
|
||||
innerSubqueryColNames = lappend(innerSubqueryColNames, makeString(
|
||||
entry->resname));
|
||||
char *resname = entry->resname;
|
||||
if (resname == NULL || strcmp(resname, "?column?") == 0)
|
||||
{
|
||||
resname = psprintf("subquery_col_%d", entry->resno);
|
||||
}
|
||||
|
||||
innerSubqueryColNames = lappend(innerSubqueryColNames, makeString(resname));
|
||||
}
|
||||
}
|
||||
return innerSubqueryColNames;
|
||||
|
|
@ -2482,6 +2592,17 @@ BuildReadIntermediateResultsQuery(List *targetEntryList, List *columnAliasList,
|
|||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* PostgreSQL 18 may generate NULL or "?column?" as column names
|
||||
* for intermediate results. Generate a proper column name in such cases.
|
||||
*/
|
||||
if (columnName == NULL || strcmp(columnName, "?column?") == 0)
|
||||
{
|
||||
StringInfo generatedName = makeStringInfo();
|
||||
appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
|
||||
columnName = generatedName->data;
|
||||
}
|
||||
|
||||
funcColNames = lappend(funcColNames, makeString(columnName));
|
||||
funcColTypes = lappend_int(funcColTypes, columnType);
|
||||
funcColTypMods = lappend_int(funcColTypMods, columnTypMod);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
2025-08-02 06:34:55.234 UTC [41803] LOG: starting PostgreSQL 18beta2 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit
|
||||
2025-08-02 06:34:55.235 UTC [41803] LOG: could not bind IPv6 address "::1": Address already in use
|
||||
2025-08-02 06:34:55.235 UTC [41803] HINT: Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
|
||||
2025-08-02 06:34:55.235 UTC [41803] LOG: could not bind IPv4 address "127.0.0.1": Address already in use
|
||||
2025-08-02 06:34:55.235 UTC [41803] HINT: Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
|
||||
2025-08-02 06:34:55.235 UTC [41803] WARNING: could not create listen socket for "localhost"
|
||||
2025-08-02 06:34:55.235 UTC [41803] FATAL: could not create any TCP/IP sockets
|
||||
2025-08-02 06:34:55.240 UTC [41803] LOG: database system is shut down
|
||||
2025-08-02 06:35:12.489 UTC [42020] LOG: starting PostgreSQL 18beta2 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit
|
||||
2025-08-02 06:35:12.490 UTC [42020] LOG: could not bind IPv6 address "::1": Address already in use
|
||||
2025-08-02 06:35:12.490 UTC [42020] HINT: Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
|
||||
2025-08-02 06:35:12.490 UTC [42020] LOG: could not bind IPv4 address "127.0.0.1": Address already in use
|
||||
2025-08-02 06:35:12.490 UTC [42020] HINT: Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
|
||||
2025-08-02 06:35:12.490 UTC [42020] WARNING: could not create listen socket for "localhost"
|
||||
2025-08-02 06:35:12.490 UTC [42020] FATAL: could not create any TCP/IP sockets
|
||||
2025-08-02 06:35:12.494 UTC [42020] LOG: database system is shut down
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
-- Test aggregate with GROUP BY to isolate the problem
|
||||
\c citus
|
||||
|
||||
-- Tables should already exist
|
||||
CREATE TABLE IF NOT EXISTS users_table (user_id int, value_1 int, value_2 int);
|
||||
CREATE TABLE IF NOT EXISTS events_table (user_id int, event_type int, value_2 int);
|
||||
|
||||
-- Populate with minimal data
|
||||
INSERT INTO users_table (user_id, value_1, value_2) VALUES (1, 10, 100), (2, 20, 200) ON CONFLICT DO NOTHING;
|
||||
INSERT INTO events_table (user_id, event_type, value_2) VALUES (1, 1, 1), (2, 2, 2) ON CONFLICT DO NOTHING;
|
||||
|
||||
-- Test simple aggregate without GROUP BY (should work)
|
||||
SELECT avg(event_type) FROM events_table;
|
||||
|
||||
-- Test simple aggregate with GROUP BY (might fail)
|
||||
SELECT avg(event_type) FROM events_table GROUP BY user_id;
|
||||
|
||||
-- Test nested aggregate with GROUP BY (likely to fail)
|
||||
SELECT avg(event_type)
|
||||
FROM (
|
||||
SELECT event_type, user_id
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
) sub
|
||||
GROUP BY sub.user_id;
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
-- Debug test with detailed logging
|
||||
\c citus
|
||||
|
||||
SET client_min_messages TO DEBUG2;
|
||||
SET citus.log_remote_commands TO ON;
|
||||
|
||||
-- Test the failing query with detailed logging
|
||||
SELECT DISTINCT user_id
|
||||
FROM (
|
||||
SELECT users_table.user_id
|
||||
FROM users_table,
|
||||
(
|
||||
SELECT avg(event_type) as avg_val
|
||||
FROM (
|
||||
SELECT event_type, users_table.user_id
|
||||
FROM users_table,
|
||||
(
|
||||
SELECT user_id, event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
ORDER BY 1, 2
|
||||
LIMIT 1
|
||||
) as foo
|
||||
WHERE foo.user_id = users_table.user_id
|
||||
) bar, users_table
|
||||
WHERE bar.user_id = users_table.user_id
|
||||
GROUP BY users_table.value_1
|
||||
) as baz
|
||||
WHERE baz.avg_val < users_table.user_id
|
||||
ORDER BY 1
|
||||
LIMIT 1
|
||||
) as sub1;
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
-- Test to debug PostgreSQL 18 -- Test the actual failing pattern before
|
||||
SELECT users_table.value_1 as value1
|
||||
FROM (
|
||||
SELECT event_type, users_table.user_id
|
||||
FROM users_table,
|
||||
(
|
||||
SELECT user_id, event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
ORDER BY 1, 2
|
||||
LIMIT 1
|
||||
) as foo
|
||||
WHERE foo.user_id = users_table.user_id
|
||||
) bar, users_table
|
||||
WHERE bar.user_id = users_table.user_id
|
||||
GROUP BY value1;
|
||||
-- First let's test with our existing setup
|
||||
\c citus
|
||||
|
||||
-- Simple subquery that should work
|
||||
SELECT user_id FROM (SELECT user_id FROM users_table WHERE user_id = 1) sub;
|
||||
|
||||
-- Test aggregate subquery (this might have ?column? issue)
|
||||
SELECT * FROM (SELECT avg(user_id) FROM users_table) sub;
|
||||
|
||||
-- Test complex subquery similar to the failing one
|
||||
SELECT * FROM (
|
||||
SELECT avg(event_type) as avg_val
|
||||
FROM (
|
||||
SELECT event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
LIMIT 1
|
||||
) foo
|
||||
) bar;
|
||||
|
||||
--Test simple alias
|
||||
SELECT value_1 as value1
|
||||
from (
|
||||
select * from (
|
||||
select * from users_table
|
||||
) as tab2
|
||||
) as tab;
|
||||
|
||||
-- Test the actual failing pattern before
|
||||
SELECT users_table.value_1 as value1
|
||||
FROM (
|
||||
SELECT event_type, users_table.user_id
|
||||
FROM users_table,
|
||||
(
|
||||
SELECT user_id, event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
ORDER BY 1, 2
|
||||
LIMIT 1
|
||||
) as foo
|
||||
WHERE foo.user_id = users_table.user_id
|
||||
) bar, users_table
|
||||
WHERE bar.user_id = users_table.user_id
|
||||
;
|
||||
|
||||
-- Test the actual failing pattern
|
||||
-- Test the simplified ?column? issue
|
||||
SELECT DISTINCT user_id
|
||||
FROM (
|
||||
SELECT users_table.user_id
|
||||
FROM users_table,
|
||||
(
|
||||
SELECT event_type as avg_val
|
||||
FROM (
|
||||
SELECT event_type, a_users.user_id
|
||||
FROM users_table as a_users,
|
||||
(
|
||||
SELECT user_id, event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
ORDER BY 1, 2
|
||||
LIMIT 1
|
||||
) as foo
|
||||
WHERE foo.user_id = a_users.user_id
|
||||
) bar, users_table as b_users
|
||||
WHERE bar.user_id = b_users.user_id
|
||||
) as baz
|
||||
WHERE baz.avg_val < users_table.user_id
|
||||
ORDER BY 1
|
||||
LIMIT 1
|
||||
) as sub1;
|
||||
|
||||
|
||||
-- Test the actual failing pattern
|
||||
-- Test the full ?column? issue (with avg() and GROUP BY)
|
||||
SELECT DISTINCT user_id
|
||||
FROM (
|
||||
SELECT users_table.user_id
|
||||
FROM users_table,
|
||||
(
|
||||
SELECT avg(event_type) as avg_val
|
||||
FROM (
|
||||
SELECT event_type, a_users.user_id
|
||||
FROM users_table as a_users,
|
||||
(
|
||||
SELECT user_id, event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
ORDER BY 1, 2
|
||||
LIMIT 1
|
||||
) as foo
|
||||
WHERE foo.user_id = a_users.user_id
|
||||
) bar, users_table as b_users
|
||||
WHERE bar.user_id = b_users.user_id
|
||||
GROUP BY b_users.value_1
|
||||
) as baz
|
||||
WHERE baz.avg_val < users_table.user_id
|
||||
ORDER BY 1
|
||||
LIMIT 1
|
||||
) as sub1;
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
-- Test only the failing complex nested query
|
||||
\c citus
|
||||
|
||||
SET client_min_messages TO DEBUG5;
|
||||
|
||||
-- The exact failing pattern (this should fail without our fix)
|
||||
SELECT avg(event_type) as avg_val
|
||||
FROM (
|
||||
SELECT event_type, a_users.user_id
|
||||
FROM users_table as a_users,
|
||||
(
|
||||
SELECT user_id, event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
ORDER BY 1, 2
|
||||
LIMIT 1
|
||||
) as foo
|
||||
WHERE foo.user_id = a_users.user_id
|
||||
) bar, users_table as b_users
|
||||
WHERE bar.user_id = b_users.user_id
|
||||
GROUP BY b_users.value_1;
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
-- Focus on the ?column? issue specifically
|
||||
\c citus
|
||||
|
||||
-- Tables should already exist, just in case
|
||||
CREATE TABLE IF NOT EXISTS users_table (user_id int, value_1 int, value_2 int);
|
||||
CREATE TABLE IF NOT EXISTS events_table (user_id int, event_type int, value_2 int);
|
||||
|
||||
-- Populate with minimal data
|
||||
INSERT INTO users_table (user_id, value_1, value_2) VALUES (1, 10, 100), (2, 20, 200) ON CONFLICT DO NOTHING;
|
||||
INSERT INTO events_table (user_id, event_type, value_2) VALUES (1, 1, 1), (2, 2, 2) ON CONFLICT DO NOTHING;
|
||||
|
||||
-- Test the simplified ?column? issue
|
||||
SELECT DISTINCT user_id
|
||||
FROM (
|
||||
SELECT users_table.user_id
|
||||
FROM users_table,
|
||||
(
|
||||
SELECT avg(event_type) as avg_val
|
||||
FROM (
|
||||
SELECT event_type, a_users.user_id
|
||||
FROM users_table as a_users,
|
||||
(
|
||||
SELECT user_id, event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
ORDER BY 1, 2
|
||||
LIMIT 1
|
||||
) as foo
|
||||
WHERE foo.user_id = a_users.user_id
|
||||
) bar, users_table as b_users
|
||||
WHERE bar.user_id = b_users.user_id
|
||||
GROUP BY b_users.value_1
|
||||
) as baz
|
||||
WHERE baz.avg_val < users_table.user_id
|
||||
ORDER BY 1
|
||||
LIMIT 1
|
||||
) as sub1;
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
-- Test deeply nested queries with JOINs and GROUP BY
|
||||
\c citus
|
||||
|
||||
-- Test the exact failing pattern step by step
|
||||
-- Step 1: Simple subquery with JOIN (should work)
|
||||
SELECT avg(event_type) as avg_val
|
||||
FROM (
|
||||
SELECT event_type, users_table.user_id
|
||||
FROM users_table, events_table
|
||||
WHERE events_table.user_id = users_table.user_id
|
||||
) sub
|
||||
GROUP BY sub.user_id;
|
||||
|
||||
-- Step 2: Add one more level of nesting (might fail)
|
||||
SELECT avg_val
|
||||
FROM (
|
||||
SELECT avg(event_type) as avg_val
|
||||
FROM (
|
||||
SELECT event_type, users_table.user_id
|
||||
FROM users_table, events_table
|
||||
WHERE events_table.user_id = users_table.user_id
|
||||
) sub
|
||||
GROUP BY sub.user_id
|
||||
) outer_sub;
|
||||
|
||||
-- Step 3: The exact failing pattern (this should fail)
|
||||
SELECT avg(event_type) as avg_val
|
||||
FROM (
|
||||
SELECT event_type, a_users.user_id
|
||||
FROM users_table as a_users,
|
||||
(
|
||||
SELECT user_id, event_type
|
||||
FROM events_table
|
||||
WHERE value_2 < 3
|
||||
ORDER BY 1, 2
|
||||
LIMIT 1
|
||||
) as foo
|
||||
WHERE foo.user_id = a_users.user_id
|
||||
) bar, users_table as b_users
|
||||
WHERE bar.user_id = b_users.user_id
|
||||
GROUP BY b_users.value_1;
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
-- Test PostgreSQL 18 compatibility using existing distributed tables
\c citus

-- Pattern from subqueries_deep.sql that should fail without our fix.
--   foo  : events_table rows with value_2 < 3, ordered, behind an OFFSET 0
--   bar  : event_type plus users_table.user_id for users matching foo
--   baz  : avg(event_type) over bar joined to users_table, one row per value_1
--   sub1 : the three lowest users_table.user_id values that exceed some
--          baz.avg_val
-- The final result is the distinct user_ids of sub1, highest first.
SELECT DISTINCT
    user_id
FROM
    (
        SELECT
            users_table.user_id
        FROM
            users_table,
            (
                SELECT
                    avg(event_type) AS avg_val
                FROM
                    (
                        SELECT
                            event_type,
                            users_table.user_id
                        FROM
                            users_table,
                            (
                                SELECT
                                    user_id,
                                    event_type
                                FROM events_table
                                WHERE value_2 < 3
                                ORDER BY 1, 2
                                OFFSET 0
                            ) AS foo
                        WHERE foo.user_id = users_table.user_id
                    ) AS bar,
                    users_table
                WHERE bar.user_id = users_table.user_id
                GROUP BY users_table.value_1
            ) AS baz
        WHERE baz.avg_val < users_table.user_id
        ORDER BY 1
        LIMIT 3
    ) AS sub1
ORDER BY 1 DESC;
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
-- Simple PostgreSQL 18 test for "?column?" issue

-- Remove any schema left behind by an aborted earlier run so that the
-- CREATE SCHEMA below cannot fail with "already exists".
DROP SCHEMA IF EXISTS test_simple CASCADE;
CREATE SCHEMA test_simple;
SET search_path TO test_simple;

-- Minimal two-column table, distributed on id.
CREATE TABLE simple_table (
    id INTEGER,
    value TEXT
);

SELECT create_distributed_table('simple_table', 'id');

-- Explicit column list keeps the fixture valid if columns are added later.
INSERT INTO simple_table (id, value)
VALUES
    (1, 'a'),
    (2, 'b'),
    (3, 'c');
|
||||
|
||||
-- Test a simple subquery that might trigger the "?column?" issue
-- The outer ORDER BY makes the row order of the output stable; without it
-- the three rows may come back in any order.
SELECT *
FROM (
    SELECT id
    FROM simple_table
) AS sub
ORDER BY id;

-- Test more complex nested subquery
SELECT *
FROM (
    SELECT id
    FROM (
        SELECT
            id,
            value
        FROM simple_table
    ) inner_sub
) outer_sub
ORDER BY id;

-- Clean up: point search_path away from the schema before dropping it so the
-- session is not left with a search_path naming a schema that no longer exists.
SET search_path TO public;
DROP SCHEMA test_simple CASCADE;
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
-- Simple test to isolate the PostgreSQL 18 issue
-- This should show where the ?column? error occurs

-- Start from a clean slate: drop any previous copy of the schema, recreate
-- it, and resolve unqualified names there first, then in public.
DROP SCHEMA IF EXISTS simple_test CASCADE;
CREATE SCHEMA simple_test;
SET search_path TO simple_test, public;

-- Three-column events table, distributed on user_id.
CREATE TABLE events_table (
    user_id    integer,
    event_type integer,
    value_2    integer
);

SELECT create_distributed_table('events_table', 'user_id');
|
||||
|
||||
-- Fixture rows for events_table.
-- The query in this script filters on value_2 < 3 and then skips the first
-- three rows (OFFSET 3). More than three rows therefore need value_2 < 3,
-- otherwise that query can only ever return an empty result. Five of the six
-- rows below qualify, leaving two rows after the offset.
INSERT INTO events_table (user_id, event_type, value_2)
VALUES
    (1, 1, 1),
    (2, 2, 2),
    (3, 3, 3),
    (4, 4, 1),
    (5, 5, 2),
    (6, 6, 1);
|
||||
|
||||
-- Raise log verbosity so DEBUG1 messages are shown for the query below.
SET client_min_messages TO DEBUG1;

-- This is the simplest subquery that should trigger the issue:
-- filter, order by both output columns, then skip the first three rows.
SELECT
    user_id,
    event_type
FROM events_table
WHERE value_2 < 3
ORDER BY 1, 2
OFFSET 3;

-- Clean up: leave the schema before dropping it and everything inside it.
SET search_path TO public;
DROP SCHEMA simple_test CASCADE;
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
-- Test PostgreSQL 18 "?column?" fix for subqueries_deep
-- Create basic tables for testing

-- Remove any schema left behind by an aborted earlier run so that the
-- CREATE SCHEMA below cannot fail with "already exists".
DROP SCHEMA IF EXISTS test_pg18_fix CASCADE;
CREATE SCHEMA test_pg18_fix;
-- test_pg18_fix comes first, so the tables created below take precedence over
-- any same-named tables in public.
SET search_path TO test_pg18_fix, public;

-- Create test tables similar to what the regular tests use
CREATE TABLE users_table (
    user_id int,
    time timestamp,
    value_1 int,
    value_2 int,
    value_3 double precision,
    value_4 bigint
);

CREATE TABLE events_table (
    user_id int,
    time timestamp,
    event_type int,
    value_2 int,
    value_3 double precision,
    value_4 bigint
);

-- Create distributed tables (both distributed on user_id)
SELECT create_distributed_table('users_table', 'user_id');
SELECT create_distributed_table('events_table', 'user_id');
|
||||
|
||||
-- Insert some test data
-- One users_table row per user_id, with a distinct value_1 for each user.
INSERT INTO users_table (user_id, time, value_1, value_2, value_3, value_4)
VALUES
    (1, '2024-01-01', 10, 1, 1.1, 100),
    (2, '2024-01-02', 20, 2, 2.2, 200),
    (3, '2024-01-03', 30, 3, 3.3, 300),
    (4, '2024-01-04', 40, 4, 4.4, 400),
    (5, '2024-01-05', 50, 5, 5.5, 500),
    (6, '2024-01-06', 60, 6, 6.6, 600);

-- The test query in this script reads events with value_2 < 3 and skips the
-- first three of them (OFFSET 3). The first six rows alone contain only two
-- such events, which would make every subquery level, and the final result,
-- empty. The last four rows add one qualifying event each for users 3-6, so
-- six events qualify and three remain after the offset.
INSERT INTO events_table (user_id, time, event_type, value_2, value_3, value_4)
VALUES
    (1, '2024-01-01', 1, 1, 1.1, 100),
    (2, '2024-01-02', 2, 2, 2.2, 200),
    (3, '2024-01-03', 3, 3, 3.3, 300),
    (4, '2024-01-04', 4, 4, 4.4, 400),
    (5, '2024-01-05', 5, 5, 5.5, 500),
    (6, '2024-01-06', 1, 6, 6.6, 600),
    (3, '2024-01-07', 3, 1, 7.7, 700),
    (4, '2024-01-08', 4, 2, 8.8, 800),
    (5, '2024-01-09', 5, 1, 9.9, 900),
    (6, '2024-01-10', 2, 2, 10.1, 1000);
|
||||
|
||||
-- Raise log verbosity so DEBUG1 messages are shown for the query below.
SET client_min_messages TO DEBUG1;

-- Test the original failing query from subqueries_deep.sql
--   foo  : events_table rows with value_2 < 3, ordered by
--          (user_id, event_type), skipping the first three
--   bar  : event_type plus users_table.user_id for users matching foo
--   baz  : avg(event_type) over bar joined to users_table, one row per value_1
--   sub1 : the three lowest users_table.user_id values that exceed some
--          baz.avg_val
-- The final result is the distinct user_ids of sub1, highest first.
SELECT DISTINCT
    user_id
FROM
    (
        SELECT
            users_table.user_id
        FROM
            users_table,
            (
                SELECT
                    avg(event_type) AS avg_val
                FROM
                    (
                        SELECT
                            event_type,
                            users_table.user_id
                        FROM
                            users_table,
                            (
                                SELECT
                                    user_id,
                                    event_type
                                FROM events_table
                                WHERE value_2 < 3
                                ORDER BY 1, 2
                                OFFSET 3
                            ) AS foo
                        WHERE foo.user_id = users_table.user_id
                    ) AS bar,
                    users_table
                WHERE bar.user_id = users_table.user_id
                GROUP BY users_table.value_1
            ) AS baz
        WHERE baz.avg_val < users_table.user_id
        ORDER BY 1
        LIMIT 3
    ) AS sub1
ORDER BY 1 DESC;

-- Clean up: leave the schema before dropping it and everything inside it.
SET search_path TO public;
DROP SCHEMA test_pg18_fix CASCADE;
|
||||
Loading…
Reference in New Issue