Fix PostgreSQL 18 compatibility issues related to "?column?" names in subqueries

- Updated various planner and deparser files to handle NULL or "?column?" as resname by generating meaningful column names.
- Ensured that generated names follow a consistent format to avoid parsing errors in complex queries.
- Added debug queries and tests to isolate and reproduce the issues related to intermediate results and subqueries.
- Created setup scripts for test tables and data to facilitate regression testing for PostgreSQL 18 compatibility.
pull/8090/head
Gurkan Indibay 2025-08-02 08:50:50 +00:00
parent c183634207
commit d653df74cf
25 changed files with 833 additions and 16 deletions

32
debug_query.sql Normal file
View File

@ -0,0 +1,32 @@
-- Debugging aid for the PostgreSQL 18 compatibility issue.
-- Connects to the "citus" database, enables remote-command logging and
-- DEBUG1 client messages, then runs one deeply nested query.
\c citus
SET citus.log_remote_commands = on;
SET client_min_messages = DEBUG1;

-- Nesting, innermost first:
--   foo  : (user_id, event_type) from events_table where value_2 < 3
--   bar  : foo joined to users_table on user_id
--   baz  : avg(event_type) per users_table.value_1
--   sub1 : at most three user_ids with baz.avg_val < user_id
-- The comma joins and positional ORDER BY are kept exactly as written, since
-- the shape of the query is what is being debugged.
SELECT DISTINCT
    user_id
FROM
    (
        SELECT
            users_table.user_id
        FROM
            users_table,
            (
                SELECT
                    avg(event_type) AS avg_val
                FROM
                    (
                        SELECT
                            event_type,
                            users_table.user_id
                        FROM
                            users_table,
                            (
                                SELECT user_id, event_type
                                FROM events_table
                                WHERE value_2 < 3
                                ORDER BY 1, 2
                                OFFSET 0
                            ) AS foo
                        WHERE
                            foo.user_id = users_table.user_id
                    ) AS bar,
                    users_table
                WHERE
                    bar.user_id = users_table.user_id
                GROUP BY
                    users_table.value_1
            ) AS baz
        WHERE
            baz.avg_val < users_table.user_id
        ORDER BY 1
        LIMIT 3
    ) AS sub1
ORDER BY 1 DESC;

15
debug_test.sql Normal file
View File

@ -0,0 +1,15 @@
-- Minimal reproduction script: one LIMIT subquery wrapped in a DISTINCT
-- outer query, run with DEBUG1 messages enabled.
--
-- users_table is not created here; it is resolved through the search_path
-- (debug_test first, then public).
-- NOTE(review): presumably the table lives in public -- confirm.

-- IF NOT EXISTS keeps the script re-runnable in the same database.
CREATE SCHEMA IF NOT EXISTS debug_test;
SET search_path TO debug_test, public;
SET client_min_messages TO DEBUG1;

-- Simple query that is expected to generate intermediate results
SELECT DISTINCT user_id
FROM (
    SELECT users_table.user_id
    FROM users_table
    WHERE user_id < 3
    ORDER BY 1
    LIMIT 2
) AS sub1
ORDER BY 1 DESC;

-- Clean up. No CASCADE: the script creates nothing inside debug_test, so a
-- plain DROP is enough and will refuse to remove objects it does not own.
SET search_path TO public;
DROP SCHEMA debug_test;

0
regression.diffs Normal file
View File

2
regression.out Normal file
View File

@ -0,0 +1,2 @@
# using postmaster on localhost, port 57636
# diff command failed with status 512: diff "/workspaces/citus/expected/subqueries_deep.out" "/workspaces/citus/results/subqueries_deep.out" > "/workspaces/citus/results/subqueries_deep.out.diff"Bail out!

View File

25
setup_test_tables.sql Normal file
View File

@ -0,0 +1,25 @@
-- Setup test tables for PostgreSQL 18 compatibility testing.
-- Connects to the "citus" database, creates users_table and events_table if
-- they are missing, distributes both on user_id and loads a few rows.
\c citus
SET citus.shard_replication_factor = 1;
SET citus.shard_count = 4;

-- Create tables if they don't exist
CREATE TABLE IF NOT EXISTS users_table (
    user_id int,
    time timestamp,
    value_1 int,
    value_2 int,
    value_3 float,
    value_4 bigint
);

CREATE TABLE IF NOT EXISTS events_table (
    user_id int,
    time timestamp,
    event_type int,
    value_2 int,
    value_3 float,
    value_4 bigint
);

-- Create distributed tables.
-- create_distributed_table raises an error for a table that is already
-- distributed, which would defeat the IF NOT EXISTS above on a second run.
-- Only call it when the table has no entry in pg_dist_partition yet.
SELECT create_distributed_table('users_table', 'user_id')
WHERE NOT EXISTS (
    SELECT 1
    FROM pg_dist_partition
    WHERE logicalrelid = 'users_table'::regclass
);

SELECT create_distributed_table('events_table', 'user_id')
WHERE NOT EXISTS (
    SELECT 1
    FROM pg_dist_partition
    WHERE logicalrelid = 'events_table'::regclass
);

-- Insert some test data.
-- Columns are named explicitly so the statements do not depend on column
-- order. Note that the rows are appended again on every run.
INSERT INTO users_table (user_id, time, value_1, value_2, value_3, value_4) VALUES
    (1, now(), 10, 20, 1.5, 100),
    (2, now(), 30, 40, 2.5, 200),
    (3, now(), 50, 60, 3.5, 300);

INSERT INTO events_table (user_id, time, event_type, value_2, value_3, value_4) VALUES
    (1, now(), 1, 1, 1.1, 10),
    (2, now(), 2, 2, 2.2, 20),
    (3, now(), 3, 3, 3.3, 30),
    (1, now(), 4, 4, 4.4, 40);

View File

@ -1869,15 +1869,22 @@ ExpandMergedSubscriptingRefEntries(List *targetEntryList)
Expr *refexpr = subsRef->refexpr; Expr *refexpr = subsRef->refexpr;
subsRef->refexpr = NULL; subsRef->refexpr = NULL;
/* /*
* Wrap the Expr that holds SubscriptingRef (directly or indirectly) * Wrap the Expr that holds SubscriptingRef (directly or indirectly)
* in a new TargetEntry; note that it doesn't have a refexpr anymore. * in a new TargetEntry; note that it doesn't have a refexpr anymore.
*/ */
TargetEntry *newTargetEntry = copyObject(targetEntry); TargetEntry *newTargetEntry = copyObject(targetEntry);
newTargetEntry->expr = expr; newTargetEntry->expr = expr;
expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry);
/*
/* now inspect the refexpr that SubscriptingRef at hand were holding */ * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
*/
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
{
newTargetEntry->resname = psprintf("expr_col_%d", newTargetEntry->resno);
}
expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry); /* now inspect the refexpr that SubscriptingRef at hand were holding */
expr = refexpr; expr = refexpr;
} }

View File

@ -2722,7 +2722,16 @@ get_simple_values_rte(Query *query, TupleDesc resultDesc)
if (resultDesc && colno <= resultDesc->natts) if (resultDesc && colno <= resultDesc->natts)
colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname); colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname);
else else
{
colname = tle->resname; colname = tle->resname;
/* PostgreSQL 18 compatibility: handle NULL resname */
if (colname == NULL || strcmp(colname, "?column?") == 0)
{
static char generated_name[64];
snprintf(generated_name, sizeof(generated_name), "intermediate_column_%d", colno);
colname = generated_name;
}
}
/* does it match the VALUES RTE? */ /* does it match the VALUES RTE? */
if (colname == NULL || strcmp(colname, cname) != 0) if (colname == NULL || strcmp(colname, cname) != 0)
@ -2921,8 +2930,17 @@ get_target_list(List *targetList, deparse_context *context)
* When colNamesVisible is true, we should always show the * When colNamesVisible is true, we should always show the
* assigned column name explicitly. Otherwise, show it only if * assigned column name explicitly. Otherwise, show it only if
* it's not FigureColname's fallback. * it's not FigureColname's fallback.
*
* PostgreSQL 18 fix: Instead of using "?column?" which causes issues
* in complex subqueries, generate a meaningful column name.
*/ */
attname = context->colNamesVisible ? NULL : "?column?"; if (context->colNamesVisible)
attname = NULL;
else
{
/* Generate a column name that won't cause parsing issues */
attname = psprintf("expr_%d", colno);
}
} }
/* /*
@ -2935,7 +2953,23 @@ get_target_list(List *targetList, deparse_context *context)
colname = NameStr(TupleDescAttr(context->resultDesc, colname = NameStr(TupleDescAttr(context->resultDesc,
colno - 1)->attname); colno - 1)->attname);
else else
{
colname = tle->resname; colname = tle->resname;
/*
* PostgreSQL 18 fix: tle->resname can be NULL for intermediate expressions.
* In that case, generate a meaningful column name instead of using "?column?".
*/
if (colname == NULL || strcmp(colname, "?column?") == 0)
{
/*
* Generate a column name in the format "col_N" where N is the column number.
* This provides a stable, predictable name that won't cause issues with
* column resolution in complex subqueries.
*/
colname = psprintf("col_%d", colno);
}
}
/* Show AS unless the column's name is correct as-is */ /* Show AS unless the column's name is correct as-is */
if (colname) /* resname could be NULL */ if (colname) /* resname could be NULL */
@ -4532,7 +4566,16 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context)
colname = NameStr(TupleDescAttr(context->resultDesc, colname = NameStr(TupleDescAttr(context->resultDesc,
colno - 1)->attname); colno - 1)->attname);
else else
{
colname = tle->resname; colname = tle->resname;
/* PostgreSQL 18 compatibility: handle NULL resname */
if (colname == NULL || strcmp(colname, "?column?") == 0)
{
static char generated_name[64];
snprintf(generated_name, sizeof(generated_name), "intermediate_column_%d", colno);
colname = generated_name;
}
}
if (colname && strcmp(colname, attname) == 0 && if (colname && strcmp(colname, attname) == 0 &&
!equal(var, tle->expr)) !equal(var, tle->expr))
{ {

View File

@ -110,6 +110,19 @@ RemoteScanTargetList(List *workerTargetList)
* entry's sort and group clauses will *break* us here. * entry's sort and group clauses will *break* us here.
*/ */
TargetEntry *remoteScanTargetEntry = flatCopyTargetEntry(workerTargetEntry); TargetEntry *remoteScanTargetEntry = flatCopyTargetEntry(workerTargetEntry);
/*
* PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
* When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
* we need to generate a proper column name to avoid parsing errors.
*/
if (remoteScanTargetEntry->resname == NULL || strcmp(remoteScanTargetEntry->resname, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "remote_col_%d", workerTargetEntry->resno);
remoteScanTargetEntry->resname = generatedName->data;
}
remoteScanTargetEntry->expr = (Expr *) remoteScanColumn; remoteScanTargetEntry->expr = (Expr *) remoteScanColumn;
remoteScanTargetList = lappend(remoteScanTargetList, remoteScanTargetEntry); remoteScanTargetList = lappend(remoteScanTargetList, remoteScanTargetEntry);
} }
@ -272,7 +285,18 @@ BuildSelectStatementViaStdPlanner(Query *combineQuery, List *remoteScanTargetLis
TargetEntry *targetEntry = NULL; TargetEntry *targetEntry = NULL;
foreach_declared_ptr(targetEntry, remoteScanTargetList) foreach_declared_ptr(targetEntry, remoteScanTargetList)
{ {
columnNameList = lappend(columnNameList, makeString(targetEntry->resname)); char *resname = targetEntry->resname;
/*
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
* by generating a proper column name for the remote scan RTE
*/
if (resname == NULL || strcmp(resname, "?column?") == 0)
{
resname = psprintf("remote_col_%d", targetEntry->resno);
}
columnNameList = lappend(columnNameList, makeString(resname));
} }
extradataContainerRTE->eref = makeAlias("remote_scan", columnNameList); extradataContainerRTE->eref = makeAlias("remote_scan", columnNameList);
} }

View File

@ -1545,7 +1545,18 @@ FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan)
TargetEntry *targetEntry = NULL; TargetEntry *targetEntry = NULL;
foreach_declared_ptr(targetEntry, customScan->scan.plan.targetlist) foreach_declared_ptr(targetEntry, customScan->scan.plan.targetlist)
{ {
String *columnName = makeString(targetEntry->resname); char *resname = targetEntry->resname;
/*
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
* by generating a proper column name for the remote scan RTE
*/
if (resname == NULL || strcmp(resname, "?column?") == 0)
{
resname = psprintf("remote_col_%d", targetEntry->resno);
}
String *columnName = makeString(resname);
columnNameList = lappend(columnNameList, columnName); columnNameList = lappend(columnNameList, columnName);
} }
@ -1613,6 +1624,19 @@ makeCustomScanTargetlistFromExistingTargetList(List *existingTargetlist)
} }
TargetEntry *newTargetEntry = flatCopyTargetEntry(targetEntry); TargetEntry *newTargetEntry = flatCopyTargetEntry(targetEntry);
/*
* PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
* When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
* we need to generate a proper column name to avoid parsing errors.
*/
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "custom_col_%d", targetEntry->resno);
newTargetEntry->resname = generatedName->data;
}
newTargetEntry->expr = (Expr *) newVar; newTargetEntry->expr = (Expr *) newVar;
custom_scan_tlist = lappend(custom_scan_tlist, newTargetEntry); custom_scan_tlist = lappend(custom_scan_tlist, newTargetEntry);
} }

View File

@ -654,6 +654,18 @@ CreateTargetListForCombineQuery(List *targetList)
foreach_declared_ptr(originalTargetEntry, targetList) foreach_declared_ptr(originalTargetEntry, targetList)
{ {
TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry); TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry);
/*
* PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
* When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
* we need to generate a proper column name to avoid parsing errors.
*/
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "insert_col_%d", originalTargetEntry->resno);
newTargetEntry->resname = generatedName->data;
}
Var *column = makeVarFromTargetEntry(masterTableId, originalTargetEntry); Var *column = makeVarFromTargetEntry(masterTableId, originalTargetEntry);
column->varattno = columnId; column->varattno = columnId;
@ -1116,6 +1128,14 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
newSubqueryTargetEntry = copyObject(oldSubqueryTle); newSubqueryTargetEntry = copyObject(oldSubqueryTle);
/*
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
*/
if (newSubqueryTargetEntry->resname == NULL || strcmp(newSubqueryTargetEntry->resname, "?column?") == 0)
{
newSubqueryTargetEntry->resname = psprintf("expr_col_%d", newSubqueryTargetEntry->resno);
}
newSubqueryTargetEntry->resno = resno; newSubqueryTargetEntry->resno = resno;
newSubqueryTargetlist = lappend(newSubqueryTargetlist, newSubqueryTargetlist = lappend(newSubqueryTargetlist,
newSubqueryTargetEntry); newSubqueryTargetEntry);
@ -1130,7 +1150,17 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
newSubqueryTargetEntry); newSubqueryTargetEntry);
} }
String *columnName = makeString(newSubqueryTargetEntry->resname); /*
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
* before creating columnName for eref->colnames
*/
char *resname = newSubqueryTargetEntry->resname;
if (resname == NULL || strcmp(resname, "?column?") == 0)
{
resname = psprintf("insert_col_%d", newSubqueryTargetEntry->resno);
}
String *columnName = makeString(resname);
columnNameList = lappend(columnNameList, columnName); columnNameList = lappend(columnNameList, columnName);
/* /*
@ -1175,6 +1205,14 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
TargetEntry *newSubqueryTargetEntry = copyObject(oldSubqueryTle); TargetEntry *newSubqueryTargetEntry = copyObject(oldSubqueryTle);
/*
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
*/
if (newSubqueryTargetEntry->resname == NULL || strcmp(newSubqueryTargetEntry->resname, "?column?") == 0)
{
newSubqueryTargetEntry->resname = psprintf("expr_col_%d", newSubqueryTargetEntry->resno);
}
newSubqueryTargetEntry->resno = resno; newSubqueryTargetEntry->resno = resno;
newSubqueryTargetlist = lappend(newSubqueryTargetlist, newSubqueryTargetlist = lappend(newSubqueryTargetlist,
newSubqueryTargetEntry); newSubqueryTargetEntry);

View File

@ -1430,6 +1430,18 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry); TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry);
Expr *originalExpression = originalTargetEntry->expr; Expr *originalExpression = originalTargetEntry->expr;
Expr *newExpression = NULL; Expr *newExpression = NULL;
/*
* PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
* When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
* we need to generate a proper column name to avoid parsing errors on workers.
*/
if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "expr_col_%d", originalTargetEntry->resno);
newTargetEntry->resname = generatedName->data;
}
if (CanPushDownExpression((Node *) originalExpression, extendedOpNodeProperties)) if (CanPushDownExpression((Node *) originalExpression, extendedOpNodeProperties))
{ {
@ -2947,7 +2959,7 @@ GenerateWorkerTargetEntry(TargetEntry *targetEntry, Expr *workerExpression,
newTargetEntry = makeNode(TargetEntry); newTargetEntry = makeNode(TargetEntry);
} }
if (newTargetEntry->resname == NULL) if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
{ {
newTargetEntry->resname = WorkerColumnName(targetProjectionNumber); newTargetEntry->resname = WorkerColumnName(targetProjectionNumber);
} }

View File

@ -1436,6 +1436,29 @@ ExtractColumns(RangeTblEntry *callingRTE, int rangeTableId,
columnNames, columnNames,
columnVars); columnVars);
#endif #endif
#if PG_VERSION_NUM >= PG_VERSION_18
/*
* PostgreSQL 18 compatibility: expandRTE might return NULL or "?column?" names
* for intermediate expressions. Fix these column names to avoid errors.
*/
List *fixedColumnNames = NIL;
ListCell *nameCell = NULL;
int colIndex = 1;
foreach(nameCell, *columnNames)
{
char *colName = strVal(lfirst(nameCell));
if (colName == NULL || strcmp(colName, "?column?") == 0)
{
colName = psprintf("expand_col_%d", colIndex);
}
fixedColumnNames = lappend(fixedColumnNames, makeString(colName));
colIndex++;
}
*columnNames = fixedColumnNames;
#endif
} }

View File

@ -305,6 +305,32 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
/* descend into subqueries */ /* descend into subqueries */
query_tree_walker(query, RecursivelyPlanSubqueryWalker, context, 0); query_tree_walker(query, RecursivelyPlanSubqueryWalker, context, 0);
/*
* PostgreSQL 18 may generate NULL or "?column?" as column names
* for intermediate results. After subquery replacement, we need to fix
* any remaining "?column?" references in the main query's target list.
*/
ListCell *targetEntryCell = NULL;
int columnNumber = 1;
foreach(targetEntryCell, query->targetList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
if (targetEntry->resjunk)
{
continue;
}
if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "main_query_column_%d", columnNumber);
targetEntry->resname = generatedName->data;
}
columnNumber++;
}
/* /*
* At this point, all CTEs, leaf subqueries containing local tables and * At this point, all CTEs, leaf subqueries containing local tables and
* non-pushdownable subqueries have been replaced. We now check for * non-pushdownable subqueries have been replaced. We now check for
@ -1170,6 +1196,32 @@ RecursivelyPlanCTEs(Query *query, RecursivePlanningContext *planningContext)
uint32 subPlanId = list_length(planningContext->subPlanList) + 1; uint32 subPlanId = list_length(planningContext->subPlanList) + 1;
/*
* PostgreSQL 18 may generate NULL or "?column?" as column names
* for intermediate results. We need to fix these before the subquery
* gets planned, as the intermediate result files will use these names.
*/
ListCell *targetEntryCell = NULL;
int columnNumber = 1;
foreach(targetEntryCell, subquery->targetList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
if (targetEntry->resjunk)
{
continue;
}
if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
targetEntry->resname = generatedName->data;
}
columnNumber++;
}
if (IsLoggableLevel(DEBUG1)) if (IsLoggableLevel(DEBUG1))
{ {
StringInfo subPlanString = makeStringInfo(); StringInfo subPlanString = makeStringInfo();
@ -1272,6 +1324,32 @@ RecursivelyPlanSubqueryWalker(Node *node, RecursivePlanningContext *context)
{ {
Query *query = (Query *) node; Query *query = (Query *) node;
/*
* PostgreSQL 18 may generate NULL or "?column?" as column names
* for intermediate results. Fix these before processing any subqueries
* that might reference this query's target list.
*/
ListCell *targetEntryCell = NULL;
int columnNumber = 1;
foreach(targetEntryCell, query->targetList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
if (targetEntry->resjunk)
{
continue;
}
if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "walker_query_column_%d_%d", context->level, columnNumber);
targetEntry->resname = generatedName->data;
}
columnNumber++;
}
context->level += 1; context->level += 1;
/* /*
@ -1569,6 +1647,31 @@ RecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *planningConte
debugQuery = copyObject(subquery); debugQuery = copyObject(subquery);
} }
/*
* PostgreSQL 18 may generate NULL or "?column?" as column names
* for intermediate results. We need to fix these before the subquery
* gets planned, as the intermediate result files will use these names.
*/
ListCell *targetEntryCell = NULL;
int columnNumber = 1;
foreach(targetEntryCell, subquery->targetList)
{
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
if (targetEntry->resjunk)
{
continue;
}
if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
targetEntry->resname = generatedName->data;
}
columnNumber++;
}
/* /*
* Create the subplan and append it to the list in the planning context. * Create the subplan and append it to the list in the planning context.
@ -1937,9 +2040,16 @@ GenerateRequiredColNamesFromTargetList(List *targetList)
* column names of the inner subquery should only contain the * column names of the inner subquery should only contain the
* required columns, as in if we choose 'b' from ('a','b') colnames * required columns, as in if we choose 'b' from ('a','b') colnames
* should be 'a' not ('a','b') * should be 'a' not ('a','b')
*
* PostgreSQL 18 compatibility: handle NULL or "?column?" resname
*/ */
innerSubqueryColNames = lappend(innerSubqueryColNames, makeString( char *resname = entry->resname;
entry->resname)); if (resname == NULL || strcmp(resname, "?column?") == 0)
{
resname = psprintf("subquery_col_%d", entry->resno);
}
innerSubqueryColNames = lappend(innerSubqueryColNames, makeString(resname));
} }
} }
return innerSubqueryColNames; return innerSubqueryColNames;
@ -2482,6 +2592,17 @@ BuildReadIntermediateResultsQuery(List *targetEntryList, List *columnAliasList,
continue; continue;
} }
/*
* PostgreSQL 18 may generate NULL or "?column?" as column names
* for intermediate results. Generate a proper column name in such cases.
*/
if (columnName == NULL || strcmp(columnName, "?column?") == 0)
{
StringInfo generatedName = makeStringInfo();
appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
columnName = generatedName->data;
}
funcColNames = lappend(funcColNames, makeString(columnName)); funcColNames = lappend(funcColNames, makeString(columnName));
funcColTypes = lappend_int(funcColTypes, columnType); funcColTypes = lappend_int(funcColTypes, columnType);
funcColTypMods = lappend_int(funcColTypMods, columnTypMod); funcColTypMods = lappend_int(funcColTypMods, columnTypMod);

View File

@ -0,0 +1,16 @@
2025-08-02 06:34:55.234 UTC [41803] LOG: starting PostgreSQL 18beta2 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit
2025-08-02 06:34:55.235 UTC [41803] LOG: could not bind IPv6 address "::1": Address already in use
2025-08-02 06:34:55.235 UTC [41803] HINT: Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
2025-08-02 06:34:55.235 UTC [41803] LOG: could not bind IPv4 address "127.0.0.1": Address already in use
2025-08-02 06:34:55.235 UTC [41803] HINT: Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
2025-08-02 06:34:55.235 UTC [41803] WARNING: could not create listen socket for "localhost"
2025-08-02 06:34:55.235 UTC [41803] FATAL: could not create any TCP/IP sockets
2025-08-02 06:34:55.240 UTC [41803] LOG: database system is shut down
2025-08-02 06:35:12.489 UTC [42020] LOG: starting PostgreSQL 18beta2 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit
2025-08-02 06:35:12.490 UTC [42020] LOG: could not bind IPv6 address "::1": Address already in use
2025-08-02 06:35:12.490 UTC [42020] HINT: Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
2025-08-02 06:35:12.490 UTC [42020] LOG: could not bind IPv4 address "127.0.0.1": Address already in use
2025-08-02 06:35:12.490 UTC [42020] HINT: Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
2025-08-02 06:35:12.490 UTC [42020] WARNING: could not create listen socket for "localhost"
2025-08-02 06:35:12.490 UTC [42020] FATAL: could not create any TCP/IP sockets
2025-08-02 06:35:12.494 UTC [42020] LOG: database system is shut down

25
test_aggregate_pg18.sql Normal file
View File

@ -0,0 +1,25 @@
-- Aggregate / GROUP BY checks used to narrow down the PostgreSQL 18 problem.
\c citus

-- The tables are expected to exist already; these definitions only apply
-- when they are missing.
CREATE TABLE IF NOT EXISTS users_table (
    user_id int,
    value_1 int,
    value_2 int
);
CREATE TABLE IF NOT EXISTS events_table (
    user_id int,
    event_type int,
    value_2 int
);

-- Populate with minimal data.
-- NOTE(review): the definitions above declare no unique constraint, so
-- ON CONFLICT DO NOTHING has nothing to conflict with and each run adds the
-- rows again -- confirm whether pre-existing tables carry a constraint.
INSERT INTO users_table (user_id, value_1, value_2)
VALUES
    (1, 10, 100),
    (2, 20, 200)
ON CONFLICT DO NOTHING;

INSERT INTO events_table (user_id, event_type, value_2)
VALUES
    (1, 1, 1),
    (2, 2, 2)
ON CONFLICT DO NOTHING;

-- Case 1: plain aggregate, no GROUP BY (should work)
SELECT avg(event_type)
FROM events_table;

-- Case 2: aggregate with GROUP BY (might fail)
SELECT avg(event_type)
FROM events_table
GROUP BY user_id;

-- Case 3: aggregate with GROUP BY over a filtering subquery (likely to fail)
SELECT avg(event_type)
FROM (
    SELECT event_type, user_id
    FROM events_table
    WHERE value_2 < 3
) AS sub
GROUP BY sub.user_id;

32
test_debug_detailed.sql Normal file
View File

@ -0,0 +1,32 @@
-- Runs the failing nested query with verbose diagnostics: DEBUG2 client
-- messages and Citus remote-command logging.
\c citus
SET client_min_messages TO DEBUG2;
SET citus.log_remote_commands TO ON;

-- The failing query. Nesting, innermost first:
--   foo  : first (user_id, event_type) row of events_table with value_2 < 3
--   bar  : foo joined to users_table on user_id
--   baz  : avg(event_type) per users_table.value_1
--   sub1 : at most one user_id with baz.avg_val < user_id
SELECT DISTINCT
    user_id
FROM
    (
        SELECT
            users_table.user_id
        FROM
            users_table,
            (
                SELECT
                    avg(event_type) AS avg_val
                FROM
                    (
                        SELECT
                            event_type,
                            users_table.user_id
                        FROM
                            users_table,
                            (
                                SELECT
                                    user_id,
                                    event_type
                                FROM
                                    events_table
                                WHERE
                                    value_2 < 3
                                ORDER BY 1, 2
                                LIMIT 1
                            ) AS foo
                        WHERE
                            foo.user_id = users_table.user_id
                    ) AS bar,
                    users_table
                WHERE
                    bar.user_id = users_table.user_id
                GROUP BY
                    users_table.value_1
            ) AS baz
        WHERE
            baz.avg_val < users_table.user_id
        ORDER BY 1
        LIMIT 1
    ) AS sub1;

116
test_debug_pg18.sql Normal file
View File

@ -0,0 +1,116 @@
-- Debug script for the PostgreSQL 18 "?column?" issue: a sequence of
-- queries of increasing complexity against users_table and events_table.
--
-- Connect first, so that every query below (including the first one) runs in
-- the citus database rather than in whatever database the session started in.
\c citus

-- Failing-pattern join, grouped by the output alias value1
SELECT users_table.value_1 AS value1
FROM (
    SELECT event_type, users_table.user_id
    FROM users_table,
    (
        SELECT user_id, event_type
        FROM events_table
        WHERE value_2 < 3
        ORDER BY 1, 2
        LIMIT 1
    ) AS foo
    WHERE foo.user_id = users_table.user_id
) AS bar, users_table
WHERE bar.user_id = users_table.user_id
GROUP BY value1;

-- Simple subquery that should work
SELECT user_id
FROM (
    SELECT user_id
    FROM users_table
    WHERE user_id = 1
) AS sub;

-- Aggregate subquery without a column alias (this might have the ?column? issue)
SELECT *
FROM (
    SELECT avg(user_id)
    FROM users_table
) AS sub;

-- Complex subquery similar to the failing one
SELECT *
FROM (
    SELECT avg(event_type) AS avg_val
    FROM (
        SELECT event_type
        FROM events_table
        WHERE value_2 < 3
        LIMIT 1
    ) AS foo
) AS bar;

-- Simple alias over two levels of SELECT *
SELECT value_1 AS value1
FROM (
    SELECT *
    FROM (
        SELECT *
        FROM users_table
    ) AS tab2
) AS tab;

-- Failing-pattern join again, this time without GROUP BY
SELECT users_table.value_1 AS value1
FROM (
    SELECT event_type, users_table.user_id
    FROM users_table,
    (
        SELECT user_id, event_type
        FROM events_table
        WHERE value_2 < 3
        ORDER BY 1, 2
        LIMIT 1
    ) AS foo
    WHERE foo.user_id = users_table.user_id
) AS bar, users_table
WHERE bar.user_id = users_table.user_id;

-- Simplified failing pattern: baz projects event_type directly
-- (no aggregate, no GROUP BY)
SELECT DISTINCT user_id
FROM (
    SELECT users_table.user_id
    FROM users_table,
    (
        SELECT event_type AS avg_val
        FROM (
            SELECT event_type, a_users.user_id
            FROM users_table AS a_users,
            (
                SELECT user_id, event_type
                FROM events_table
                WHERE value_2 < 3
                ORDER BY 1, 2
                LIMIT 1
            ) AS foo
            WHERE foo.user_id = a_users.user_id
        ) AS bar, users_table AS b_users
        WHERE bar.user_id = b_users.user_id
    ) AS baz
    WHERE baz.avg_val < users_table.user_id
    ORDER BY 1
    LIMIT 1
) AS sub1;

-- Full failing pattern: baz computes avg(event_type) grouped by
-- b_users.value_1
SELECT DISTINCT user_id
FROM (
    SELECT users_table.user_id
    FROM users_table,
    (
        SELECT avg(event_type) AS avg_val
        FROM (
            SELECT event_type, a_users.user_id
            FROM users_table AS a_users,
            (
                SELECT user_id, event_type
                FROM events_table
                WHERE value_2 < 3
                ORDER BY 1, 2
                LIMIT 1
            ) AS foo
            WHERE foo.user_id = a_users.user_id
        ) AS bar, users_table AS b_users
        WHERE bar.user_id = b_users.user_id
        GROUP BY b_users.value_1
    ) AS baz
    WHERE baz.avg_val < users_table.user_id
    ORDER BY 1
    LIMIT 1
) AS sub1;

21
test_failing_query.sql Normal file
View File

@ -0,0 +1,21 @@
-- Runs only the failing nested query, with the most verbose client
-- messages (DEBUG5).
\c citus
SET client_min_messages TO DEBUG5;

-- The exact failing pattern (expected to fail without the fix):
--   foo : first (user_id, event_type) row of events_table with value_2 < 3
--   bar : foo joined to users_table (a_users) on user_id
--   outer query : avg(event_type) per b_users.value_1
SELECT
    avg(event_type) AS avg_val
FROM
    (
        SELECT
            event_type,
            a_users.user_id
        FROM
            users_table AS a_users,
            (
                SELECT
                    user_id,
                    event_type
                FROM
                    events_table
                WHERE
                    value_2 < 3
                ORDER BY 1, 2
                LIMIT 1
            ) AS foo
        WHERE
            foo.user_id = a_users.user_id
    ) AS bar,
    users_table AS b_users
WHERE
    bar.user_id = b_users.user_id
GROUP BY
    b_users.value_1;

37
test_focus_pg18.sql Normal file
View File

@ -0,0 +1,37 @@
-- Focused reproduction of the ?column? issue.
\c citus

-- The tables are expected to exist already; these definitions only apply
-- when they are missing.
CREATE TABLE IF NOT EXISTS users_table (
    user_id int,
    value_1 int,
    value_2 int
);
CREATE TABLE IF NOT EXISTS events_table (
    user_id int,
    event_type int,
    value_2 int
);

-- Populate with minimal data.
-- NOTE(review): the definitions above declare no unique constraint, so
-- ON CONFLICT DO NOTHING has nothing to conflict with and each run adds the
-- rows again -- confirm whether pre-existing tables carry a constraint.
INSERT INTO users_table (user_id, value_1, value_2)
VALUES
    (1, 10, 100),
    (2, 20, 200)
ON CONFLICT DO NOTHING;

INSERT INTO events_table (user_id, event_type, value_2)
VALUES
    (1, 1, 1),
    (2, 2, 2)
ON CONFLICT DO NOTHING;

-- The simplified ?column? query. Nesting, innermost first:
--   foo  : first (user_id, event_type) row of events_table with value_2 < 3
--   bar  : foo joined to users_table (a_users) on user_id
--   baz  : avg(event_type) per b_users.value_1
--   sub1 : at most one user_id with baz.avg_val < user_id
SELECT DISTINCT
    user_id
FROM
    (
        SELECT
            users_table.user_id
        FROM
            users_table,
            (
                SELECT
                    avg(event_type) AS avg_val
                FROM
                    (
                        SELECT
                            event_type,
                            a_users.user_id
                        FROM
                            users_table AS a_users,
                            (
                                SELECT
                                    user_id,
                                    event_type
                                FROM
                                    events_table
                                WHERE
                                    value_2 < 3
                                ORDER BY 1, 2
                                LIMIT 1
                            ) AS foo
                        WHERE
                            foo.user_id = a_users.user_id
                    ) AS bar,
                    users_table AS b_users
                WHERE
                    bar.user_id = b_users.user_id
                GROUP BY
                    b_users.value_1
            ) AS baz
        WHERE
            baz.avg_val < users_table.user_id
        ORDER BY 1
        LIMIT 1
    ) AS sub1;

41
test_nested_pg18.sql Normal file
View File

@ -0,0 +1,41 @@
-- Deeply nested queries with joins and GROUP BY, building up to the failing
-- pattern one step at a time.
\c citus

-- Step 1: aggregate over a subquery that joins the two tables (should work)
SELECT
    avg(event_type) AS avg_val
FROM
    (
        SELECT
            event_type,
            users_table.user_id
        FROM
            users_table,
            events_table
        WHERE
            events_table.user_id = users_table.user_id
    ) AS sub
GROUP BY
    sub.user_id;

-- Step 2: the same aggregate wrapped in one more level of nesting (might fail)
SELECT
    avg_val
FROM
    (
        SELECT
            avg(event_type) AS avg_val
        FROM
            (
                SELECT
                    event_type,
                    users_table.user_id
                FROM
                    users_table,
                    events_table
                WHERE
                    events_table.user_id = users_table.user_id
            ) AS sub
        GROUP BY
            sub.user_id
    ) AS outer_sub;

-- Step 3: the exact failing pattern (this should fail)
SELECT
    avg(event_type) AS avg_val
FROM
    (
        SELECT
            event_type,
            a_users.user_id
        FROM
            users_table AS a_users,
            (
                SELECT
                    user_id,
                    event_type
                FROM
                    events_table
                WHERE
                    value_2 < 3
                ORDER BY 1, 2
                LIMIT 1
            ) AS foo
        WHERE
            foo.user_id = a_users.user_id
    ) AS bar,
    users_table AS b_users
WHERE
    bar.user_id = b_users.user_id
GROUP BY
    b_users.value_1;

30
test_pg18_fix_final.sql Normal file
View File

@ -0,0 +1,30 @@
-- PostgreSQL 18 compatibility check against the distributed tables that
-- already exist in the citus database.
\c citus

-- The pattern taken from subqueries_deep.sql, expected to fail without the
-- fix. Nesting, innermost first:
--   foo  : (user_id, event_type) from events_table where value_2 < 3
--   bar  : foo joined to users_table on user_id
--   baz  : avg(event_type) per users_table.value_1
--   sub1 : at most three user_ids with baz.avg_val < user_id
SELECT DISTINCT
    user_id
FROM
    (
        SELECT
            users_table.user_id
        FROM
            users_table,
            (
                SELECT
                    avg(event_type) AS avg_val
                FROM
                    (
                        SELECT
                            event_type,
                            users_table.user_id
                        FROM
                            users_table,
                            (
                                SELECT user_id, event_type
                                FROM events_table
                                WHERE value_2 < 3
                                ORDER BY 1, 2
                                OFFSET 0
                            ) AS foo
                        WHERE
                            foo.user_id = users_table.user_id
                    ) AS bar,
                    users_table
                WHERE
                    bar.user_id = users_table.user_id
                GROUP BY
                    users_table.value_1
            ) AS baz
        WHERE
            baz.avg_val < users_table.user_id
        ORDER BY 1
        LIMIT 3
    ) AS sub1
ORDER BY 1 DESC;

26
test_simple_pg18.sql Normal file
View File

@ -0,0 +1,26 @@
-- Simple PostgreSQL 18 test for the "?column?" issue.
-- Builds a throwaway schema with one distributed table, runs two subquery
-- shapes against it, and removes the schema again.

-- Drop and recreate to start fresh, so the script can be re-run after an
-- aborted earlier run left the schema behind.
DROP SCHEMA IF EXISTS test_simple CASCADE;
CREATE SCHEMA test_simple;
SET search_path TO test_simple;

CREATE TABLE simple_table (
    id INTEGER,
    value TEXT
);
SELECT create_distributed_table('simple_table', 'id');

-- Columns are named explicitly so the rows do not depend on column order.
INSERT INTO simple_table (id, value) VALUES
    (1, 'a'),
    (2, 'b'),
    (3, 'c');

-- Test a simple subquery that might trigger the "?column?" issue
SELECT *
FROM (
    SELECT id
    FROM simple_table
) AS sub;

-- Test a more complex nested subquery
SELECT *
FROM (
    SELECT id
    FROM (
        SELECT id, value
        FROM simple_table
    ) AS inner_sub
) AS outer_sub;

-- Clean up. Point search_path away from the schema before dropping it, so
-- the session is not left with a search_path naming a schema that is gone.
SET search_path TO public;
DROP SCHEMA test_simple CASCADE;

29
test_simple_subquery.sql Normal file
View File

@ -0,0 +1,29 @@
-- Smallest script meant to isolate the PostgreSQL 18 issue and show where
-- the ?column? error occurs.

-- Start from a clean slate: remove any previous copy of the schema.
DROP SCHEMA IF EXISTS simple_test CASCADE;
CREATE SCHEMA simple_test;
SET search_path TO simple_test, public;

CREATE TABLE events_table (
    user_id    int,
    event_type int,
    value_2    int
);
SELECT create_distributed_table('events_table', 'user_id');

INSERT INTO events_table
VALUES
    (1, 1, 1),
    (2, 2, 2),
    (3, 3, 3);

SET client_min_messages TO DEBUG1;

-- The simplest subquery intended to trigger the issue.
-- Only two of the three rows above satisfy value_2 < 3, so OFFSET 3 skips
-- every matching row and the result set is empty.
SELECT
    user_id,
    event_type
FROM
    events_table
WHERE
    value_2 < 3
ORDER BY 1, 2
OFFSET 3;

-- Clean up
SET search_path TO public;
DROP SCHEMA simple_test CASCADE;

View File

@ -0,0 +1,78 @@
-- Test the PostgreSQL 18 "?column?" fix for subqueries_deep.
-- Builds a throwaway schema with distributed users_table / events_table,
-- runs the failing query with DEBUG1 messages, and removes the schema again.

-- Drop and recreate to start fresh, so the script can be re-run after an
-- aborted earlier run left the schema behind.
DROP SCHEMA IF EXISTS test_pg18_fix CASCADE;
CREATE SCHEMA test_pg18_fix;
SET search_path TO test_pg18_fix, public;

-- Create test tables similar to what the regular tests use
CREATE TABLE users_table (
    user_id int,
    time timestamp,
    value_1 int,
    value_2 int,
    value_3 double precision,
    value_4 bigint
);

CREATE TABLE events_table (
    user_id int,
    time timestamp,
    event_type int,
    value_2 int,
    value_3 double precision,
    value_4 bigint
);

-- Create distributed tables
SELECT create_distributed_table('users_table', 'user_id');
SELECT create_distributed_table('events_table', 'user_id');

-- Insert some test data.
-- Columns are named explicitly so the rows do not depend on column order.
INSERT INTO users_table (user_id, time, value_1, value_2, value_3, value_4) VALUES
    (1, '2024-01-01', 10, 1, 1.1, 100),
    (2, '2024-01-02', 20, 2, 2.2, 200),
    (3, '2024-01-03', 30, 3, 3.3, 300),
    (4, '2024-01-04', 40, 4, 4.4, 400),
    (5, '2024-01-05', 50, 5, 5.5, 500),
    (6, '2024-01-06', 60, 6, 6.6, 600);

INSERT INTO events_table (user_id, time, event_type, value_2, value_3, value_4) VALUES
    (1, '2024-01-01', 1, 1, 1.1, 100),
    (2, '2024-01-02', 2, 2, 2.2, 200),
    (3, '2024-01-03', 3, 3, 3.3, 300),
    (4, '2024-01-04', 4, 4, 4.4, 400),
    (5, '2024-01-05', 5, 5, 5.5, 500),
    (6, '2024-01-06', 1, 6, 6.6, 600);

SET client_min_messages TO DEBUG1;

-- The original failing query from subqueries_deep.sql.
-- Nesting, innermost first:
--   foo  : (user_id, event_type) from events_table where value_2 < 3,
--          skipping the first three rows
--   bar  : foo joined to users_table on user_id
--   baz  : avg(event_type) per users_table.value_1
--   sub1 : at most three user_ids with baz.avg_val < user_id
-- NOTE(review): with the rows inserted above only two events have
-- value_2 < 3, so OFFSET 3 leaves foo empty -- confirm that exercising the
-- planner alone is the intent.
SELECT DISTINCT
    user_id
FROM
    (
        SELECT
            users_table.user_id
        FROM
            users_table,
            (
                SELECT
                    avg(event_type) AS avg_val
                FROM
                    (
                        SELECT
                            event_type,
                            users_table.user_id
                        FROM
                            users_table,
                            (
                                SELECT user_id, event_type
                                FROM events_table
                                WHERE value_2 < 3
                                ORDER BY 1, 2
                                OFFSET 3
                            ) AS foo
                        WHERE
                            foo.user_id = users_table.user_id
                    ) AS bar,
                    users_table
                WHERE
                    bar.user_id = users_table.user_id
                GROUP BY
                    users_table.value_1
            ) AS baz
        WHERE
            baz.avg_val < users_table.user_id
        ORDER BY 1
        LIMIT 3
    ) AS sub1
ORDER BY 1 DESC;

-- Clean up
SET search_path TO public;
DROP SCHEMA test_pg18_fix CASCADE;