diff --git a/debug_query.sql b/debug_query.sql
new file mode 100644
index 000000000..4ffde825c
--- /dev/null
+++ b/debug_query.sql
@@ -0,0 +1,32 @@
+-- Debug query for PostgreSQL 18 compatibility issue
+\c citus
+
+SET citus.log_remote_commands = on;
+SET client_min_messages = DEBUG1;
+
+SELECT
+    DISTINCT user_id
+FROM
+    (
+        SELECT users_table.user_id FROM users_table,
+        (
+            SELECT
+                avg(event_type) as avg_val
+            FROM
+                (SELECT
+                    event_type, users_table.user_id
+                FROM
+                    users_table, (SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 0) as foo
+                WHERE
+                    foo.user_id = users_table.user_id) bar, users_table
+            WHERE
+                bar.user_id = users_table.user_id
+            GROUP BY
+                users_table.value_1
+        ) as baz
+        WHERE
+            baz.avg_val < users_table.user_id
+        ORDER BY 1
+        LIMIT 3
+    ) as sub1
+    ORDER BY 1 DESC;
diff --git a/debug_test.sql b/debug_test.sql
new file mode 100644
index 000000000..b641615a8
--- /dev/null
+++ b/debug_test.sql
@@ -0,0 +1,15 @@
+
+-- Create a simple test to reproduce the issue
+CREATE SCHEMA debug_test;
+SET search_path TO debug_test, public;
+SET client_min_messages TO DEBUG1;
+
+-- Simple query that should generate intermediate results
+SELECT DISTINCT user_id FROM (
+    SELECT users_table.user_id
+    FROM users_table
+    WHERE user_id < 3
+    ORDER BY 1
+    LIMIT 2
+) as sub1 ORDER BY 1 DESC;
+
diff --git a/regression.diffs b/regression.diffs
new file mode 100644
index 000000000..e69de29bb
diff --git a/regression.out b/regression.out
new file mode 100644
index 000000000..2ca51aa58
--- /dev/null
+++ b/regression.out
@@ -0,0 +1,2 @@
+# using postmaster on localhost, port 57636
+# diff command failed with status 512: diff "/workspaces/citus/expected/subqueries_deep.out" "/workspaces/citus/results/subqueries_deep.out" > "/workspaces/citus/results/subqueries_deep.out.diff"Bail out!
diff --git a/results/subqueries_deep.out.diff b/results/subqueries_deep.out.diff
new file mode 100644
index 000000000..e69de29bb
diff --git a/setup_test_tables.sql b/setup_test_tables.sql
new file mode 100644
index 000000000..46f671150
--- /dev/null
+++ b/setup_test_tables.sql
@@ -0,0 +1,25 @@
+-- Setup test tables for PostgreSQL 18 compatibility testing
+\c citus
+
+SET citus.shard_replication_factor = 1;
+SET citus.shard_count = 4;
+
+-- Create tables if they don't exist
+CREATE TABLE IF NOT EXISTS users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
+CREATE TABLE IF NOT EXISTS events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
+
+-- Create distributed tables
+SELECT create_distributed_table('users_table', 'user_id');
+SELECT create_distributed_table('events_table', 'user_id');
+
+-- Insert some test data
+INSERT INTO users_table VALUES
+(1, now(), 10, 20, 1.5, 100),
+(2, now(), 30, 40, 2.5, 200),
+(3, now(), 50, 60, 3.5, 300);
+
+INSERT INTO events_table VALUES
+(1, now(), 1, 1, 1.1, 10),
+(2, now(), 2, 2, 2.2, 20),
+(3, now(), 3, 3, 3.3, 30),
+(1, now(), 4, 4, 4.4, 40);
diff --git a/src/backend/distributed/deparser/citus_ruleutils.c b/src/backend/distributed/deparser/citus_ruleutils.c
index a25cdf5bb..9d54804d4 100644
--- a/src/backend/distributed/deparser/citus_ruleutils.c
+++ b/src/backend/distributed/deparser/citus_ruleutils.c
@@ -1869,15 +1869,22 @@ ExpandMergedSubscriptingRefEntries(List *targetEntryList)
 			Expr *refexpr = subsRef->refexpr;
 			subsRef->refexpr = NULL;
 
-			/*
-			 * Wrap the Expr that holds SubscriptingRef (directly or indirectly)
-			 * in a new TargetEntry; note that it doesn't have a refexpr anymore.
-			 */
-			TargetEntry *newTargetEntry = copyObject(targetEntry);
-			newTargetEntry->expr = expr;
-			expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry);
-
-			/* now inspect the refexpr that SubscriptingRef at hand were holding */
+			/*
+			 * Wrap the Expr that holds SubscriptingRef (directly or indirectly)
+			 * in a new TargetEntry; note that it doesn't have a refexpr anymore.
+			 */
+			TargetEntry *newTargetEntry = copyObject(targetEntry);
+			newTargetEntry->expr = expr;
+
+			/*
+			 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
+			 */
+			if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
+			{
+				newTargetEntry->resname = psprintf("expr_col_%d", newTargetEntry->resno);
+			}
+
+			expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry); /* now inspect the refexpr that SubscriptingRef at hand were holding */
 			expr = refexpr;
 		}
diff --git a/src/backend/distributed/deparser/ruleutils_18.c b/src/backend/distributed/deparser/ruleutils_18.c
index 7d4b99741..d57c94466 100644
--- a/src/backend/distributed/deparser/ruleutils_18.c
+++ b/src/backend/distributed/deparser/ruleutils_18.c
@@ -2722,7 +2722,16 @@ get_simple_values_rte(Query *query, TupleDesc resultDesc)
 		if (resultDesc && colno <= resultDesc->natts)
 			colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname);
 		else
+		{
 			colname = tle->resname;
+			/* PostgreSQL 18 compatibility: handle NULL resname */
+			if (colname == NULL || strcmp(colname, "?column?") == 0)
+			{
+				static char generated_name[64];
+				snprintf(generated_name, sizeof(generated_name), "intermediate_column_%d", colno);
+				colname = generated_name;
+			}
+		}
 
 		/* does it match the VALUES RTE? */
 		if (colname == NULL || strcmp(colname, cname) != 0)
@@ -2921,8 +2930,17 @@ get_target_list(List *targetList, deparse_context *context)
 			 * When colNamesVisible is true, we should always show the
 			 * assigned column name explicitly. Otherwise, show it only if
 			 * it's not FigureColname's fallback.
+			 *
+			 * PostgreSQL 18 fix: Instead of using "?column?" which causes issues
+			 * in complex subqueries, generate a meaningful column name.
 			 */
-			attname = context->colNamesVisible ? NULL : "?column?";
+			if (context->colNamesVisible)
+				attname = NULL;
+			else
+			{
+				/* Generate a column name that won't cause parsing issues */
+				attname = psprintf("expr_%d", colno);
+			}
 		}
 
 		/*
@@ -2935,7 +2953,23 @@
 			colname = NameStr(TupleDescAttr(context->resultDesc, colno - 1)->attname);
 		else
+		{
 			colname = tle->resname;
+
+			/*
+			 * PostgreSQL 18 fix: tle->resname can be NULL for intermediate expressions.
+			 * In that case, generate a meaningful column name instead of using "?column?".
+			 */
+			if (colname == NULL || strcmp(colname, "?column?") == 0)
+			{
+				/*
+				 * Generate a column name in the format "col_N" where N is the column number.
+				 * This provides a stable, predictable name that won't cause issues with
+				 * column resolution in complex subqueries.
+				 */
+				colname = psprintf("col_%d", colno);
+			}
+		}
 
 		/* Show AS unless the column's name is correct as-is */
 		if (colname)			/* resname could be NULL */
@@ -4532,7 +4566,16 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context)
 			colname = NameStr(TupleDescAttr(context->resultDesc, colno - 1)->attname);
 		else
+		{
 			colname = tle->resname;
+			/* PostgreSQL 18 compatibility: handle NULL resname */
+			if (colname == NULL || strcmp(colname, "?column?") == 0)
+			{
+				static char generated_name[64];
+				snprintf(generated_name, sizeof(generated_name), "intermediate_column_%d", colno);
+				colname = generated_name;
+			}
+		}
 
 		if (colname && strcmp(colname, attname) == 0 && !equal(var, tle->expr))
 		{
diff --git a/src/backend/distributed/planner/combine_query_planner.c b/src/backend/distributed/planner/combine_query_planner.c
index c8ab2a4b3..a8f39d419 100644
--- a/src/backend/distributed/planner/combine_query_planner.c
+++ b/src/backend/distributed/planner/combine_query_planner.c
@@ -110,6 +110,19 @@ RemoteScanTargetList(List *workerTargetList)
 		 * entry's sort and group clauses will *break* us here.
 		 */
 		TargetEntry *remoteScanTargetEntry = flatCopyTargetEntry(workerTargetEntry);
+
+		/*
+		 * PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
+		 * When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
+		 * we need to generate a proper column name to avoid parsing errors.
+		 */
+		if (remoteScanTargetEntry->resname == NULL || strcmp(remoteScanTargetEntry->resname, "?column?") == 0)
+		{
+			StringInfo generatedName = makeStringInfo();
+			appendStringInfo(generatedName, "remote_col_%d", workerTargetEntry->resno);
+			remoteScanTargetEntry->resname = generatedName->data;
+		}
+
 		remoteScanTargetEntry->expr = (Expr *) remoteScanColumn;
 		remoteScanTargetList = lappend(remoteScanTargetList, remoteScanTargetEntry);
 	}
@@ -272,7 +285,18 @@ BuildSelectStatementViaStdPlanner(Query *combineQuery, List *remoteScanTargetLis
 	TargetEntry *targetEntry = NULL;
 	foreach_declared_ptr(targetEntry, remoteScanTargetList)
 	{
-		columnNameList = lappend(columnNameList, makeString(targetEntry->resname));
+		char *resname = targetEntry->resname;
+
+		/*
+		 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
+		 * by generating a proper column name for the remote scan RTE
+		 */
+		if (resname == NULL || strcmp(resname, "?column?") == 0)
+		{
+			resname = psprintf("remote_col_%d", targetEntry->resno);
+		}
+
+		columnNameList = lappend(columnNameList, makeString(resname));
 	}
 	extradataContainerRTE->eref = makeAlias("remote_scan", columnNameList);
 }
diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c
index e22296ec7..34f114fff 100644
--- a/src/backend/distributed/planner/distributed_planner.c
+++ b/src/backend/distributed/planner/distributed_planner.c
@@ -1545,7 +1545,18 @@ FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan)
 	TargetEntry *targetEntry = NULL;
 	foreach_declared_ptr(targetEntry, customScan->scan.plan.targetlist)
 	{
-		String *columnName = makeString(targetEntry->resname);
+		char *resname = targetEntry->resname;
+
+		/*
+		 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
+		 * by generating a proper column name for the remote scan RTE
+		 */
+		if (resname == NULL || strcmp(resname, "?column?") == 0)
+		{
+			resname = psprintf("remote_col_%d", targetEntry->resno);
+		}
+
+		String *columnName = makeString(resname);
 		columnNameList = lappend(columnNameList, columnName);
 	}
 
@@ -1613,6 +1624,19 @@ makeCustomScanTargetlistFromExistingTargetList(List *existingTargetlist)
 		}
 
 		TargetEntry *newTargetEntry = flatCopyTargetEntry(targetEntry);
+
+		/*
+		 * PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
+		 * When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
+		 * we need to generate a proper column name to avoid parsing errors.
+		 */
+		if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
+		{
+			StringInfo generatedName = makeStringInfo();
+			appendStringInfo(generatedName, "custom_col_%d", targetEntry->resno);
+			newTargetEntry->resname = generatedName->data;
+		}
+
 		newTargetEntry->expr = (Expr *) newVar;
 		custom_scan_tlist = lappend(custom_scan_tlist, newTargetEntry);
 	}
diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c
index 3bf0bb327..6b03b3602 100644
--- a/src/backend/distributed/planner/insert_select_planner.c
+++ b/src/backend/distributed/planner/insert_select_planner.c
@@ -654,6 +654,18 @@ CreateTargetListForCombineQuery(List *targetList)
 	foreach_declared_ptr(originalTargetEntry, targetList)
 	{
 		TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry);
+
+		/*
+		 * PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
+		 * When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
+		 * we need to generate a proper column name to avoid parsing errors.
+		 */
+		if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
+		{
+			StringInfo generatedName = makeStringInfo();
+			appendStringInfo(generatedName, "insert_col_%d", originalTargetEntry->resno);
+			newTargetEntry->resname = generatedName->data;
+		}
 		Var *column = makeVarFromTargetEntry(masterTableId, originalTargetEntry);
 		column->varattno = columnId;
@@ -1116,6 +1128,14 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 			newSubqueryTargetEntry = copyObject(oldSubqueryTle);
 
+			/*
+			 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
+			 */
+			if (newSubqueryTargetEntry->resname == NULL || strcmp(newSubqueryTargetEntry->resname, "?column?") == 0)
+			{
+				newSubqueryTargetEntry->resname = psprintf("expr_col_%d", newSubqueryTargetEntry->resno);
+			}
+
 			newSubqueryTargetEntry->resno = resno;
 			newSubqueryTargetlist = lappend(newSubqueryTargetlist, newSubqueryTargetEntry);
@@ -1130,7 +1150,17 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 								 newSubqueryTargetEntry);
 		}
 
-		String *columnName = makeString(newSubqueryTargetEntry->resname);
+		/*
+		 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
+		 * before creating columnName for eref->colnames
+		 */
+		char *resname = newSubqueryTargetEntry->resname;
+		if (resname == NULL || strcmp(resname, "?column?") == 0)
+		{
+			resname = psprintf("insert_col_%d", newSubqueryTargetEntry->resno);
+		}
+
+		String *columnName = makeString(resname);
 		columnNameList = lappend(columnNameList, columnName);
 
 		/*
@@ -1175,6 +1205,14 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 
 		TargetEntry *newSubqueryTargetEntry = copyObject(oldSubqueryTle);
 
+		/*
+		 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
+		 */
+		if (newSubqueryTargetEntry->resname == NULL || strcmp(newSubqueryTargetEntry->resname, "?column?") == 0)
+		{
+			newSubqueryTargetEntry->resname = psprintf("expr_col_%d", newSubqueryTargetEntry->resno);
+		}
+
 		newSubqueryTargetEntry->resno = resno;
 		newSubqueryTargetlist = lappend(newSubqueryTargetlist, newSubqueryTargetEntry);
diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c
index 7deced084..e7205d029 100644
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@@ -1430,6 +1430,18 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
 		TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry);
 		Expr *originalExpression = originalTargetEntry->expr;
 		Expr *newExpression = NULL;
+
+		/*
+		 * PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
+		 * When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
+		 * we need to generate a proper column name to avoid parsing errors on workers.
+		 */
+		if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
+		{
+			StringInfo generatedName = makeStringInfo();
+			appendStringInfo(generatedName, "expr_col_%d", originalTargetEntry->resno);
+			newTargetEntry->resname = generatedName->data;
+		}
 
 		if (CanPushDownExpression((Node *) originalExpression, extendedOpNodeProperties))
 		{
@@ -2947,7 +2959,7 @@ GenerateWorkerTargetEntry(TargetEntry *targetEntry, Expr *workerExpression,
 		newTargetEntry = makeNode(TargetEntry);
 	}
 
-	if (newTargetEntry->resname == NULL)
+	if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
 	{
 		newTargetEntry->resname = WorkerColumnName(targetProjectionNumber);
 	}
diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c
index e0f4c4332..3e6ba00ff 100644
--- a/src/backend/distributed/planner/multi_physical_planner.c
+++ b/src/backend/distributed/planner/multi_physical_planner.c
@@ -1436,6 +1436,29 @@ ExtractColumns(RangeTblEntry *callingRTE, int rangeTableId,
 			  columnNames, columnVars);
 #endif
+
+#if PG_VERSION_NUM >= PG_VERSION_18
+	/*
+	 * PostgreSQL 18 compatibility: expandRTE might return NULL or "?column?" names
+	 * for intermediate expressions. Fix these column names to avoid errors.
+	 */
+	List *fixedColumnNames = NIL;
+	ListCell *nameCell = NULL;
+	int colIndex = 1;
+
+	foreach(nameCell, *columnNames)
+	{
+		char *colName = strVal(lfirst(nameCell));
+		if (colName == NULL || strcmp(colName, "?column?") == 0)
+		{
+			colName = psprintf("expand_col_%d", colIndex);
+		}
+		fixedColumnNames = lappend(fixedColumnNames, makeString(colName));
+		colIndex++;
+	}
+
+	*columnNames = fixedColumnNames;
+#endif
 }
diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c
index 9db6481cb..14871393f 100644
--- a/src/backend/distributed/planner/recursive_planning.c
+++ b/src/backend/distributed/planner/recursive_planning.c
@@ -305,6 +305,32 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
 	/* descend into subqueries */
 	query_tree_walker(query, RecursivelyPlanSubqueryWalker, context, 0);
 
+	/*
+	 * PostgreSQL 18 may generate NULL or "?column?" as column names
+	 * for intermediate results. After subquery replacement, we need to fix
+	 * any remaining "?column?" references in the main query's target list.
+	 */
+	ListCell *targetEntryCell = NULL;
+	int columnNumber = 1;
+	foreach(targetEntryCell, query->targetList)
+	{
+		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
+
+		if (targetEntry->resjunk)
+		{
+			continue;
+		}
+
+		if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
+		{
+			StringInfo generatedName = makeStringInfo();
+			appendStringInfo(generatedName, "main_query_column_%d", columnNumber);
+			targetEntry->resname = generatedName->data;
+		}
+
+		columnNumber++;
+	}
+
 	/*
 	 * At this point, all CTEs, leaf subqueries containing local tables and
 	 * non-pushdownable subqueries have been replaced. We now check for
@@ -1170,6 +1196,32 @@ RecursivelyPlanCTEs(Query *query, RecursivePlanningContext *planningContext)
 
 		uint32 subPlanId = list_length(planningContext->subPlanList) + 1;
 
+		/*
+		 * PostgreSQL 18 may generate NULL or "?column?" as column names
+		 * for intermediate results. We need to fix these before the subquery
+		 * gets planned, as the intermediate result files will use these names.
+		 */
+		ListCell *targetEntryCell = NULL;
+		int columnNumber = 1;
+		foreach(targetEntryCell, subquery->targetList)
+		{
+			TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
+
+			if (targetEntry->resjunk)
+			{
+				continue;
+			}
+
+			if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
+			{
+				StringInfo generatedName = makeStringInfo();
+				appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
+				targetEntry->resname = generatedName->data;
+			}
+
+			columnNumber++;
+		}
+
 		if (IsLoggableLevel(DEBUG1))
 		{
 			StringInfo subPlanString = makeStringInfo();
@@ -1272,6 +1324,32 @@ RecursivelyPlanSubqueryWalker(Node *node, RecursivePlanningContext *context)
 	{
 		Query *query = (Query *) node;
 
+		/*
+		 * PostgreSQL 18 may generate NULL or "?column?" as column names
+		 * for intermediate results. Fix these before processing any subqueries
+		 * that might reference this query's target list.
+		 */
+		ListCell *targetEntryCell = NULL;
+		int columnNumber = 1;
+		foreach(targetEntryCell, query->targetList)
+		{
+			TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
+
+			if (targetEntry->resjunk)
+			{
+				continue;
+			}
+
+			if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
+			{
+				StringInfo generatedName = makeStringInfo();
+				appendStringInfo(generatedName, "walker_query_column_%d_%d", context->level, columnNumber);
+				targetEntry->resname = generatedName->data;
+			}
+
+			columnNumber++;
+		}
+
 		context->level += 1;
 
 		/*
@@ -1569,6 +1647,31 @@ RecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *planningConte
 		debugQuery = copyObject(subquery);
 	}
 
+	/*
+	 * PostgreSQL 18 may generate NULL or "?column?" as column names
+	 * for intermediate results. We need to fix these before the subquery
+	 * gets planned, as the intermediate result files will use these names.
+	 */
+	ListCell *targetEntryCell = NULL;
+	int columnNumber = 1;
+	foreach(targetEntryCell, subquery->targetList)
+	{
+		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
+
+		if (targetEntry->resjunk)
+		{
+			continue;
+		}
+
+		if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
+		{
+			StringInfo generatedName = makeStringInfo();
+			appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
+			targetEntry->resname = generatedName->data;
+		}
+
+		columnNumber++;
+	}
 	/*
 	 * Create the subplan and append it to the list in the planning context.
@@ -1937,9 +2040,16 @@ GenerateRequiredColNamesFromTargetList(List *targetList)
 			 * column names of the inner subquery should only contain the
 			 * required columns, as in if we choose 'b' from ('a','b') colnames
 			 * should be 'a' not ('a','b')
+			 *
+			 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
 			 */
-			innerSubqueryColNames = lappend(innerSubqueryColNames, makeString(
-												entry->resname));
+			char *resname = entry->resname;
+			if (resname == NULL || strcmp(resname, "?column?") == 0)
+			{
+				resname = psprintf("subquery_col_%d", entry->resno);
+			}
+
+			innerSubqueryColNames = lappend(innerSubqueryColNames, makeString(resname));
 		}
 	}
 	return innerSubqueryColNames;
@@ -2482,6 +2592,17 @@ BuildReadIntermediateResultsQuery(List *targetEntryList, List *columnAliasList,
 			continue;
 		}
 
+		/*
+		 * PostgreSQL 18 may generate NULL or "?column?" as column names
+		 * for intermediate results. Generate a proper column name in such cases.
+		 */
+		if (columnName == NULL || strcmp(columnName, "?column?") == 0)
+		{
+			StringInfo generatedName = makeStringInfo();
+			appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
+			columnName = generatedName->data;
+		}
+
 		funcColNames = lappend(funcColNames, makeString(columnName));
 		funcColTypes = lappend_int(funcColTypes, columnType);
 		funcColTypMods = lappend_int(funcColTypMods, columnTypMod);
diff --git a/src/test/regress/coordinator_logfile b/src/test/regress/coordinator_logfile
new file mode 100644
index 000000000..564e1ae05
--- /dev/null
+++ b/src/test/regress/coordinator_logfile
@@ -0,0 +1,16 @@
+2025-08-02 06:34:55.234 UTC [41803] LOG:  starting PostgreSQL 18beta2 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit
+2025-08-02 06:34:55.235 UTC [41803] LOG:  could not bind IPv6 address "::1": Address already in use
+2025-08-02 06:34:55.235 UTC [41803] HINT:  Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
+2025-08-02 06:34:55.235 UTC [41803] LOG:  could not bind IPv4 address "127.0.0.1": Address already in use
+2025-08-02 06:34:55.235 UTC [41803] HINT:  Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
+2025-08-02 06:34:55.235 UTC [41803] WARNING:  could not create listen socket for "localhost"
+2025-08-02 06:34:55.235 UTC [41803] FATAL:  could not create any TCP/IP sockets
+2025-08-02 06:34:55.240 UTC [41803] LOG:  database system is shut down
+2025-08-02 06:35:12.489 UTC [42020] LOG:  starting PostgreSQL 18beta2 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit
+2025-08-02 06:35:12.490 UTC [42020] LOG:  could not bind IPv6 address "::1": Address already in use
+2025-08-02 06:35:12.490 UTC [42020] HINT:  Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
+2025-08-02 06:35:12.490 UTC [42020] LOG:  could not bind IPv4 address "127.0.0.1": Address already in use
+2025-08-02 06:35:12.490 UTC [42020] HINT:  Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
+2025-08-02 06:35:12.490 UTC [42020] WARNING:  could not create listen socket for "localhost"
+2025-08-02 06:35:12.490 UTC [42020] FATAL:  could not create any TCP/IP sockets
+2025-08-02 06:35:12.494 UTC [42020] LOG:  database system is shut down
diff --git a/test_aggregate_pg18.sql b/test_aggregate_pg18.sql
new file mode 100644
index 000000000..a78433750
--- /dev/null
+++ b/test_aggregate_pg18.sql
@@ -0,0 +1,25 @@
+-- Test aggregate with GROUP BY to isolate the problem
+\c citus
+
+-- Tables should already exist
+CREATE TABLE IF NOT EXISTS users_table (user_id int, value_1 int, value_2 int);
+CREATE TABLE IF NOT EXISTS events_table (user_id int, event_type int, value_2 int);
+
+-- Populate with minimal data
+INSERT INTO users_table (user_id, value_1, value_2) VALUES (1, 10, 100), (2, 20, 200) ON CONFLICT DO NOTHING;
+INSERT INTO events_table (user_id, event_type, value_2) VALUES (1, 1, 1), (2, 2, 2) ON CONFLICT DO NOTHING;
+
+-- Test simple aggregate without GROUP BY (should work)
+SELECT avg(event_type) FROM events_table;
+
+-- Test simple aggregate with GROUP BY (might fail)
+SELECT avg(event_type) FROM events_table GROUP BY user_id;
+
+-- Test nested aggregate with GROUP BY (likely to fail)
+SELECT avg(event_type)
+FROM (
+    SELECT event_type, user_id
+    FROM events_table
+    WHERE value_2 < 3
+) sub
+GROUP BY sub.user_id;
diff --git a/test_debug_detailed.sql b/test_debug_detailed.sql
new file mode 100644
index 000000000..b8a72d8d0
--- /dev/null
+++ b/test_debug_detailed.sql
@@ -0,0 +1,32 @@
+-- Debug test with detailed logging
+\c citus
+
+SET client_min_messages TO DEBUG2;
+SET citus.log_remote_commands TO ON;
+
+-- Test the failing query with detailed logging
+SELECT DISTINCT user_id
+FROM (
+    SELECT users_table.user_id
+    FROM users_table,
+    (
+        SELECT avg(event_type) as avg_val
+        FROM (
+            SELECT event_type, users_table.user_id
+            FROM users_table,
+            (
+                SELECT user_id, event_type
+                FROM events_table
+                WHERE value_2 < 3
+                ORDER BY 1, 2
+                LIMIT 1
+            ) as foo
+            WHERE foo.user_id = users_table.user_id
+        ) bar, users_table
+        WHERE bar.user_id = users_table.user_id
+        GROUP BY users_table.value_1
+    ) as baz
+    WHERE baz.avg_val < users_table.user_id
+    ORDER BY 1
+    LIMIT 1
+) as sub1;
diff --git a/test_debug_pg18.sql b/test_debug_pg18.sql
new file mode 100644
index 000000000..25a3e1791
--- /dev/null
+++ b/test_debug_pg18.sql
@@ -0,0 +1,116 @@
+-- Test to debug PostgreSQL 18 -- Test the actual failing pattern before
+ SELECT users_table.value_1 as value1
+ FROM (
+ SELECT event_type, users_table.user_id
+ FROM users_table,
+ (
+ SELECT user_id, event_type
+ FROM events_table
+ WHERE value_2 < 3
+ ORDER BY 1, 2
+ LIMIT 1
+ ) as foo
+ WHERE foo.user_id = users_table.user_id
+ ) bar, users_table
+ WHERE bar.user_id = users_table.user_id
+ GROUP BY value1;
+-- First let's test with our existing setup
+\c citus
+
+-- Simple subquery that should work
+SELECT user_id FROM (SELECT user_id FROM users_table WHERE user_id = 1) sub;
+
+-- Test aggregate subquery (this might have ?column? issue)
+SELECT * FROM (SELECT avg(user_id) FROM users_table) sub;
+
+-- Test complex subquery similar to the failing one
+SELECT * FROM (
+    SELECT avg(event_type) as avg_val
+    FROM (
+        SELECT event_type
+        FROM events_table
+        WHERE value_2 < 3
+        LIMIT 1
+    ) foo
+) bar;
+
+--Test simple alias
+SELECT value_1 as value1
+from (
+    select * from (
+        select * from users_table
+    ) as tab2
+) as tab;
+
+-- Test the actual failing pattern before
+ SELECT users_table.value_1 as value1
+ FROM (
+ SELECT event_type, users_table.user_id
+ FROM users_table,
+ (
+ SELECT user_id, event_type
+ FROM events_table
+ WHERE value_2 < 3
+ ORDER BY 1, 2
+ LIMIT 1
+ ) as foo
+ WHERE foo.user_id = users_table.user_id
+ ) bar, users_table
+ WHERE bar.user_id = users_table.user_id
+ ;
+
+-- Test the actual failing pattern
+-- Test the simplified ?column? issue
+SELECT DISTINCT user_id
+FROM (
+    SELECT users_table.user_id
+    FROM users_table,
+    (
+        SELECT event_type as avg_val
+        FROM (
+            SELECT event_type, a_users.user_id
+            FROM users_table as a_users,
+            (
+                SELECT user_id, event_type
+                FROM events_table
+                WHERE value_2 < 3
+                ORDER BY 1, 2
+                LIMIT 1
+            ) as foo
+            WHERE foo.user_id = a_users.user_id
+        ) bar, users_table as b_users
+        WHERE bar.user_id = b_users.user_id
+    ) as baz
+    WHERE baz.avg_val < users_table.user_id
+    ORDER BY 1
+    LIMIT 1
+) as sub1;
+
+
+-- Test the actual failing pattern
+-- Test the simplified ?column? issue
+SELECT DISTINCT user_id
+FROM (
+    SELECT users_table.user_id
+    FROM users_table,
+    (
+        SELECT avg(event_type) as avg_val
+        FROM (
+            SELECT event_type, a_users.user_id
+            FROM users_table as a_users,
+            (
+                SELECT user_id, event_type
+                FROM events_table
+                WHERE value_2 < 3
+                ORDER BY 1, 2
+                LIMIT 1
+            ) as foo
+            WHERE foo.user_id = a_users.user_id
+        ) bar, users_table as b_users
+        WHERE bar.user_id = b_users.user_id
+        GROUP BY b_users.value_1
+    ) as baz
+    WHERE baz.avg_val < users_table.user_id
+    ORDER BY 1
+    LIMIT 1
+) as sub1;
\ No newline at end of file
diff --git a/test_failing_query.sql b/test_failing_query.sql
new file mode 100644
index 000000000..b2acbd8e9
--- /dev/null
+++ b/test_failing_query.sql
@@ -0,0 +1,21 @@
+-- Test only the failing complex nested query
+\c citus
+
+SET client_min_messages TO DEBUG5;
+
+-- The exact failing pattern (this should fail without our fix)
+SELECT avg(event_type) as avg_val
+FROM (
+    SELECT event_type, a_users.user_id
+    FROM users_table as a_users,
+    (
+        SELECT user_id, event_type
+        FROM events_table
+        WHERE value_2 < 3
+        ORDER BY 1, 2
+        LIMIT 1
+    ) as foo
+    WHERE foo.user_id = a_users.user_id
+) bar, users_table as b_users
+WHERE bar.user_id = b_users.user_id
+GROUP BY b_users.value_1;
diff --git a/test_focus_pg18.sql b/test_focus_pg18.sql
new file mode 100644
index 000000000..3d2cf0bef
--- /dev/null
+++ b/test_focus_pg18.sql
@@ -0,0 +1,37 @@
+-- Focus on the ?column? issue specifically
+\c citus
+
+-- Tables should already exist, just in case
+CREATE TABLE IF NOT EXISTS users_table (user_id int, value_1 int, value_2 int);
+CREATE TABLE IF NOT EXISTS events_table (user_id int, event_type int, value_2 int);
+
+-- Populate with minimal data
+INSERT INTO users_table (user_id, value_1, value_2) VALUES (1, 10, 100), (2, 20, 200) ON CONFLICT DO NOTHING;
+INSERT INTO events_table (user_id, event_type, value_2) VALUES (1, 1, 1), (2, 2, 2) ON CONFLICT DO NOTHING;
+
+-- Test the simplified ?column? issue
+SELECT DISTINCT user_id
+FROM (
+    SELECT users_table.user_id
+    FROM users_table,
+    (
+        SELECT avg(event_type) as avg_val
+        FROM (
+            SELECT event_type, a_users.user_id
+            FROM users_table as a_users,
+            (
+                SELECT user_id, event_type
+                FROM events_table
+                WHERE value_2 < 3
+                ORDER BY 1, 2
+                LIMIT 1
+            ) as foo
+            WHERE foo.user_id = a_users.user_id
+        ) bar, users_table as b_users
+        WHERE bar.user_id = b_users.user_id
+        GROUP BY b_users.value_1
+    ) as baz
+    WHERE baz.avg_val < users_table.user_id
+    ORDER BY 1
+    LIMIT 1
+) as sub1;
diff --git a/test_nested_pg18.sql b/test_nested_pg18.sql
new file mode 100644
index 000000000..565c77721
--- /dev/null
+++ b/test_nested_pg18.sql
@@ -0,0 +1,41 @@
+-- Test deeply nested queries with JOINs and GROUP BY
+\c citus
+
+-- Test the exact failing pattern step by step
+-- Step 1: Simple subquery with JOIN (should work)
+SELECT avg(event_type) as avg_val
+FROM (
+    SELECT event_type, users_table.user_id
+    FROM users_table, events_table
+    WHERE events_table.user_id = users_table.user_id
+) sub
+GROUP BY sub.user_id;
+
+-- Step 2: Add one more level of nesting (might fail)
+SELECT avg_val
+FROM (
+    SELECT avg(event_type) as avg_val
+    FROM (
+        SELECT event_type, users_table.user_id
+        FROM users_table, events_table
+        WHERE events_table.user_id = users_table.user_id
+    ) sub
+    GROUP BY sub.user_id
+) outer_sub;
+
+-- Step 3: The exact failing pattern (this should fail)
+SELECT avg(event_type) as avg_val
+FROM (
+    SELECT event_type, a_users.user_id
+    FROM users_table as a_users,
+    (
+        SELECT user_id, event_type
+        FROM events_table
+        WHERE value_2 < 3
+        ORDER BY 1, 2
+        LIMIT 1
+    ) as foo
+    WHERE foo.user_id = a_users.user_id
+) bar, users_table as b_users
+WHERE bar.user_id = b_users.user_id
+GROUP BY b_users.value_1;
diff --git a/test_pg18_fix_final.sql b/test_pg18_fix_final.sql
new file mode 100644
index 000000000..36e7e371b
--- /dev/null
+++ b/test_pg18_fix_final.sql
@@ -0,0 +1,30 @@
+-- Test PostgreSQL 18 compatibility using existing distributed tables
+\c citus
+
+-- Test the exact pattern from subqueries_deep.sql that should fail without our fix
+SELECT
+    DISTINCT user_id
+FROM
+    (
+        SELECT users_table.user_id FROM users_table,
+        (
+            SELECT
+                avg(event_type) as avg_val
+            FROM
+                (SELECT
+                    event_type, users_table.user_id
+                FROM
+                    users_table, (SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 0) as foo
+                WHERE
+                    foo.user_id = users_table.user_id) bar, users_table
+            WHERE
+                bar.user_id = users_table.user_id
+            GROUP BY
+                users_table.value_1
+        ) as baz
+        WHERE
+            baz.avg_val < users_table.user_id
+        ORDER BY 1
+        LIMIT 3
+    ) as sub1
+    ORDER BY 1 DESC;
diff --git a/test_simple_pg18.sql b/test_simple_pg18.sql
new file mode 100644
index 000000000..6d0383b4a
--- /dev/null
+++ b/test_simple_pg18.sql
@@ -0,0 +1,26 @@
+-- Simple PostgreSQL 18 test for "?column?" issue
+CREATE SCHEMA test_simple;
+SET search_path TO test_simple;
+
+CREATE TABLE simple_table (
+    id INTEGER,
+    value TEXT
+);
+
+SELECT create_distributed_table('simple_table', 'id');
+
+INSERT INTO simple_table VALUES (1, 'a'), (2, 'b'), (3, 'c');
+
+-- Test a simple subquery that might trigger the "?column?" issue
+SELECT * FROM (
+    SELECT id FROM simple_table
+) AS sub;
+
+-- Test more complex nested subquery
+SELECT * FROM (
+    SELECT id FROM (
+        SELECT id, value FROM simple_table
+    ) inner_sub
+) outer_sub;
+
+DROP SCHEMA test_simple CASCADE;
diff --git a/test_simple_subquery.sql b/test_simple_subquery.sql
new file mode 100644
index 000000000..13d4f7c59
--- /dev/null
+++ b/test_simple_subquery.sql
@@ -0,0 +1,29 @@
+-- Simple test to isolate the PostgreSQL 18 issue
+-- This should show where the ?column? error occurs
+
+-- Drop and recreate to start fresh
+DROP SCHEMA IF EXISTS simple_test CASCADE;
+CREATE SCHEMA simple_test;
+SET search_path TO simple_test, public;
+
+CREATE TABLE events_table (
+    user_id int,
+    event_type int,
+    value_2 int
+);
+
+SELECT create_distributed_table('events_table', 'user_id');
+
+INSERT INTO events_table VALUES
+(1, 1, 1),
+(2, 2, 2),
+(3, 3, 3);
+
+SET client_min_messages TO DEBUG1;
+
+-- This is the simplest subquery that should trigger the issue
+SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 3;
+
+-- Clean up
+SET search_path TO public;
+DROP SCHEMA simple_test CASCADE;
diff --git a/test_subqueries_deep_custom.sql b/test_subqueries_deep_custom.sql
new file mode 100644
index 000000000..33f4def91
--- /dev/null
+++ b/test_subqueries_deep_custom.sql
@@ -0,0 +1,78 @@
+-- Test PostgreSQL 18 "?column?" fix for subqueries_deep
+-- Create basic tables for testing
+CREATE SCHEMA test_pg18_fix;
+SET search_path TO test_pg18_fix, public;
+
+-- Create test tables similar to what the regular tests use
+CREATE TABLE users_table (
+    user_id int,
+    time timestamp,
+    value_1 int,
+    value_2 int,
+    value_3 double precision,
+    value_4 bigint
+);
+
+CREATE TABLE events_table (
+    user_id int,
+    time timestamp,
+    event_type int,
+    value_2 int,
+    value_3 double precision,
+    value_4 bigint
+);
+
+-- Create distributed tables
+SELECT create_distributed_table('users_table', 'user_id');
+SELECT create_distributed_table('events_table', 'user_id');
+
+-- Insert some test data
+INSERT INTO users_table VALUES
+(1, '2024-01-01', 10, 1, 1.1, 100),
+(2, '2024-01-02', 20, 2, 2.2, 200),
+(3, '2024-01-03', 30, 3, 3.3, 300),
+(4, '2024-01-04', 40, 4, 4.4, 400),
+(5, '2024-01-05', 50, 5, 5.5, 500),
+(6, '2024-01-06', 60, 6, 6.6, 600);
+
+INSERT INTO events_table VALUES
+(1, '2024-01-01', 1, 1, 1.1, 100),
+(2, '2024-01-02', 2, 2, 2.2, 200),
+(3, '2024-01-03', 3, 3, 3.3, 300),
+(4, '2024-01-04', 4, 4, 4.4, 400),
+(5, '2024-01-05', 5, 5, 5.5, 500),
+(6, '2024-01-06', 1, 6, 6.6, 600);
+
+SET client_min_messages TO DEBUG1;
+
+-- Test the original failing query from subqueries_deep.sql
+SELECT
+    DISTINCT user_id
+FROM
+    (
+        SELECT users_table.user_id FROM users_table,
+        (
+            SELECT
+                avg(event_type) as avg_val
+            FROM
+                (SELECT
+                    event_type, users_table.user_id
+                FROM
+                    users_table, (SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 3) as foo
+                WHERE
+                    foo.user_id = users_table.user_id) bar, users_table
+            WHERE
+                bar.user_id = users_table.user_id
+            GROUP BY
+                users_table.value_1
+        ) as baz
+        WHERE
+            baz.avg_val < users_table.user_id
+        ORDER BY 1
+        LIMIT 3
+    ) as sub1
+    ORDER BY 1 DESC;
+
+-- Clean up
+SET search_path TO public;
+DROP SCHEMA test_pg18_fix CASCADE;