Fix PostgreSQL 18 compatibility issues related to "?column?" names in subqueries

- Updated various planner and deparser files to handle NULL or "?column?" as resname by generating meaningful column names. - Ensured that generated names follow a consistent format to avoid parsing errors in complex queries. - Added debug queries and tests to isolate and reproduce the issues related to intermediate results and subqueries. - Created setup scripts for test tables and data to facilitate regression testing for PostgreSQL 18 compatibility.
2025-08-02 08:50:50 +00:00 · 2025-08-02 08:50:50 +00:00 · d653df74cf
parent c183634207
commit d653df74cf
25 changed files with 833 additions and 16 deletions
--- a/debug_query.sql
+++ b/debug_query.sql
@ -0,0 +1,32 @@
 -- Debug query for PostgreSQL 18 compatibility issue
 \c citus
 SET citus.log_remote_commands = on;
 SET client_min_messages = DEBUG1;
 SELECT
 	DISTINCT user_id
 FROM
 	(
 		SELECT users_table.user_id FROM users_table,
 		(
 			SELECT
 				avg(event_type) as avg_val
 			FROM
 				(SELECT
 					event_type, users_table.user_id
 				 FROM
 				 	users_table, (SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 0) as foo
 				 WHERE
 				 	foo.user_id = users_table.user_id) bar, users_table
 			WHERE
 				bar.user_id = users_table.user_id
 		GROUP BY
 			users_table.value_1
 		) as baz
 		WHERE
 			baz.avg_val < users_table.user_id
 		ORDER BY 1
 		LIMIT 3
 	) as sub1
 	ORDER BY 1 DESC;
--- a/debug_test.sql
+++ b/debug_test.sql
@ -0,0 +1,15 @@
 -- Create a simple test to reproduce the issue
 CREATE SCHEMA debug_test;
 SET search_path TO debug_test, public;
 SET client_min_messages TO DEBUG1;
 -- Simple query that should generate intermediate results
 SELECT DISTINCT user_id FROM (
    SELECT users_table.user_id 
    FROM users_table 
    WHERE user_id < 3
    ORDER BY 1 
    LIMIT 2
 ) as sub1 ORDER BY 1 DESC;
--- a/regression.diffs
+++ b/regression.diffs
--- a/regression.out
+++ b/regression.out
@ -0,0 +1,2 @@
 # using postmaster on localhost, port 57636
 # diff command failed with status 512: diff  "/workspaces/citus/expected/subqueries_deep.out" "/workspaces/citus/results/subqueries_deep.out" > "/workspaces/citus/results/subqueries_deep.out.diff"Bail out!
--- a/results/subqueries_deep.out.diff
+++ b/results/subqueries_deep.out.diff
--- a/setup_test_tables.sql
+++ b/setup_test_tables.sql
@ -0,0 +1,25 @@
 -- Setup test tables for PostgreSQL 18 compatibility testing
 \c citus
 SET citus.shard_replication_factor = 1;
 SET citus.shard_count = 4;
 -- Create tables if they don't exist
 CREATE TABLE IF NOT EXISTS users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
 CREATE TABLE IF NOT EXISTS events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
 -- Create distributed tables
 SELECT create_distributed_table('users_table', 'user_id');
 SELECT create_distributed_table('events_table', 'user_id');
 -- Insert some test data
 INSERT INTO users_table VALUES 
 (1, now(), 10, 20, 1.5, 100),
 (2, now(), 30, 40, 2.5, 200),
 (3, now(), 50, 60, 3.5, 300);
 INSERT INTO events_table VALUES 
 (1, now(), 1, 1, 1.1, 10),
 (2, now(), 2, 2, 2.2, 20),
 (3, now(), 3, 3, 3.3, 30),
 (1, now(), 4, 4, 4.4, 40);
--- a/src/backend/distributed/deparser/citus_ruleutils.c
+++ b/src/backend/distributed/deparser/citus_ruleutils.c
@ -1869,15 +1869,22 @@ ExpandMergedSubscriptingRefEntries(List *targetEntryList)
 			Expr *refexpr = subsRef->refexpr;
 			subsRef->refexpr = NULL;
-			/*
+		/*
-			 * Wrap the Expr that holds SubscriptingRef (directly or indirectly)
+		 * Wrap the Expr that holds SubscriptingRef (directly or indirectly)
-			 * in a new TargetEntry; note that it doesn't have a refexpr anymore.
+		 * in a new TargetEntry; note that it doesn't have a refexpr anymore.
-			 */
+		 */
-			TargetEntry *newTargetEntry = copyObject(targetEntry);
+		TargetEntry *newTargetEntry = copyObject(targetEntry);
-			newTargetEntry->expr = expr;
+		newTargetEntry->expr = expr;
-			expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry);
+		
-
+		/*
-			/* now inspect the refexpr that SubscriptingRef at hand were holding */
+		 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
 		 */
 		if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
 		{
 			newTargetEntry->resname = psprintf("expr_col_%d", newTargetEntry->resno);
 		}
 		expandedTargetEntries = lappend(expandedTargetEntries, newTargetEntry);			/* now inspect the refexpr that SubscriptingRef at hand were holding */
 			expr = refexpr;
 		}
--- a/src/backend/distributed/deparser/ruleutils_18.c
+++ b/src/backend/distributed/deparser/ruleutils_18.c
@ -2722,7 +2722,16 @@ get_simple_values_rte(Query *query, TupleDesc resultDesc)
 			if (resultDesc && colno <= resultDesc->natts)
 				colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname);
 			else
 			{
 				colname = tle->resname;
 				/* PostgreSQL 18 compatibility: handle NULL resname */
 				if (colname == NULL || strcmp(colname, "?column?") == 0)
 				{
 					static char generated_name[64];
 					snprintf(generated_name, sizeof(generated_name), "intermediate_column_%d", colno);
 					colname = generated_name;
 				}
 			}
 			/* does it match the VALUES RTE? */
 			if (colname == NULL || strcmp(colname, cname) != 0)
@ -2921,8 +2930,17 @@ get_target_list(List *targetList, deparse_context *context)
 			 * When colNamesVisible is true, we should always show the
 			 * assigned column name explicitly.  Otherwise, show it only if
 			 * it's not FigureColname's fallback.
 			 * 
 			 * PostgreSQL 18 fix: Instead of using "?column?" which causes issues
 			 * in complex subqueries, generate a meaningful column name.
 			 */
-			attname = context->colNamesVisible ? NULL : "?column?";
+			if (context->colNamesVisible)
 				attname = NULL;
 			else
 			{
 				/* Generate a column name that won't cause parsing issues */
 				attname = psprintf("expr_%d", colno);
 			}
 		}
 		/*
@ -2935,7 +2953,23 @@ get_target_list(List *targetList, deparse_context *context)
 			colname = NameStr(TupleDescAttr(context->resultDesc,
 											colno - 1)->attname);
 		else
 		{
 			colname = tle->resname;
 			/*
 			 * PostgreSQL 18 fix: tle->resname can be NULL for intermediate expressions.
 			 * In that case, generate a meaningful column name instead of using "?column?".
 			 */
 			if (colname == NULL || strcmp(colname, "?column?") == 0)
 			{
 				/*
 				 * Generate a column name in the format "col_N" where N is the column number.
 				 * This provides a stable, predictable name that won't cause issues with
 				 * column resolution in complex subqueries.
 				 */
 				colname = psprintf("col_%d", colno);
 			}
 		}
 		/* Show AS unless the column's name is correct as-is */
 		if (colname)			/* resname could be NULL */
@ -4532,7 +4566,16 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context)
 				colname = NameStr(TupleDescAttr(context->resultDesc,
 												colno - 1)->attname);
 			else
 			{
 				colname = tle->resname;
 				/* PostgreSQL 18 compatibility: handle NULL resname */
 				if (colname == NULL || strcmp(colname, "?column?") == 0)
 				{
 					static char generated_name[64];
 					snprintf(generated_name, sizeof(generated_name), "intermediate_column_%d", colno);
 					colname = generated_name;
 				}
 			}
 			if (colname && strcmp(colname, attname) == 0 &&
 				!equal(var, tle->expr))
 			{
--- a/src/backend/distributed/planner/combine_query_planner.c
+++ b/src/backend/distributed/planner/combine_query_planner.c
@ -110,6 +110,19 @@ RemoteScanTargetList(List *workerTargetList)
 		 * entry's sort and group clauses will *break* us here.
 		 */
 		TargetEntry *remoteScanTargetEntry = flatCopyTargetEntry(workerTargetEntry);
 		/*
 		 * PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
 		 * When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
 		 * we need to generate a proper column name to avoid parsing errors.
 		 */
 		if (remoteScanTargetEntry->resname == NULL || strcmp(remoteScanTargetEntry->resname, "?column?") == 0)
 		{
 			StringInfo generatedName = makeStringInfo();
 			appendStringInfo(generatedName, "remote_col_%d", workerTargetEntry->resno);
 			remoteScanTargetEntry->resname = generatedName->data;
 		}
 		remoteScanTargetEntry->expr = (Expr *) remoteScanColumn;
 		remoteScanTargetList = lappend(remoteScanTargetList, remoteScanTargetEntry);
 	}
@ -272,7 +285,18 @@ BuildSelectStatementViaStdPlanner(Query *combineQuery, List *remoteScanTargetLis
 		TargetEntry *targetEntry = NULL;
 		foreach_declared_ptr(targetEntry, remoteScanTargetList)
 		{
-			columnNameList = lappend(columnNameList, makeString(targetEntry->resname));
+			char *resname = targetEntry->resname;
 			/*
 			 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
 			 * by generating a proper column name for the remote scan RTE
 			 */
 			if (resname == NULL || strcmp(resname, "?column?") == 0)
 			{
 				resname = psprintf("remote_col_%d", targetEntry->resno);
 			}
 			columnNameList = lappend(columnNameList, makeString(resname));
 		}
 		extradataContainerRTE->eref = makeAlias("remote_scan", columnNameList);
 	}
--- a/src/backend/distributed/planner/distributed_planner.c
+++ b/src/backend/distributed/planner/distributed_planner.c
@ -1545,7 +1545,18 @@ FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan)
 	TargetEntry *targetEntry = NULL;
 	foreach_declared_ptr(targetEntry, customScan->scan.plan.targetlist)
 	{
-		String *columnName = makeString(targetEntry->resname);
+		char *resname = targetEntry->resname;
 		/*
 		 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
 		 * by generating a proper column name for the remote scan RTE
 		 */
 		if (resname == NULL || strcmp(resname, "?column?") == 0)
 		{
 			resname = psprintf("remote_col_%d", targetEntry->resno);
 		}
 		String *columnName = makeString(resname);
 		columnNameList = lappend(columnNameList, columnName);
 	}
@ -1613,6 +1624,19 @@ makeCustomScanTargetlistFromExistingTargetList(List *existingTargetlist)
 		}
 		TargetEntry *newTargetEntry = flatCopyTargetEntry(targetEntry);
 		/*
 		 * PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
 		 * When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
 		 * we need to generate a proper column name to avoid parsing errors.
 		 */
 		if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
 		{
 			StringInfo generatedName = makeStringInfo();
 			appendStringInfo(generatedName, "custom_col_%d", targetEntry->resno);
 			newTargetEntry->resname = generatedName->data;
 		}
 		newTargetEntry->expr = (Expr *) newVar;
 		custom_scan_tlist = lappend(custom_scan_tlist, newTargetEntry);
 	}
--- a/src/backend/distributed/planner/insert_select_planner.c
+++ b/src/backend/distributed/planner/insert_select_planner.c
@ -654,6 +654,18 @@ CreateTargetListForCombineQuery(List *targetList)
 	foreach_declared_ptr(originalTargetEntry, targetList)
 	{
 		TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry);
 		/*
 		 * PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
 		 * When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
 		 * we need to generate a proper column name to avoid parsing errors.
 		 */
 		if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
 		{
 			StringInfo generatedName = makeStringInfo();
 			appendStringInfo(generatedName, "insert_col_%d", originalTargetEntry->resno);
 			newTargetEntry->resname = generatedName->data;
 		}
 		Var *column = makeVarFromTargetEntry(masterTableId, originalTargetEntry);
 		column->varattno = columnId;
@ -1116,6 +1128,14 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 			newSubqueryTargetEntry = copyObject(oldSubqueryTle);
 			/*
 			 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
 			 */
 			if (newSubqueryTargetEntry->resname == NULL || strcmp(newSubqueryTargetEntry->resname, "?column?") == 0)
 			{
 				newSubqueryTargetEntry->resname = psprintf("expr_col_%d", newSubqueryTargetEntry->resno);
 			}
 			newSubqueryTargetEntry->resno = resno;
 			newSubqueryTargetlist = lappend(newSubqueryTargetlist,
 											newSubqueryTargetEntry);
@ -1130,7 +1150,17 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 											newSubqueryTargetEntry);
 		}
-		String *columnName = makeString(newSubqueryTargetEntry->resname);
+		/*
 		 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
 		 * before creating columnName for eref->colnames
 		 */
 		char *resname = newSubqueryTargetEntry->resname;
 		if (resname == NULL || strcmp(resname, "?column?") == 0)
 		{
 			resname = psprintf("insert_col_%d", newSubqueryTargetEntry->resno);
 		}
 		String *columnName = makeString(resname);
 		columnNameList = lappend(columnNameList, columnName);
 		/*
@ -1175,6 +1205,14 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
 		TargetEntry *newSubqueryTargetEntry = copyObject(oldSubqueryTle);
 		/*
 		 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
 		 */
 		if (newSubqueryTargetEntry->resname == NULL || strcmp(newSubqueryTargetEntry->resname, "?column?") == 0)
 		{
 			newSubqueryTargetEntry->resname = psprintf("expr_col_%d", newSubqueryTargetEntry->resno);
 		}
 		newSubqueryTargetEntry->resno = resno;
 		newSubqueryTargetlist = lappend(newSubqueryTargetlist,
 										newSubqueryTargetEntry);
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@ -1430,6 +1430,18 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
 		TargetEntry *newTargetEntry = flatCopyTargetEntry(originalTargetEntry);
 		Expr *originalExpression = originalTargetEntry->expr;
 		Expr *newExpression = NULL;
 		/*
 		 * PostgreSQL 18 fix: Handle "?column?" names from intermediate expressions.
 		 * When flatCopyTargetEntry copies a target entry that has "?column?" as resname,
 		 * we need to generate a proper column name to avoid parsing errors on workers.
 		 */
 		if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
 		{
 			StringInfo generatedName = makeStringInfo();
 			appendStringInfo(generatedName, "expr_col_%d", originalTargetEntry->resno);
 			newTargetEntry->resname = generatedName->data;
 		}
 		if (CanPushDownExpression((Node *) originalExpression, extendedOpNodeProperties))
 		{
@ -2947,7 +2959,7 @@ GenerateWorkerTargetEntry(TargetEntry *targetEntry, Expr *workerExpression,
 		newTargetEntry = makeNode(TargetEntry);
 	}
-	if (newTargetEntry->resname == NULL)
+	if (newTargetEntry->resname == NULL || strcmp(newTargetEntry->resname, "?column?") == 0)
 	{
 		newTargetEntry->resname = WorkerColumnName(targetProjectionNumber);
 	}
--- a/src/backend/distributed/planner/multi_physical_planner.c
+++ b/src/backend/distributed/planner/multi_physical_planner.c
@ -1436,6 +1436,29 @@ ExtractColumns(RangeTblEntry *callingRTE, int rangeTableId,
 			  columnNames,
 			  columnVars);
 #endif
 #if PG_VERSION_NUM >= PG_VERSION_18
 	/*
 	 * PostgreSQL 18 compatibility: expandRTE might return NULL or "?column?" names
 	 * for intermediate expressions. Fix these column names to avoid errors.
 	 */
 	List *fixedColumnNames = NIL;
 	ListCell *nameCell = NULL;
 	int colIndex = 1;
 	foreach(nameCell, *columnNames)
 	{
 		char *colName = strVal(lfirst(nameCell));
 		if (colName == NULL || strcmp(colName, "?column?") == 0)
 		{
 			colName = psprintf("expand_col_%d", colIndex);
 		}
 		fixedColumnNames = lappend(fixedColumnNames, makeString(colName));
 		colIndex++;
 	}
 	*columnNames = fixedColumnNames;
 #endif
 }
--- a/src/backend/distributed/planner/recursive_planning.c
+++ b/src/backend/distributed/planner/recursive_planning.c
@ -305,6 +305,32 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
 	/* descend into subqueries */
 	query_tree_walker(query, RecursivelyPlanSubqueryWalker, context, 0);
 	/*
 	 * PostgreSQL 18 may generate NULL or "?column?" as column names
 	 * for intermediate results. After subquery replacement, we need to fix
 	 * any remaining "?column?" references in the main query's target list.
 	 */
 	ListCell *targetEntryCell = NULL;
 	int columnNumber = 1;
 	foreach(targetEntryCell, query->targetList)
 	{
 		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
 		if (targetEntry->resjunk)
 		{
 			continue;
 		}
 		if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
 		{
 			StringInfo generatedName = makeStringInfo();
 			appendStringInfo(generatedName, "main_query_column_%d", columnNumber);
 			targetEntry->resname = generatedName->data;
 		}
 		columnNumber++;
 	}
 	/*
 	 * At this point, all CTEs, leaf subqueries containing local tables and
 	 * non-pushdownable subqueries have been replaced. We now check for
@ -1170,6 +1196,32 @@ RecursivelyPlanCTEs(Query *query, RecursivePlanningContext *planningContext)
 		uint32 subPlanId = list_length(planningContext->subPlanList) + 1;
 		/*
 		 * PostgreSQL 18 may generate NULL or "?column?" as column names
 		 * for intermediate results. We need to fix these before the subquery
 		 * gets planned, as the intermediate result files will use these names.
 		 */
 		ListCell *targetEntryCell = NULL;
 		int columnNumber = 1;
 		foreach(targetEntryCell, subquery->targetList)
 		{
 			TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
 			if (targetEntry->resjunk)
 			{
 				continue;
 			}
 			if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
 			{
 				StringInfo generatedName = makeStringInfo();
 				appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
 				targetEntry->resname = generatedName->data;
 			}
 			columnNumber++;
 		}
 		if (IsLoggableLevel(DEBUG1))
 		{
 			StringInfo subPlanString = makeStringInfo();
@ -1272,6 +1324,32 @@ RecursivelyPlanSubqueryWalker(Node *node, RecursivePlanningContext *context)
 	{
 		Query *query = (Query *) node;
 		/*
 		 * PostgreSQL 18 may generate NULL or "?column?" as column names
 		 * for intermediate results. Fix these before processing any subqueries
 		 * that might reference this query's target list.
 		 */
 		ListCell *targetEntryCell = NULL;
 		int columnNumber = 1;
 		foreach(targetEntryCell, query->targetList)
 		{
 			TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
 			if (targetEntry->resjunk)
 			{
 				continue;
 			}
 			if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
 			{
 				StringInfo generatedName = makeStringInfo();
 				appendStringInfo(generatedName, "walker_query_column_%d_%d", context->level, columnNumber);
 				targetEntry->resname = generatedName->data;
 			}
 			columnNumber++;
 		}
 		context->level += 1;
 		/*
@ -1569,6 +1647,31 @@ RecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *planningConte
 		debugQuery = copyObject(subquery);
 	}
 	/*
 	 * PostgreSQL 18 may generate NULL or "?column?" as column names
 	 * for intermediate results. We need to fix these before the subquery
 	 * gets planned, as the intermediate result files will use these names.
 	 */
 	ListCell *targetEntryCell = NULL;
 	int columnNumber = 1;
 	foreach(targetEntryCell, subquery->targetList)
 	{
 		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
 		if (targetEntry->resjunk)
 		{
 			continue;
 		}
 		if (targetEntry->resname == NULL || strcmp(targetEntry->resname, "?column?") == 0)
 		{
 			StringInfo generatedName = makeStringInfo();
 			appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
 			targetEntry->resname = generatedName->data;
 		}
 		columnNumber++;
 	}
 	/*
 	 * Create the subplan and append it to the list in the planning context.
@ -1937,9 +2040,16 @@ GenerateRequiredColNamesFromTargetList(List *targetList)
 			 * column names of the inner subquery should only contain the
 			 * required columns, as in if we choose 'b' from ('a','b') colnames
 			 * should be 'a' not ('a','b')
 			 * 
 			 * PostgreSQL 18 compatibility: handle NULL or "?column?" resname
 			 */
-			innerSubqueryColNames = lappend(innerSubqueryColNames, makeString(
+			char *resname = entry->resname;
-												entry->resname));
+			if (resname == NULL || strcmp(resname, "?column?") == 0)
 			{
 				resname = psprintf("subquery_col_%d", entry->resno);
 			}
 			innerSubqueryColNames = lappend(innerSubqueryColNames, makeString(resname));
 		}
 	}
 	return innerSubqueryColNames;
@ -2482,6 +2592,17 @@ BuildReadIntermediateResultsQuery(List *targetEntryList, List *columnAliasList,
 			continue;
 		}
 		/*
 		 * PostgreSQL 18 may generate NULL or "?column?" as column names
 		 * for intermediate results. Generate a proper column name in such cases.
 		 */
 		if (columnName == NULL || strcmp(columnName, "?column?") == 0)
 		{
 			StringInfo generatedName = makeStringInfo();
 			appendStringInfo(generatedName, "intermediate_column_%d", columnNumber);
 			columnName = generatedName->data;
 		}
 		funcColNames = lappend(funcColNames, makeString(columnName));
 		funcColTypes = lappend_int(funcColTypes, columnType);
 		funcColTypMods = lappend_int(funcColTypMods, columnTypMod);
--- a/src/test/regress/coordinator_logfile
+++ b/src/test/regress/coordinator_logfile
@ -0,0 +1,16 @@
 2025-08-02 06:34:55.234 UTC [41803] LOG:  starting PostgreSQL 18beta2 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit
 2025-08-02 06:34:55.235 UTC [41803] LOG:  could not bind IPv6 address "::1": Address already in use
 2025-08-02 06:34:55.235 UTC [41803] HINT:  Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
 2025-08-02 06:34:55.235 UTC [41803] LOG:  could not bind IPv4 address "127.0.0.1": Address already in use
 2025-08-02 06:34:55.235 UTC [41803] HINT:  Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
 2025-08-02 06:34:55.235 UTC [41803] WARNING:  could not create listen socket for "localhost"
 2025-08-02 06:34:55.235 UTC [41803] FATAL:  could not create any TCP/IP sockets
 2025-08-02 06:34:55.240 UTC [41803] LOG:  database system is shut down
 2025-08-02 06:35:12.489 UTC [42020] LOG:  starting PostgreSQL 18beta2 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit
 2025-08-02 06:35:12.490 UTC [42020] LOG:  could not bind IPv6 address "::1": Address already in use
 2025-08-02 06:35:12.490 UTC [42020] HINT:  Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
 2025-08-02 06:35:12.490 UTC [42020] LOG:  could not bind IPv4 address "127.0.0.1": Address already in use
 2025-08-02 06:35:12.490 UTC [42020] HINT:  Is another postmaster already running on port 9700? If not, wait a few seconds and retry.
 2025-08-02 06:35:12.490 UTC [42020] WARNING:  could not create listen socket for "localhost"
 2025-08-02 06:35:12.490 UTC [42020] FATAL:  could not create any TCP/IP sockets
 2025-08-02 06:35:12.494 UTC [42020] LOG:  database system is shut down
--- a/test_aggregate_pg18.sql
+++ b/test_aggregate_pg18.sql
@ -0,0 +1,25 @@
 -- Test aggregate with GROUP BY to isolate the problem
 \c citus
 -- Tables should already exist
 CREATE TABLE IF NOT EXISTS users_table (user_id int, value_1 int, value_2 int);
 CREATE TABLE IF NOT EXISTS events_table (user_id int, event_type int, value_2 int);
 -- Populate with minimal data
 INSERT INTO users_table (user_id, value_1, value_2) VALUES (1, 10, 100), (2, 20, 200) ON CONFLICT DO NOTHING;
 INSERT INTO events_table (user_id, event_type, value_2) VALUES (1, 1, 1), (2, 2, 2) ON CONFLICT DO NOTHING;
 -- Test simple aggregate without GROUP BY (should work)
 SELECT avg(event_type) FROM events_table;
 -- Test simple aggregate with GROUP BY (might fail)
 SELECT avg(event_type) FROM events_table GROUP BY user_id;
 -- Test nested aggregate with GROUP BY (likely to fail)
 SELECT avg(event_type) 
 FROM (
    SELECT event_type, user_id
    FROM events_table 
    WHERE value_2 < 3
 ) sub
 GROUP BY sub.user_id;
--- a/test_debug_detailed.sql
+++ b/test_debug_detailed.sql
@ -0,0 +1,32 @@
 -- Debug test with detailed logging
 \c citus
 SET client_min_messages TO DEBUG2;
 SET citus.log_remote_commands TO ON;
 -- Test the failing query with detailed logging
 SELECT DISTINCT user_id
 FROM (
    SELECT users_table.user_id 
    FROM users_table,
    (
        SELECT avg(event_type) as avg_val
        FROM (
            SELECT event_type, users_table.user_id
            FROM users_table, 
            (
                SELECT user_id, event_type 
                FROM events_table 
                WHERE value_2 < 3 
                ORDER BY 1, 2 
                LIMIT 1
            ) as foo
            WHERE foo.user_id = users_table.user_id
        ) bar, users_table
        WHERE bar.user_id = users_table.user_id
        GROUP BY users_table.value_1
    ) as baz
    WHERE baz.avg_val < users_table.user_id
    ORDER BY 1
    LIMIT 1
 ) as sub1;
--- a/test_debug_pg18.sql
+++ b/test_debug_pg18.sql
@ -0,0 +1,116 @@
 -- Test to debug PostgreSQL 1-- Test the actual failing pattern before
        SELECT users_table.value_1 as value1
        FROM (
            SELECT event_type, users_table.user_id
            FROM users_table, 
            (
                SELECT user_id, event_type 
                FROM events_table 
                WHERE value_2 < 3 
                ORDER BY 1, 2 
                LIMIT 1
            ) as foo
            WHERE foo.user_id = users_table.user_id
        ) bar, users_table
        WHERE bar.user_id = users_table.user_id
        GROUP BY value1;
 -- First let's test with our existing setup
 \c citus
 -- Simple subquery that should work
 SELECT user_id FROM (SELECT user_id FROM users_table WHERE user_id = 1) sub;
 -- Test aggregate subquery (this might have ?column? issue)
 SELECT * FROM (SELECT avg(user_id) FROM users_table) sub;
 -- Test complex subquery similar to the failing one
 SELECT * FROM (
    SELECT avg(event_type) as avg_val
    FROM (
        SELECT event_type 
        FROM events_table 
        WHERE value_2 < 3 
        LIMIT 1
    ) foo
 ) bar;
 --Test simple alias
 SELECT value_1 as value1
 from ( 
    select * from (
        select * from users_table
    ) as tab2 
 ) as tab;
 -- Test the actual failing pattern before
        SELECT users_table.value_1 as value1
        FROM (
            SELECT event_type, users_table.user_id
            FROM users_table, 
            (
                SELECT user_id, event_type 
                FROM events_table 
                WHERE value_2 < 3 
                ORDER BY 1, 2 
                LIMIT 1
            ) as foo
            WHERE foo.user_id = users_table.user_id
        ) bar, users_table
        WHERE bar.user_id = users_table.user_id
   ;
 -- Test the actual failing pattern
 -- Test the simplified ?column? issue
 SELECT DISTINCT user_id
 FROM (
    SELECT users_table.user_id 
    FROM users_table,
    (
        SELECT event_type as avg_val
        FROM (
            SELECT event_type, a_users.user_id
            FROM users_table as a_users, 
            (
                SELECT user_id, event_type 
                FROM events_table 
                WHERE value_2 < 3 
                ORDER BY 1, 2 
                LIMIT 1
            ) as foo
            WHERE foo.user_id = a_users.user_id
        ) bar, users_table as b_users
        WHERE bar.user_id = b_users.user_id
    ) as baz
    WHERE baz.avg_val < users_table.user_id
    ORDER BY 1
    LIMIT 1
 ) as sub1;
 -- Test the actual failing pattern
 -- Test the simplified ?column? issue
 SELECT DISTINCT user_id
 FROM (
    SELECT users_table.user_id 
    FROM users_table,
    (
        SELECT avg(event_type) as avg_val
        FROM (
            SELECT event_type, a_users.user_id
            FROM users_table as a_users, 
            (
                SELECT user_id, event_type 
                FROM events_table 
                WHERE value_2 < 3 
                ORDER BY 1, 2 
                LIMIT 1
            ) as foo
            WHERE foo.user_id = a_users.user_id
        ) bar, users_table as b_users
        WHERE bar.user_id = b_users.user_id
        GROUP BY b_users.value_1
    ) as baz
    WHERE baz.avg_val < users_table.user_id
    ORDER BY 1
    LIMIT 1
 ) as sub1;
--- a/test_failing_query.sql
+++ b/test_failing_query.sql
@ -0,0 +1,21 @@
 -- Test only the failing complex nested query
 \c citus
 SET client_min_messages TO DEBUG5;
 -- The exact failing pattern (this should fail without our fix)
 SELECT avg(event_type) as avg_val
 FROM (
    SELECT event_type, a_users.user_id
    FROM users_table as a_users, 
    (
        SELECT user_id, event_type 
        FROM events_table 
        WHERE value_2 < 3 
        ORDER BY 1, 2 
        LIMIT 1
    ) as foo
    WHERE foo.user_id = a_users.user_id
 ) bar, users_table as b_users
 WHERE bar.user_id = b_users.user_id
 GROUP BY b_users.value_1;
--- a/test_focus_pg18.sql
+++ b/test_focus_pg18.sql
@ -0,0 +1,37 @@
 -- Focus on the ?column? issue specifically
 \c citus
 -- Tables should already exist, just in case
 CREATE TABLE IF NOT EXISTS users_table (user_id int, value_1 int, value_2 int);
 CREATE TABLE IF NOT EXISTS events_table (user_id int, event_type int, value_2 int);
 -- Populate with minimal data
 INSERT INTO users_table (user_id, value_1, value_2) VALUES (1, 10, 100), (2, 20, 200) ON CONFLICT DO NOTHING;
 INSERT INTO events_table (user_id, event_type, value_2) VALUES (1, 1, 1), (2, 2, 2) ON CONFLICT DO NOTHING;
 -- Test the simplified ?column? issue
 SELECT DISTINCT user_id
 FROM (
    SELECT users_table.user_id 
    FROM users_table,
    (
        SELECT avg(event_type) as avg_val
        FROM (
            SELECT event_type, a_users.user_id
            FROM users_table as a_users, 
            (
                SELECT user_id, event_type 
                FROM events_table 
                WHERE value_2 < 3 
                ORDER BY 1, 2 
                LIMIT 1
            ) as foo
            WHERE foo.user_id = a_users.user_id
        ) bar, users_table as b_users
        WHERE bar.user_id = b_users.user_id
        GROUP BY b_users.value_1
    ) as baz
    WHERE baz.avg_val < users_table.user_id
    ORDER BY 1
    LIMIT 1
 ) as sub1;
--- a/test_nested_pg18.sql
+++ b/test_nested_pg18.sql
@ -0,0 +1,41 @@
 -- Test deeply nested queries with JOINs and GROUP BY
 \c citus
 -- Test the exact failing pattern step by step
 -- Step 1: Simple subquery with JOIN (should work)
 SELECT avg(event_type) as avg_val
 FROM (
    SELECT event_type, users_table.user_id
    FROM users_table, events_table
    WHERE events_table.user_id = users_table.user_id
 ) sub
 GROUP BY sub.user_id;
 -- Step 2: Add one more level of nesting (might fail)
 SELECT avg_val
 FROM (
    SELECT avg(event_type) as avg_val
    FROM (
        SELECT event_type, users_table.user_id
        FROM users_table, events_table
        WHERE events_table.user_id = users_table.user_id
    ) sub
    GROUP BY sub.user_id
 ) outer_sub;
 -- Step 3: The exact failing pattern (this should fail)
 SELECT avg(event_type) as avg_val
 FROM (
    SELECT event_type, a_users.user_id
    FROM users_table as a_users, 
    (
        SELECT user_id, event_type 
        FROM events_table 
        WHERE value_2 < 3 
        ORDER BY 1, 2 
        LIMIT 1
    ) as foo
    WHERE foo.user_id = a_users.user_id
 ) bar, users_table as b_users
 WHERE bar.user_id = b_users.user_id
 GROUP BY b_users.value_1;
--- a/test_pg18_fix_final.sql
+++ b/test_pg18_fix_final.sql
@ -0,0 +1,30 @@
 -- Test PostgreSQL 18 compatibility using existing distributed tables
 \c citus
 -- Test the exact pattern from subqueries_deep.sql that should fail without our fix
 SELECT
 	DISTINCT user_id
 FROM
 	(
 		SELECT users_table.user_id FROM users_table,
 		(
 			SELECT
 				avg(event_type) as avg_val
 			FROM
 				(SELECT
 					event_type, users_table.user_id
 				 FROM
 				 	users_table, (SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 0) as foo
 				 WHERE
 				 	foo.user_id = users_table.user_id) bar, users_table
 			WHERE
 				bar.user_id = users_table.user_id
 		GROUP BY
 			users_table.value_1
 		) as baz
 		WHERE
 			baz.avg_val < users_table.user_id
 		ORDER BY 1
 		LIMIT 3
 	) as sub1
 	ORDER BY 1 DESC;
--- a/test_simple_pg18.sql
+++ b/test_simple_pg18.sql
@ -0,0 +1,26 @@
 -- Simple PostgreSQL 18 test for "?column?" issue
 CREATE SCHEMA test_simple;
 SET search_path TO test_simple;
 CREATE TABLE simple_table (
    id INTEGER,
    value TEXT
 );
 SELECT create_distributed_table('simple_table', 'id');
 INSERT INTO simple_table VALUES (1, 'a'), (2, 'b'), (3, 'c');
 -- Test a simple subquery that might trigger the "?column?" issue
 SELECT * FROM (
    SELECT id FROM simple_table 
 ) AS sub;
 -- Test more complex nested subquery
 SELECT * FROM (
    SELECT id FROM (
        SELECT id, value FROM simple_table
    ) inner_sub
 ) outer_sub;
 DROP SCHEMA test_simple CASCADE;
--- a/test_simple_subquery.sql
+++ b/test_simple_subquery.sql
@ -0,0 +1,29 @@
 -- Simple test to isolate the PostgreSQL 18 issue
 -- This should show where the ?column? error occurs
 -- Drop and recreate to start fresh
 DROP SCHEMA IF EXISTS simple_test CASCADE;
 CREATE SCHEMA simple_test;
 SET search_path TO simple_test, public;
 CREATE TABLE events_table (
    user_id int,
    event_type int,
    value_2 int
 );
 SELECT create_distributed_table('events_table', 'user_id');
 INSERT INTO events_table VALUES 
 (1, 1, 1),
 (2, 2, 2),
 (3, 3, 3);
 SET client_min_messages TO DEBUG1;
 -- This is the simplest subquery that should trigger the issue
 SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 3;
 -- Clean up
 SET search_path TO public;
 DROP SCHEMA simple_test CASCADE;
--- a/test_subqueries_deep_custom.sql
+++ b/test_subqueries_deep_custom.sql
@ -0,0 +1,78 @@
 -- Test PostgreSQL 18 "?column?" fix for subqueries_deep
 -- Create basic tables for testing
 CREATE SCHEMA test_pg18_fix;
 SET search_path TO test_pg18_fix, public;
 -- Create test tables similar to what the regular tests use
 CREATE TABLE users_table (
    user_id int,
    time timestamp,
    value_1 int,
    value_2 int,
    value_3 double precision,
    value_4 bigint
 );
 CREATE TABLE events_table (
    user_id int,
    time timestamp,
    event_type int,
    value_2 int,
    value_3 double precision,
    value_4 bigint
 );
 -- Create distributed tables  
 SELECT create_distributed_table('users_table', 'user_id');
 SELECT create_distributed_table('events_table', 'user_id');
 -- Insert some test data
 INSERT INTO users_table VALUES 
 (1, '2024-01-01', 10, 1, 1.1, 100),
 (2, '2024-01-02', 20, 2, 2.2, 200),
 (3, '2024-01-03', 30, 3, 3.3, 300),
 (4, '2024-01-04', 40, 4, 4.4, 400),
 (5, '2024-01-05', 50, 5, 5.5, 500),
 (6, '2024-01-06', 60, 6, 6.6, 600);
 INSERT INTO events_table VALUES 
 (1, '2024-01-01', 1, 1, 1.1, 100),
 (2, '2024-01-02', 2, 2, 2.2, 200),
 (3, '2024-01-03', 3, 3, 3.3, 300),
 (4, '2024-01-04', 4, 4, 4.4, 400),
 (5, '2024-01-05', 5, 5, 5.5, 500),
 (6, '2024-01-06', 1, 6, 6.6, 600);
 SET client_min_messages TO DEBUG1;
 -- Test the original failing query from subqueries_deep.sql
 SELECT
 	DISTINCT user_id
 FROM
 	(
 		SELECT users_table.user_id FROM users_table,
 		(
 			SELECT
 				avg(event_type) as avg_val
 			FROM
 				(SELECT
 					event_type, users_table.user_id
 				 FROM
 				 	users_table, (SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 3) as foo
 				 WHERE
 				 	foo.user_id = users_table.user_id) bar, users_table
 			WHERE
 				bar.user_id = users_table.user_id
 		GROUP BY
 			users_table.value_1
 		) as baz
 		WHERE
 			baz.avg_val < users_table.user_id
 		ORDER BY 1
 		LIMIT 3
 	) as sub1
 	ORDER BY 1 DESC;
 -- Clean up
 SET search_path TO public;
 DROP SCHEMA test_pg18_fix CASCADE;
		`@ -0,0 +1,2 @@`
							`# using postmaster on localhost, port 57636`
							`# diff command failed with status 512: diff "/workspaces/citus/expected/subqueries_deep.out" "/workspaces/citus/results/subqueries_deep.out" > "/workspaces/citus/results/subqueries_deep.out.diff"Bail out!`