PG18: Add support for virtual generated columns.

Generated columns can be virtual (not stored) and this is the default.
This PG18 feature requires tweaking citus_ruleutils and deparse table
to support in Citus. Relevant PG commit: 83ea6c540.

Also ensure that:
1) Cannot distribute on a GENERATED .. VIRTUAL column
2) undistribute_table() and alter_*_table UDFs handle
   GENERATED .. VIRTUAL in addition to STORED.
3) Citus COPY implementation takes GENERATED .. VIRTUAL
   columns into account.

Utility function `IsDroppedOrGenerated()` is used by the above to
detect generated columns; its agnostic to whether or not the column
is stored or virtual so does not need to be PG18-specific. Function
`DistributionColumnIsGeneratedCheck()` however has PG18-specific code
mainly because the error message thrown when a Citus client invokes
`create_distributed_table()` on a generated column reports whether
the column is stored or virtual.

The commit alson includes a fix for an 'unrecognized relation id'
error in GROUP BY on table with virtual column, which cropped up
in testing. The query:
```
   SELECT count(1), device_id FROM v_reading GROUP BY device_id
```
errored out with 'Unrecognized relid 2' when v_reading had a virtual
column, but ran fine if the column was stored. It turns out to be
because of Postgres commit 1e4351a "Expand virtual generated columns
in the planner", which fixed an issue with virtual cols (83ea6c5).
The fix involved constructing a new Query object and applying preprocessing
to that, with the consequence that changes made by the Postgres planner
to the Query are not available to the caller. One such change is
expanding of references to the GROUP BY expressions; they are not
reflected back when the table has at least one virtual column.

The broader implication for Citus is that after the distributed_planner()
hook has called Postgres' standard_planner(), it may not be aware of
modifications made to the Query. Citus tracks both the query passed
into its planner hook and the query given to the Postgres planner; the latter
may undergo transformations that Citus needs to be aware of in subsequent
distributed planning, for example expanding of GROUP BY expressions.
To resolve this, we enable Citus's planner hooks to access the distributed
planning context through the restriction context, and change the query field
if it no longer refers to the same query tree being used by the Postgres
planner. This is implanted as follows:

* Planner restriction context has a new field that refers to its distributed
  planner context
* Citus `distributed_planner()` hook initializes this when pushing a new
  restriction context
* Citus `multi_relation_restriction_hook()` checks if the distributed context
  query is no longer being used by the Postgres planner; this is only done at
  the outermost query level to stay in sync with Citus `distributed_planner()`
* Citus `distributed_planner()` hook clears the distributed planner context
  reference immediately after calling `standard_planner()`, to ensure that any
  Postgres planner calls made by Citus distributed planning do not get confused
  and incorrectly swap out the query tree
pull/8346/head
Colm McHugh 2025-11-17 17:09:01 +00:00
parent 79cabe7eca
commit 5f06ba26a8
11 changed files with 765 additions and 58 deletions

View File

@ -1927,14 +1927,10 @@ GetNonGeneratedStoredColumnNameList(Oid relationId)
for (int columnIndex = 0; columnIndex < tupleDescriptor->natts; columnIndex++) for (int columnIndex = 0; columnIndex < tupleDescriptor->natts; columnIndex++)
{ {
Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex); Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex);
if (currentColumn->attisdropped)
{
/* skip dropped columns */
continue;
}
if (currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED) if (IsDroppedOrGenerated(currentColumn))
{ {
/* skip dropped or generated columns */
continue; continue;
} }

View File

@ -175,8 +175,9 @@ static bool DistributionColumnUsesNumericColumnNegativeScale(TupleDesc relationD
static int numeric_typmod_scale(int32 typmod); static int numeric_typmod_scale(int32 typmod);
static bool is_valid_numeric_typmod(int32 typmod); static bool is_valid_numeric_typmod(int32 typmod);
static bool DistributionColumnUsesGeneratedStoredColumn(TupleDesc relationDesc, static void DistributionColumnIsGeneratedCheck(TupleDesc relationDesc,
Var *distributionColumn); Var *distributionColumn,
const char *relationName);
static bool CanUseExclusiveConnections(Oid relationId, bool localTableEmpty); static bool CanUseExclusiveConnections(Oid relationId, bool localTableEmpty);
static uint64 DoCopyFromLocalTableIntoShards(Relation distributedRelation, static uint64 DoCopyFromLocalTableIntoShards(Relation distributedRelation,
DestReceiver *copyDest, DestReceiver *copyDest,
@ -2103,13 +2104,10 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
/* verify target relation is not distributed by a generated stored column /* verify target relation is not distributed by a generated stored column
*/ */
if (distributionMethod != DISTRIBUTE_BY_NONE && if (distributionMethod != DISTRIBUTE_BY_NONE)
DistributionColumnUsesGeneratedStoredColumn(relationDesc, distributionColumn))
{ {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), DistributionColumnIsGeneratedCheck(relationDesc, distributionColumn,
errmsg("cannot distribute relation: %s", relationName), relationName);
errdetail("Distribution column must not use GENERATED ALWAYS "
"AS (...) STORED.")));
} }
/* verify target relation is not distributed by a column of type numeric with negative scale */ /* verify target relation is not distributed by a column of type numeric with negative scale */
@ -2829,9 +2827,7 @@ TupleDescColumnNameList(TupleDesc tupleDescriptor)
Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex); Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex);
char *columnName = NameStr(currentColumn->attname); char *columnName = NameStr(currentColumn->attname);
if (currentColumn->attisdropped || if (IsDroppedOrGenerated(currentColumn))
currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED
)
{ {
continue; continue;
} }
@ -2893,22 +2889,43 @@ DistributionColumnUsesNumericColumnNegativeScale(TupleDesc relationDesc,
/* /*
* DistributionColumnUsesGeneratedStoredColumn returns whether a given relation uses * DistributionColumnIsGeneratedCheck throws an error if a given relation uses
* GENERATED ALWAYS AS (...) STORED on distribution column * GENERATED ALWAYS AS (...) STORED | VIRTUAL on distribution column
*/ */
static bool static void
DistributionColumnUsesGeneratedStoredColumn(TupleDesc relationDesc, DistributionColumnIsGeneratedCheck(TupleDesc relationDesc,
Var *distributionColumn) Var *distributionColumn,
const char *relationName)
{ {
Form_pg_attribute attributeForm = TupleDescAttr(relationDesc, Form_pg_attribute attributeForm = TupleDescAttr(relationDesc,
distributionColumn->varattno - 1); distributionColumn->varattno - 1);
switch (attributeForm->attgenerated)
if (attributeForm->attgenerated == ATTRIBUTE_GENERATED_STORED)
{ {
return true; case ATTRIBUTE_GENERATED_STORED:
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot distribute relation: %s", relationName),
errdetail("Distribution column must not use GENERATED ALWAYS "
"AS (...) STORED.")));
break;
} }
return false; #if PG_VERSION_NUM >= PG_VERSION_18
case ATTRIBUTE_GENERATED_VIRTUAL:
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot distribute relation: %s", relationName),
errdetail("Distribution column must not use GENERATED ALWAYS "
"AS (...) VIRTUAL.")));
break;
}
#endif
default:
{
break;
}
}
} }

View File

@ -350,7 +350,6 @@ static void LogLocalCopyToRelationExecution(uint64 shardId);
static void LogLocalCopyToFileExecution(uint64 shardId); static void LogLocalCopyToFileExecution(uint64 shardId);
static void ErrorIfMergeInCopy(CopyStmt *copyStatement); static void ErrorIfMergeInCopy(CopyStmt *copyStatement);
/* exports for SQL callable functions */ /* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(citus_text_send_as_jsonb); PG_FUNCTION_INFO_V1(citus_text_send_as_jsonb);
@ -484,9 +483,7 @@ CopyToExistingShards(CopyStmt *copyStatement, QueryCompletion *completionTag)
Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex); Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex);
char *columnName = NameStr(currentColumn->attname); char *columnName = NameStr(currentColumn->attname);
if (currentColumn->attisdropped || if (IsDroppedOrGenerated(currentColumn))
currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED
)
{ {
continue; continue;
} }
@ -804,9 +801,7 @@ CanUseBinaryCopyFormat(TupleDesc tupleDescription)
{ {
Form_pg_attribute currentColumn = TupleDescAttr(tupleDescription, columnIndex); Form_pg_attribute currentColumn = TupleDescAttr(tupleDescription, columnIndex);
if (currentColumn->attisdropped || if (IsDroppedOrGenerated(currentColumn))
currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED
)
{ {
continue; continue;
} }
@ -1316,9 +1311,7 @@ TypeArrayFromTupleDescriptor(TupleDesc tupleDescriptor)
for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
{ {
Form_pg_attribute attr = TupleDescAttr(tupleDescriptor, columnIndex); Form_pg_attribute attr = TupleDescAttr(tupleDescriptor, columnIndex);
if (attr->attisdropped || if (IsDroppedOrGenerated(attr))
attr->attgenerated == ATTRIBUTE_GENERATED_STORED
)
{ {
typeArray[columnIndex] = InvalidOid; typeArray[columnIndex] = InvalidOid;
} }
@ -1486,9 +1479,7 @@ AppendCopyRowData(Datum *valueArray, bool *isNullArray, TupleDesc rowDescriptor,
value = CoerceColumnValue(value, &columnCoercionPaths[columnIndex]); value = CoerceColumnValue(value, &columnCoercionPaths[columnIndex]);
} }
if (currentColumn->attisdropped || if (IsDroppedOrGenerated(currentColumn))
currentColumn->attgenerated == ATTRIBUTE_GENERATED_STORED
)
{ {
continue; continue;
} }
@ -1607,9 +1598,7 @@ AvailableColumnCount(TupleDesc tupleDescriptor)
{ {
Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex); Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex);
if (!currentColumn->attisdropped && if (!IsDroppedOrGenerated(currentColumn))
currentColumn->attgenerated != ATTRIBUTE_GENERATED_STORED
)
{ {
columnCount++; columnCount++;
} }
@ -3999,3 +3988,20 @@ UnclaimCopyConnections(List *connectionStateList)
UnclaimConnection(connectionState->connection); UnclaimConnection(connectionState->connection);
} }
} }
/*
* IsDroppedOrGenerated - helper function for determining if an attribute is
* dropped or generated. Used by COPY and Citus DDL to skip such columns.
*/
inline bool
IsDroppedOrGenerated(Form_pg_attribute attr)
{
/*
* If the "is dropped" flag is true or the generated column flag
* is not the default nul character (in which case its value is 's'
* for ATTRIBUTE_GENERATED_STORED or possibly 'v' with PG18+ for
* ATTRIBUTE_GENERATED_VIRTUAL) then return true.
*/
return attr->attisdropped || (attr->attgenerated != '\0');
}

View File

@ -177,8 +177,7 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList,
{ {
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex); Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
if (attributeForm->attisdropped || if (IsDroppedOrGenerated(attributeForm))
attributeForm->attgenerated == ATTRIBUTE_GENERATED_STORED)
{ {
/* skip dropped columns and columns with GENERATED AS ALWAYS expressions */ /* skip dropped columns and columns with GENERATED AS ALWAYS expressions */
continue; continue;

View File

@ -471,6 +471,13 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
appendStringInfo(&buffer, " GENERATED ALWAYS AS (%s) STORED", appendStringInfo(&buffer, " GENERATED ALWAYS AS (%s) STORED",
defaultString); defaultString);
} }
#if PG_VERSION_NUM >= PG_VERSION_18
else if (attributeForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
{
appendStringInfo(&buffer, " GENERATED ALWAYS AS (%s) VIRTUAL",
defaultString);
}
#endif
else else
{ {
Oid seqOid = GetSequenceOid(tableRelationId, defaultValue->adnum); Oid seqOid = GetSequenceOid(tableRelationId, defaultValue->adnum);

View File

@ -649,13 +649,18 @@ AppendAlterTableCmdAddColumn(StringInfo buf, AlterTableCmd *alterTableCmd,
} }
else if (constraint->contype == CONSTR_GENERATED) else if (constraint->contype == CONSTR_GENERATED)
{ {
char attgenerated = 's'; char attgenerated = ATTRIBUTE_GENERATED_STORED;
appendStringInfo(buf, " GENERATED %s AS (%s) STORED", #if PG_VERSION_NUM >= PG_VERSION_18
attgenerated = constraint->generated_kind;
#endif
appendStringInfo(buf, " GENERATED %s AS (%s) %s",
GeneratedWhenStr(constraint->generated_when), GeneratedWhenStr(constraint->generated_when),
DeparseRawExprForColumnDefault(relationId, typeOid, typmod, DeparseRawExprForColumnDefault(relationId, typeOid, typmod,
columnDefinition->colname, columnDefinition->colname,
attgenerated, attgenerated,
constraint->raw_expr)); constraint->raw_expr),
(attgenerated == ATTRIBUTE_GENERATED_STORED ? "STORED" :
"VIRTUAL"));
} }
else if (constraint->contype == CONSTR_CHECK || else if (constraint->contype == CONSTR_CHECK ||
constraint->contype == CONSTR_PRIMARY || constraint->contype == CONSTR_PRIMARY ||

View File

@ -125,8 +125,10 @@ static void AdjustReadIntermediateResultsCostInternal(RelOptInfo *relOptInfo,
Const *resultFormatConst); Const *resultFormatConst);
static List * OuterPlanParamsList(PlannerInfo *root); static List * OuterPlanParamsList(PlannerInfo *root);
static List * CopyPlanParamList(List *originalPlanParamList); static List * CopyPlanParamList(List *originalPlanParamList);
static PlannerRestrictionContext * CreateAndPushPlannerRestrictionContext( static void CreateAndPushPlannerRestrictionContext(
FastPathRestrictionContext *fastPathContext); DistributedPlanningContext *planContext,
FastPathRestrictionContext *
fastPathContext);
static PlannerRestrictionContext * CurrentPlannerRestrictionContext(void); static PlannerRestrictionContext * CurrentPlannerRestrictionContext(void);
static void PopPlannerRestrictionContext(void); static void PopPlannerRestrictionContext(void);
static void ResetPlannerRestrictionContext( static void ResetPlannerRestrictionContext(
@ -245,8 +247,8 @@ distributed_planner(Query *parse,
*/ */
HideCitusDependentObjectsOnQueriesOfPgMetaTables((Node *) parse, NULL); HideCitusDependentObjectsOnQueriesOfPgMetaTables((Node *) parse, NULL);
/* create a restriction context and put it at the end of context list */ /* create a restriction context and put it at the end of our plan context's context list */
planContext.plannerRestrictionContext = CreateAndPushPlannerRestrictionContext( CreateAndPushPlannerRestrictionContext(&planContext,
&fastPathContext); &fastPathContext);
/* /*
@ -281,6 +283,9 @@ distributed_planner(Query *parse,
Assert(saveNestLevel > 0); Assert(saveNestLevel > 0);
AtEOXact_GUC(true, saveNestLevel); AtEOXact_GUC(true, saveNestLevel);
} }
/* Pop the plan context from the current restriction context */
planContext.plannerRestrictionContext->planContext = NULL;
#endif #endif
needsDistributedPlanning = CheckPostPlanDistribution(&planContext, needsDistributedPlanning = CheckPostPlanDistribution(&planContext,
needsDistributedPlanning, needsDistributedPlanning,
@ -2033,6 +2038,32 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
lappend(relationRestrictionContext->relationRestrictionList, relationRestriction); lappend(relationRestrictionContext->relationRestrictionList, relationRestriction);
MemoryContextSwitchTo(oldMemoryContext); MemoryContextSwitchTo(oldMemoryContext);
#if PG_VERSION_NUM >= PG_VERSION_18
if (root->query_level == 1 && plannerRestrictionContext->planContext != NULL)
{
/* We're at the top query with a distributed context; see if Postgres
* has changed the query tree we passed to it in distributed_planner().
* This check was necessitated by PG commit 1e4351a, becuase in it the
* planner modfies a copy of the passed in query tree with the consequence
* that changes are not reflected back to the caller of standard_planner().
*/
Query *query = plannerRestrictionContext->planContext->query;
if (root->parse != query)
{
/*
* The Postgres planner has reconstructed the query tree, so the query
* tree our distributed context passed in (to standard_planner() is
* updated to track the new query tree.
*/
ereport(DEBUG4, (errmsg(
"Detected query reconstruction by Postgres planner, updating "
"planContext to track it")));
plannerRestrictionContext->planContext->query = root->parse;
}
}
#endif
} }
@ -2410,11 +2441,13 @@ CopyPlanParamList(List *originalPlanParamList)
* context with an empty relation restriction context and an empty join and * context with an empty relation restriction context and an empty join and
* a copy of the given fast path restriction context (if present). Finally, * a copy of the given fast path restriction context (if present). Finally,
* the planner restriction context is inserted to the beginning of the * the planner restriction context is inserted to the beginning of the
* global plannerRestrictionContextList and it is returned. * global plannerRestrictionContextList and, in PG18+, given a reference to
* its distributed plan context.
*/ */
static PlannerRestrictionContext * static void
CreateAndPushPlannerRestrictionContext( CreateAndPushPlannerRestrictionContext(DistributedPlanningContext *planContext,
FastPathRestrictionContext *fastPathRestrictionContext) FastPathRestrictionContext *
fastPathRestrictionContext)
{ {
PlannerRestrictionContext *plannerRestrictionContext = PlannerRestrictionContext *plannerRestrictionContext =
palloc0(sizeof(PlannerRestrictionContext)); palloc0(sizeof(PlannerRestrictionContext));
@ -2451,7 +2484,11 @@ CreateAndPushPlannerRestrictionContext(
plannerRestrictionContextList = lcons(plannerRestrictionContext, plannerRestrictionContextList = lcons(plannerRestrictionContext,
plannerRestrictionContextList); plannerRestrictionContextList);
return plannerRestrictionContext; planContext->plannerRestrictionContext = plannerRestrictionContext;
#if PG_VERSION_NUM >= PG_VERSION_18
plannerRestrictionContext->planContext = planContext;
#endif
} }
@ -2512,6 +2549,18 @@ CurrentPlannerRestrictionContext(void)
static void static void
PopPlannerRestrictionContext(void) PopPlannerRestrictionContext(void)
{ {
#if PG_VERSION_NUM >= PG_VERSION_18
/*
* PG18+: Clear the restriction context's planContext pointer; this is done
* by distributed_planner() when popping the context, but in case of error
* during standard_planner() we want to clean up here also.
*/
PlannerRestrictionContext *plannerRestrictionContext =
(PlannerRestrictionContext *) linitial(plannerRestrictionContextList);
plannerRestrictionContext->planContext = NULL;
#endif
plannerRestrictionContextList = list_delete_first(plannerRestrictionContextList); plannerRestrictionContextList = list_delete_first(plannerRestrictionContextList);
} }

View File

@ -112,5 +112,6 @@ extern void UndistributeDisconnectedCitusLocalTables(void);
extern void NotifyUtilityHookConstraintDropped(void); extern void NotifyUtilityHookConstraintDropped(void);
extern void ResetConstraintDropped(void); extern void ResetConstraintDropped(void);
extern void ExecuteDistributedDDLJob(DDLJob *ddlJob); extern void ExecuteDistributedDDLJob(DDLJob *ddlJob);
extern bool IsDroppedOrGenerated(Form_pg_attribute attr);
#endif /* MULTI_UTILITY_H */ #endif /* MULTI_UTILITY_H */

View File

@ -119,6 +119,7 @@ typedef struct FastPathRestrictionContext
bool delayFastPathPlanning; bool delayFastPathPlanning;
} FastPathRestrictionContext; } FastPathRestrictionContext;
struct DistributedPlanningContext;
typedef struct PlannerRestrictionContext typedef struct PlannerRestrictionContext
{ {
RelationRestrictionContext *relationRestrictionContext; RelationRestrictionContext *relationRestrictionContext;
@ -132,6 +133,18 @@ typedef struct PlannerRestrictionContext
*/ */
FastPathRestrictionContext *fastPathRestrictionContext; FastPathRestrictionContext *fastPathRestrictionContext;
MemoryContext memoryContext; MemoryContext memoryContext;
#if PG_VERSION_NUM >= PG_VERSION_18
/*
* Enable access to the distributed planning context from
* planner hooks called by Postgres. Enables Citus to track
* changes made by Postgres to the query tree (such as
* expansion of virtual columns) and ensure they are reflected
* back to subsequent distributed planning.
*/
struct DistributedPlanningContext *planContext;
#endif
} PlannerRestrictionContext; } PlannerRestrictionContext;
typedef struct RelationShard typedef struct RelationShard

View File

@ -1413,6 +1413,393 @@ INSERT INTO NE_CHECK_TBL (x) VALUES (5), (15), (8), (12);
ERROR: new row for relation "ne_check_tbl_4754044" violates check constraint "check_x2_4754044" ERROR: new row for relation "ne_check_tbl_4754044" violates check constraint "check_x2_4754044"
DETAIL: Failing row contains (15, null). DETAIL: Failing row contains (15, null).
CONTEXT: while executing command on localhost:xxxxx CONTEXT: while executing command on localhost:xxxxx
-- PG18 Feature: Generated Virtual Columns
-- PG18 commit: https://github.com/postgres/postgres/commit/83ea6c540
-- Verify that generated virtual columns are supported on distributed tables.
CREATE TABLE v_reading (
celsius DECIMAL(5,2),
farenheit DECIMAL(6, 2) GENERATED ALWAYS AS (celsius * 9/5 + 32) VIRTUAL,
created_at TIMESTAMPTZ DEFAULT now(),
device_id INT
);
-- Cannot distribute on a generated column (#4616) applies
-- to VIRTUAL columns.
SELECT create_distributed_table('v_reading', 'farenheit');
ERROR: cannot distribute relation: v_reading
DETAIL: Distribution column must not use GENERATED ALWAYS AS (...) VIRTUAL.
SELECT create_distributed_table('v_reading', 'device_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO v_reading (celsius, device_id) VALUES (0, 1), (100, 1), (37.5, 2), (25, 2), (-40, 3);
SELECT device_id, celsius, farenheit FROM v_reading ORDER BY device_id;
device_id | celsius | farenheit
---------------------------------------------------------------------
1 | 0.00 | 32.00
1 | 100.00 | 212.00
2 | 37.50 | 99.50
2 | 25.00 | 77.00
3 | -40.00 | -40.00
(5 rows)
ALTER TABLE v_reading ADD COLUMN kelvin DECIMAL(6, 2) GENERATED ALWAYS AS (celsius + 273.15) VIRTUAL;
SELECT device_id, celsius, kelvin FROM v_reading ORDER BY device_id, celsius;
device_id | celsius | kelvin
---------------------------------------------------------------------
1 | 0.00 | 273.15
1 | 100.00 | 373.15
2 | 25.00 | 298.15
2 | 37.50 | 310.65
3 | -40.00 | 233.15
(5 rows)
-- Show all columns that are generated
SELECT s.relname, a.attname, a.attgenerated
FROM pg_class s
JOIN pg_attribute a ON a.attrelid=s.oid
WHERE s.relname LIKE 'v_reading%' and attgenerated::int != 0
ORDER BY 1,2;
relname | attname | attgenerated
---------------------------------------------------------------------
v_reading | farenheit | v
v_reading | kelvin | v
(2 rows)
-- Generated columns are virtual by default - repeat the test without VIRTUAL keyword
CREATE TABLE d_reading (
celsius DECIMAL(5,2),
farenheit DECIMAL(6, 2) GENERATED ALWAYS AS (celsius * 9/5 + 32),
created_at TIMESTAMPTZ DEFAULT now(),
device_id INT
);
SELECT create_distributed_table('d_reading', 'farenheit');
ERROR: cannot distribute relation: d_reading
DETAIL: Distribution column must not use GENERATED ALWAYS AS (...) VIRTUAL.
SELECT create_distributed_table('d_reading', 'device_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO d_reading (celsius, device_id) VALUES (0, 1), (100, 1), (37.5, 2), (25, 2), (-40, 3);
SELECT device_id, celsius, farenheit FROM d_reading ORDER BY device_id;
device_id | celsius | farenheit
---------------------------------------------------------------------
1 | 0.00 | 32.00
1 | 100.00 | 212.00
2 | 37.50 | 99.50
2 | 25.00 | 77.00
3 | -40.00 | -40.00
(5 rows)
ALTER TABLE d_reading ADD COLUMN kelvin DECIMAL(6, 2) GENERATED ALWAYS AS (celsius + 273.15) VIRTUAL;
SELECT device_id, celsius, kelvin FROM d_reading ORDER BY device_id, celsius;
device_id | celsius | kelvin
---------------------------------------------------------------------
1 | 0.00 | 273.15
1 | 100.00 | 373.15
2 | 25.00 | 298.15
2 | 37.50 | 310.65
3 | -40.00 | 233.15
(5 rows)
-- Show all columns that are generated
SELECT s.relname, a.attname, a.attgenerated
FROM pg_class s
JOIN pg_attribute a ON a.attrelid=s.oid
WHERE s.relname LIKE 'd_reading%' and attgenerated::int != 0
ORDER BY 1,2;
relname | attname | attgenerated
---------------------------------------------------------------------
d_reading | farenheit | v
d_reading | kelvin | v
(2 rows)
-- COPY implementation needs to handle GENERATED ALWAYS AS (...) VIRTUAL columns.
\COPY d_reading FROM STDIN WITH DELIMITER ','
SELECT device_id, count(device_id) as count, round(avg(celsius), 2) as avg, min(farenheit), max(farenheit)
FROM d_reading
GROUP BY device_id
ORDER BY count DESC;
device_id | count | avg | min | max
---------------------------------------------------------------------
1 | 12 | 20.00 | 32.00 | 212.00
5 | 10 | 13.20 | 33.80 | 73.40
2 | 2 | 31.25 | 77.00 | 99.50
3 | 1 | -40.00 | -40.00 | -40.00
(4 rows)
-- Test GROUP BY on tables with generated virtual columns - this requires
-- special case handling in distributed planning. Test it out on some
-- some queries involving joins and set operations.
SELECT device_id, max(kelvin) as Kel
FROM v_reading
WHERE (device_id, celsius) NOT IN (SELECT device_id, max(celsius) FROM v_reading GROUP BY device_id)
GROUP BY device_id
ORDER BY device_id ASC;
device_id | kel
---------------------------------------------------------------------
1 | 273.15
2 | 298.15
(2 rows)
SELECT device_id, round(AVG( (d_farenheit + v_farenheit) / 2), 2) as Avg_Far
FROM (SELECT *
FROM (SELECT device_id, round(AVG(farenheit),2) as d_farenheit
FROM d_reading
GROUP BY device_id) AS subq
RIGHT JOIN (SELECT device_id, MAX(farenheit) AS v_farenheit
FROM d_reading
GROUP BY device_id) AS subq2
USING (device_id)
) AS finalq
GROUP BY device_id
ORDER BY device_id ASC;
device_id | avg_far
---------------------------------------------------------------------
1 | 140.00
2 | 93.88
3 | -40.00
5 | 64.58
(4 rows)
SELECT device_id, MAX(farenheit) as farenheit
FROM
((SELECT device_id, round(AVG(farenheit),2) as farenheit
FROM d_reading
GROUP BY device_id)
UNION ALL (SELECT device_id, MAX(farenheit) AS farenheit
FROM d_reading
GROUP BY device_id) ) AS unioned
GROUP BY device_id
ORDER BY device_id ASC;
device_id | farenheit
---------------------------------------------------------------------
1 | 212.00
2 | 99.50
3 | -40.00
5 | 73.40
(4 rows)
SELECT device_id, MAX(farenheit) as farenheit
FROM
((SELECT device_id, round(AVG(farenheit),2) as farenheit
FROM d_reading
GROUP BY device_id)
INTERSECT (SELECT device_id, MAX(farenheit) AS farenheit
FROM d_reading
GROUP BY device_id) ) AS intersected
GROUP BY device_id
ORDER BY device_id ASC;
device_id | farenheit
---------------------------------------------------------------------
3 | -40.00
(1 row)
SELECT device_id, MAX(farenheit) as farenheit
FROM
((SELECT device_id, round(AVG(farenheit),2) as farenheit
FROM d_reading
GROUP BY device_id)
EXCEPT (SELECT device_id, MAX(farenheit) AS farenheit
FROM d_reading
GROUP BY device_id) ) AS excepted
GROUP BY device_id
ORDER BY device_id ASC;
device_id | farenheit
---------------------------------------------------------------------
1 | 68.00
2 | 88.25
5 | 55.76
(3 rows)
-- Ensure that UDFs such as alter_distributed_table, undistribute_table
-- and add_local_table_to_metadata work fine with VIRTUAL columns. For
-- this, PR #4616 changes are modified to handle VIRTUAL columns in
-- addition to STORED columns.
CREATE TABLE generated_stored_dist (
col_1 int,
"col\'_2" text,
col_3 text generated always as (UPPER("col\'_2")) virtual
);
SELECT create_distributed_table ('generated_stored_dist', 'col_1');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO generated_stored_dist VALUES (1, 'text_1'), (2, 'text_2');
SELECT * FROM generated_stored_dist ORDER BY 1,2,3;
col_1 | col\'_2 | col_3
---------------------------------------------------------------------
1 | text_1 | TEXT_1
2 | text_2 | TEXT_2
(2 rows)
INSERT INTO generated_stored_dist VALUES (1, 'text_1'), (2, 'text_2');
SELECT alter_distributed_table('generated_stored_dist', shard_count := 5, cascade_to_colocated := false);
NOTICE: creating a new table for pg18_nn.generated_stored_dist
NOTICE: moving the data of pg18_nn.generated_stored_dist
NOTICE: dropping the old pg18_nn.generated_stored_dist
NOTICE: renaming the new table to pg18_nn.generated_stored_dist
alter_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT * FROM generated_stored_dist ORDER BY 1,2,3;
col_1 | col\'_2 | col_3
---------------------------------------------------------------------
1 | text_1 | TEXT_1
1 | text_1 | TEXT_1
2 | text_2 | TEXT_2
2 | text_2 | TEXT_2
(4 rows)
CREATE TABLE generated_stored_local (
col_1 int,
"col\'_2" text,
col_3 text generated always as (UPPER("col\'_2")) stored
);
SELECT citus_add_local_table_to_metadata('generated_stored_local');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
INSERT INTO generated_stored_local VALUES (1, 'text_1'), (2, 'text_2');
SELECT * FROM generated_stored_local ORDER BY 1,2,3;
col_1 | col\'_2 | col_3
---------------------------------------------------------------------
1 | text_1 | TEXT_1
2 | text_2 | TEXT_2
(2 rows)
SELECT create_distributed_table ('generated_stored_local', 'col_1');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$pg18_nn.generated_stored_local$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO generated_stored_local VALUES (1, 'text_1'), (2, 'text_2');
SELECT * FROM generated_stored_local ORDER BY 1,2,3;
col_1 | col\'_2 | col_3
---------------------------------------------------------------------
1 | text_1 | TEXT_1
1 | text_1 | TEXT_1
2 | text_2 | TEXT_2
2 | text_2 | TEXT_2
(4 rows)
CREATE TABLE generated_stored_ref (
col_1 int,
col_2 int,
col_3 int generated always as (col_1+col_2) virtual,
col_4 int,
col_5 int generated always as (col_4*2-col_1) virtual
);
SELECT create_reference_table ('generated_stored_ref');
create_reference_table
---------------------------------------------------------------------
(1 row)
INSERT INTO generated_stored_ref (col_1, col_4) VALUES (1,2), (11,12);
INSERT INTO generated_stored_ref (col_1, col_2, col_4) VALUES (100,101,102), (200,201,202);
SELECT * FROM generated_stored_ref ORDER BY 1,2,3,4,5;
col_1 | col_2 | col_3 | col_4 | col_5
---------------------------------------------------------------------
1 | | | 2 | 3
11 | | | 12 | 13
100 | 101 | 201 | 102 | 104
200 | 201 | 401 | 202 | 204
(4 rows)
BEGIN;
SELECT undistribute_table('generated_stored_ref');
NOTICE: creating a new table for pg18_nn.generated_stored_ref
NOTICE: moving the data of pg18_nn.generated_stored_ref
NOTICE: dropping the old pg18_nn.generated_stored_ref
NOTICE: renaming the new table to pg18_nn.generated_stored_ref
undistribute_table
---------------------------------------------------------------------
(1 row)
INSERT INTO generated_stored_ref (col_1, col_4) VALUES (11,12), (21,22);
INSERT INTO generated_stored_ref (col_1, col_2, col_4) VALUES (200,201,202), (300,301,302);
SELECT * FROM generated_stored_ref ORDER BY 1,2,3,4,5;
col_1 | col_2 | col_3 | col_4 | col_5
---------------------------------------------------------------------
1 | | | 2 | 3
11 | | | 12 | 13
11 | | | 12 | 13
21 | | | 22 | 23
100 | 101 | 201 | 102 | 104
200 | 201 | 401 | 202 | 204
200 | 201 | 401 | 202 | 204
300 | 301 | 601 | 302 | 304
(8 rows)
ROLLBACK;
BEGIN;
-- drop some of the columns not having "generated always as virtual" expressions
SET client_min_messages TO WARNING;
ALTER TABLE generated_stored_ref DROP COLUMN col_1 CASCADE;
RESET client_min_messages;
ALTER TABLE generated_stored_ref DROP COLUMN col_4;
-- show that undistribute_table works fine
SELECT undistribute_table('generated_stored_ref');
NOTICE: creating a new table for pg18_nn.generated_stored_ref
NOTICE: moving the data of pg18_nn.generated_stored_ref
NOTICE: dropping the old pg18_nn.generated_stored_ref
NOTICE: renaming the new table to pg18_nn.generated_stored_ref
undistribute_table
---------------------------------------------------------------------
(1 row)
INSERT INTO generated_stored_ref VALUES (5);
SELECT * FROM generated_stored_REF ORDER BY 1;
col_2
---------------------------------------------------------------------
5
101
201
(5 rows)
ROLLBACK;
BEGIN;
-- now drop all columns
ALTER TABLE generated_stored_ref DROP COLUMN col_3;
ALTER TABLE generated_stored_ref DROP COLUMN col_5;
ALTER TABLE generated_stored_ref DROP COLUMN col_1;
ALTER TABLE generated_stored_ref DROP COLUMN col_2;
ALTER TABLE generated_stored_ref DROP COLUMN col_4;
-- show that undistribute_table works fine
SELECT undistribute_table('generated_stored_ref');
NOTICE: creating a new table for pg18_nn.generated_stored_ref
NOTICE: moving the data of pg18_nn.generated_stored_ref
NOTICE: dropping the old pg18_nn.generated_stored_ref
NOTICE: renaming the new table to pg18_nn.generated_stored_ref
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT * FROM generated_stored_ref;
--
(4 rows)
ROLLBACK;
-- cleanup with minimum verbosity -- cleanup with minimum verbosity
SET client_min_messages TO ERROR; SET client_min_messages TO ERROR;
RESET search_path; RESET search_path;

View File

@ -890,6 +890,233 @@ ALTER TABLE NE_CHECK_TBL
-- CHECK_X2 is ENFORCED, so these inserts should fail -- CHECK_X2 is ENFORCED, so these inserts should fail
INSERT INTO NE_CHECK_TBL (x) VALUES (5), (15), (8), (12); INSERT INTO NE_CHECK_TBL (x) VALUES (5), (15), (8), (12);
-- PG18 Feature: Generated Virtual Columns
-- PG18 commit: https://github.com/postgres/postgres/commit/83ea6c540
-- Verify that generated virtual columns are supported on distributed tables.
CREATE TABLE v_reading (
celsius DECIMAL(5,2),
farenheit DECIMAL(6, 2) GENERATED ALWAYS AS (celsius * 9/5 + 32) VIRTUAL,
created_at TIMESTAMPTZ DEFAULT now(),
device_id INT
);
-- Cannot distribute on a generated column (#4616) applies
-- to VIRTUAL columns.
SELECT create_distributed_table('v_reading', 'farenheit');
SELECT create_distributed_table('v_reading', 'device_id');
INSERT INTO v_reading (celsius, device_id) VALUES (0, 1), (100, 1), (37.5, 2), (25, 2), (-40, 3);
SELECT device_id, celsius, farenheit FROM v_reading ORDER BY device_id;
ALTER TABLE v_reading ADD COLUMN kelvin DECIMAL(6, 2) GENERATED ALWAYS AS (celsius + 273.15) VIRTUAL;
SELECT device_id, celsius, kelvin FROM v_reading ORDER BY device_id, celsius;
-- Show all columns that are generated
SELECT s.relname, a.attname, a.attgenerated
FROM pg_class s
JOIN pg_attribute a ON a.attrelid=s.oid
WHERE s.relname LIKE 'v_reading%' and attgenerated::int != 0
ORDER BY 1,2;
-- Generated columns are virtual by default - repeat the test without VIRTUAL keyword
CREATE TABLE d_reading (
celsius DECIMAL(5,2),
farenheit DECIMAL(6, 2) GENERATED ALWAYS AS (celsius * 9/5 + 32),
created_at TIMESTAMPTZ DEFAULT now(),
device_id INT
);
SELECT create_distributed_table('d_reading', 'farenheit');
SELECT create_distributed_table('d_reading', 'device_id');
INSERT INTO d_reading (celsius, device_id) VALUES (0, 1), (100, 1), (37.5, 2), (25, 2), (-40, 3);
SELECT device_id, celsius, farenheit FROM d_reading ORDER BY device_id;
ALTER TABLE d_reading ADD COLUMN kelvin DECIMAL(6, 2) GENERATED ALWAYS AS (celsius + 273.15) VIRTUAL;
SELECT device_id, celsius, kelvin FROM d_reading ORDER BY device_id, celsius;
-- Show all columns that are generated
SELECT s.relname, a.attname, a.attgenerated
FROM pg_class s
JOIN pg_attribute a ON a.attrelid=s.oid
WHERE s.relname LIKE 'd_reading%' and attgenerated::int != 0
ORDER BY 1,2;
-- COPY implementation needs to handle GENERATED ALWAYS AS (...) VIRTUAL columns.
\COPY d_reading FROM STDIN WITH DELIMITER ','
3.00,2025-11-24 09:46:17.390872+00,1
6.00,2025-11-24 09:46:17.390872+00,5
2.00,2025-11-24 09:46:17.390872+00,1
22.00,2025-11-24 09:46:17.390872+00,5
15.00,2025-11-24 09:46:17.390872+00,1
13.00,2025-11-24 09:46:17.390872+00,5
27.00,2025-11-24 09:46:17.390872+00,1
14.00,2025-11-24 09:46:17.390872+00,5
2.00,2025-11-24 09:46:17.390872+00,1
23.00,2025-11-24 09:46:17.390872+00,5
22.00,2025-11-24 09:46:17.390872+00,1
3.00,2025-11-24 09:46:17.390872+00,5
2.00,2025-11-24 09:46:17.390872+00,1
7.00,2025-11-24 09:46:17.390872+00,5
6.00,2025-11-24 09:46:17.390872+00,1
21.00,2025-11-24 09:46:17.390872+00,5
30.00,2025-11-24 09:46:17.390872+00,1
1.00,2025-11-24 09:46:17.390872+00,5
31.00,2025-11-24 09:46:17.390872+00,1
22.00,2025-11-24 09:46:17.390872+00,5
\.
SELECT device_id, count(device_id) as count, round(avg(celsius), 2) as avg, min(farenheit), max(farenheit)
FROM d_reading
GROUP BY device_id
ORDER BY count DESC;
-- Test GROUP BY on tables with generated virtual columns - this requires
-- special case handling in distributed planning. Test it out on some
-- some queries involving joins and set operations.
SELECT device_id, max(kelvin) as Kel
FROM v_reading
WHERE (device_id, celsius) NOT IN (SELECT device_id, max(celsius) FROM v_reading GROUP BY device_id)
GROUP BY device_id
ORDER BY device_id ASC;
SELECT device_id, round(AVG( (d_farenheit + v_farenheit) / 2), 2) as Avg_Far
FROM (SELECT *
FROM (SELECT device_id, round(AVG(farenheit),2) as d_farenheit
FROM d_reading
GROUP BY device_id) AS subq
RIGHT JOIN (SELECT device_id, MAX(farenheit) AS v_farenheit
FROM d_reading
GROUP BY device_id) AS subq2
USING (device_id)
) AS finalq
GROUP BY device_id
ORDER BY device_id ASC;
SELECT device_id, MAX(farenheit) as farenheit
FROM
((SELECT device_id, round(AVG(farenheit),2) as farenheit
FROM d_reading
GROUP BY device_id)
UNION ALL (SELECT device_id, MAX(farenheit) AS farenheit
FROM d_reading
GROUP BY device_id) ) AS unioned
GROUP BY device_id
ORDER BY device_id ASC;
SELECT device_id, MAX(farenheit) as farenheit
FROM
((SELECT device_id, round(AVG(farenheit),2) as farenheit
FROM d_reading
GROUP BY device_id)
INTERSECT (SELECT device_id, MAX(farenheit) AS farenheit
FROM d_reading
GROUP BY device_id) ) AS intersected
GROUP BY device_id
ORDER BY device_id ASC;
SELECT device_id, MAX(farenheit) as farenheit
FROM
((SELECT device_id, round(AVG(farenheit),2) as farenheit
FROM d_reading
GROUP BY device_id)
EXCEPT (SELECT device_id, MAX(farenheit) AS farenheit
FROM d_reading
GROUP BY device_id) ) AS excepted
GROUP BY device_id
ORDER BY device_id ASC;
-- Ensure that UDFs such as alter_distributed_table, undistribute_table
-- and add_local_table_to_metadata work fine with VIRTUAL columns. For
-- this, PR #4616 changes are modified to handle VIRTUAL columns in
-- addition to STORED columns.
CREATE TABLE generated_stored_dist (
col_1 int,
"col\'_2" text,
col_3 text generated always as (UPPER("col\'_2")) virtual
);
SELECT create_distributed_table ('generated_stored_dist', 'col_1');
INSERT INTO generated_stored_dist VALUES (1, 'text_1'), (2, 'text_2');
SELECT * FROM generated_stored_dist ORDER BY 1,2,3;
INSERT INTO generated_stored_dist VALUES (1, 'text_1'), (2, 'text_2');
SELECT alter_distributed_table('generated_stored_dist', shard_count := 5, cascade_to_colocated := false);
SELECT * FROM generated_stored_dist ORDER BY 1,2,3;
CREATE TABLE generated_stored_local (
col_1 int,
"col\'_2" text,
col_3 text generated always as (UPPER("col\'_2")) stored
);
SELECT citus_add_local_table_to_metadata('generated_stored_local');
INSERT INTO generated_stored_local VALUES (1, 'text_1'), (2, 'text_2');
SELECT * FROM generated_stored_local ORDER BY 1,2,3;
SELECT create_distributed_table ('generated_stored_local', 'col_1');
INSERT INTO generated_stored_local VALUES (1, 'text_1'), (2, 'text_2');
SELECT * FROM generated_stored_local ORDER BY 1,2,3;
CREATE TABLE generated_stored_ref (
col_1 int,
col_2 int,
col_3 int generated always as (col_1+col_2) virtual,
col_4 int,
col_5 int generated always as (col_4*2-col_1) virtual
);
SELECT create_reference_table ('generated_stored_ref');
INSERT INTO generated_stored_ref (col_1, col_4) VALUES (1,2), (11,12);
INSERT INTO generated_stored_ref (col_1, col_2, col_4) VALUES (100,101,102), (200,201,202);
SELECT * FROM generated_stored_ref ORDER BY 1,2,3,4,5;
BEGIN;
SELECT undistribute_table('generated_stored_ref');
INSERT INTO generated_stored_ref (col_1, col_4) VALUES (11,12), (21,22);
INSERT INTO generated_stored_ref (col_1, col_2, col_4) VALUES (200,201,202), (300,301,302);
SELECT * FROM generated_stored_ref ORDER BY 1,2,3,4,5;
ROLLBACK;
BEGIN;
-- drop some of the columns not having "generated always as virtual" expressions
SET client_min_messages TO WARNING;
ALTER TABLE generated_stored_ref DROP COLUMN col_1 CASCADE;
RESET client_min_messages;
ALTER TABLE generated_stored_ref DROP COLUMN col_4;
-- show that undistribute_table works fine
SELECT undistribute_table('generated_stored_ref');
INSERT INTO generated_stored_ref VALUES (5);
SELECT * FROM generated_stored_REF ORDER BY 1;
ROLLBACK;
BEGIN;
-- now drop all columns
ALTER TABLE generated_stored_ref DROP COLUMN col_3;
ALTER TABLE generated_stored_ref DROP COLUMN col_5;
ALTER TABLE generated_stored_ref DROP COLUMN col_1;
ALTER TABLE generated_stored_ref DROP COLUMN col_2;
ALTER TABLE generated_stored_ref DROP COLUMN col_4;
-- show that undistribute_table works fine
SELECT undistribute_table('generated_stored_ref');
SELECT * FROM generated_stored_ref;
ROLLBACK;
-- cleanup with minimum verbosity -- cleanup with minimum verbosity
SET client_min_messages TO ERROR; SET client_min_messages TO ERROR;
RESET search_path; RESET search_path;