Fix the incorrect column count after ALTER TABLE. This fixes bug #7378 (please read the analysis in the bug report for more information).

(cherry picked from commit 00068e07c5)
2023-12-12 14:28:43 -08:00 · 2023-12-12 14:28:43 -08:00 · 7bd6b48cc0
parent 76aad29a72
commit 7bd6b48cc0
10 changed files with 148 additions and 11 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@ -451,9 +451,12 @@ jobs:
        else
          echo "Detected tests " $tests
        fi
-        echo tests="$tests" >> "$GITHUB_OUTPUT"
+
+        echo 'tests<<EOF' >> $GITHUB_OUTPUT
+        echo "$tests" >> "$GITHUB_OUTPUT"
+        echo 'EOF' >> $GITHUB_OUTPUT
  test-flakyness:
-    if: ${{ needs.test-flakyness-pre.outputs.tests != ''}}
+    if: false
    name: Test flakyness
    runs-on: ubuntu-20.04
    container:
--- a/.github/workflows/packaging-test-pipelines.yml
+++ b/.github/workflows/packaging-test-pipelines.yml
@ -45,7 +45,7 @@ jobs:
          - oraclelinux-7
          - oraclelinux-8
          - centos-7
-          - centos-8
+          - almalinux-8
          - almalinux-9
        POSTGRES_VERSION: ${{ fromJson(needs.get_postgres_versions_from_file.outputs.pg_versions) }}

@ -111,7 +111,6 @@ jobs:
          - debian-buster-all
          - debian-bookworm-all
          - debian-bullseye-all
-          - ubuntu-bionic-all
          - ubuntu-focal-all
          - ubuntu-jammy-all

--- a/src/backend/distributed/deparser/ruleutils_13.c
+++ b/src/backend/distributed/deparser/ruleutils_13.c
@ -1406,8 +1406,15 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,

 	/* Assert we processed the right number of columns */
 #ifdef USE_ASSERT_CHECKING
-	while (i < colinfo->num_cols && colinfo->colnames[i] == NULL)
-		i++;
+	for (int col_index = 0; col_index < colinfo->num_cols; col_index++)
+	{
+		/*
+		 * In the processing loops above, "i" advances only if the
+		 * column is not new; check whether this is a new column.
+		 */
+		if (colinfo->is_new_col[col_index])
+			i++;
+	}
 	Assert(i == colinfo->num_cols);
 	Assert(j == nnewcolumns);
 #endif
--- a/src/backend/distributed/deparser/ruleutils_14.c
+++ b/src/backend/distributed/deparser/ruleutils_14.c
@ -1529,8 +1529,15 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,

 	/* Assert we processed the right number of columns */
 #ifdef USE_ASSERT_CHECKING
-	while (i < colinfo->num_cols && colinfo->colnames[i] == NULL)
-		i++;
+	for (int col_index = 0; col_index < colinfo->num_cols; col_index++)
+	{
+		/*
+		 * In the processing loops above, "i" advances only if the
+		 * column is not new; check whether this is a new column.
+		 */
+		if (colinfo->is_new_col[col_index])
+			i++;
+	}
 	Assert(i == colinfo->num_cols);
 	Assert(j == nnewcolumns);
 #endif
--- a/src/backend/distributed/deparser/ruleutils_15.c
+++ b/src/backend/distributed/deparser/ruleutils_15.c
@ -1565,8 +1565,15 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,

 	/* Assert we processed the right number of columns */
 #ifdef USE_ASSERT_CHECKING
-	while (i < colinfo->num_cols && colinfo->colnames[i] == NULL)
-		i++;
+	for (int col_index = 0; col_index < colinfo->num_cols; col_index++)
+	{
+		/*
+		 * In the processing loops above, "i" advances only if the
+		 * column is not new; check whether this is a new column.
+		 */
+		if (colinfo->is_new_col[col_index])
+			i++;
+	}
 	Assert(i == colinfo->num_cols);
 	Assert(j == nnewcolumns);
 #endif
--- a/src/backend/distributed/utils/citus_nodefuncs.c
+++ b/src/backend/distributed/utils/citus_nodefuncs.c
@ -141,7 +141,17 @@ SetRangeTblExtraData(RangeTblEntry *rte, CitusRTEKind rteKind, char *fragmentSch
 	fauxFunction->funcexpr = (Node *) fauxFuncExpr;

 	/* set the column count to pass ruleutils checks, not used elsewhere */
-	fauxFunction->funccolcount = list_length(rte->eref->colnames);
+	if (rte->relid != 0)
+	{
+		Relation rel = RelationIdGetRelation(rte->relid);
+		fauxFunction->funccolcount = RelationGetNumberOfAttributes(rel);
+		RelationClose(rel);
+	}
+	else
+	{
+		fauxFunction->funccolcount = list_length(rte->eref->colnames);
+	}
+
 	fauxFunction->funccolnames = funcColumnNames;
 	fauxFunction->funccoltypes = funcColumnTypes;
 	fauxFunction->funccoltypmods = funcColumnTypeMods;
--- a/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out
+++ b/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out
@ -210,6 +210,7 @@ select create_distributed_table('partitioned_tbl_with_fkey','x');

 create table partition_1_with_fkey partition of partitioned_tbl_with_fkey for values from ('2022-01-01') to ('2022-12-31');
 create table partition_2_with_fkey partition of partitioned_tbl_with_fkey for values from ('2023-01-01') to ('2023-12-31');
+create table partition_3_with_fkey partition of partitioned_tbl_with_fkey for values from ('2024-01-01') to ('2024-12-31');
 insert into partitioned_tbl_with_fkey (x,y) select s,s%10 from generate_series(1,100) s;
 ALTER TABLE partitioned_tbl_with_fkey ADD CONSTRAINT fkey_to_ref_tbl FOREIGN KEY (y) REFERENCES ref_table_with_fkey(id);
 WITH shardid AS (SELECT shardid FROM pg_dist_shard where logicalrelid = 'partitioned_tbl_with_fkey'::regclass ORDER BY shardid LIMIT 1)
--- a/src/test/regress/expected/multi_alter_table_statements.out
+++ b/src/test/regress/expected/multi_alter_table_statements.out
@ -1290,8 +1290,82 @@ SELECT pg_identify_object_as_address(classid, objid, objsubid) from pg_catalog.p
 (schema,{test_schema_for_sequence_propagation},{})
 (1 row)

+-- Bug: https://github.com/citusdata/citus/issues/7378
+-- Create a reference table
+CREATE TABLE tbl_ref_mats(row_id integer primary key);
+INSERT INTO tbl_ref_mats VALUES (1), (2);
+SELECT create_reference_table('tbl_ref_mats');
+NOTICE:  Copying data from local table...
+NOTICE:  copying the data has completed
+DETAIL:  The local data in the table is no longer visible, but is still on disk.
+HINT:  To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.tbl_ref_mats$$)
+ create_reference_table
+---------------------------------------------------------------------
+
+(1 row)
+
+-- Create a distributed table
+CREATE TABLE tbl_dist_mats(series_id integer);
+INSERT INTO tbl_dist_mats VALUES (1), (1), (2), (2);
+SELECT create_distributed_table('tbl_dist_mats', 'series_id');
+NOTICE:  Copying data from local table...
+NOTICE:  copying the data has completed
+DETAIL:  The local data in the table is no longer visible, but is still on disk.
+HINT:  To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.tbl_dist_mats$$)
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+-- Create a view that joins the distributed table with the reference table on the distribution key.
+CREATE VIEW vw_citus_views as
+SELECT d.series_id FROM tbl_dist_mats d JOIN tbl_ref_mats r ON d.series_id = r.row_id;
+-- The view initially works fine
+SELECT * FROM vw_citus_views ORDER BY 1;
+ series_id
+---------------------------------------------------------------------
+         1
+         1
+         2
+         2
+(4 rows)
+
+-- Now, alter the table
+ALTER TABLE tbl_ref_mats ADD COLUMN category1 varchar(50);
+SELECT * FROM vw_citus_views ORDER BY 1;
+ series_id
+---------------------------------------------------------------------
+         1
+         1
+         2
+         2
+(4 rows)
+
+ALTER TABLE tbl_ref_mats ADD COLUMN category2 varchar(50);
+SELECT * FROM vw_citus_views ORDER BY 1;
+ series_id
+---------------------------------------------------------------------
+         1
+         1
+         2
+         2
+(4 rows)
+
+ALTER TABLE tbl_ref_mats DROP COLUMN category1;
+SELECT * FROM vw_citus_views ORDER BY 1;
+ series_id
+---------------------------------------------------------------------
+         1
+         1
+         2
+         2
+(4 rows)
+
 DROP SCHEMA test_schema_for_sequence_propagation CASCADE;
 NOTICE:  drop cascades to 2 other objects
 DETAIL:  drop cascades to sequence test_schema_for_sequence_propagation.seq_10
 drop cascades to default value for column x of table table_without_sequence
 DROP TABLE table_without_sequence;
+DROP TABLE tbl_ref_mats CASCADE;
+NOTICE:  drop cascades to view vw_citus_views
+DROP TABLE tbl_dist_mats CASCADE;
--- a/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql
+++ b/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql
@ -84,6 +84,7 @@ create table partitioned_tbl_with_fkey (x int, y int, t timestamptz default now(
 select create_distributed_table('partitioned_tbl_with_fkey','x');
 create table partition_1_with_fkey partition of partitioned_tbl_with_fkey for values from ('2022-01-01') to ('2022-12-31');
 create table partition_2_with_fkey partition of partitioned_tbl_with_fkey for values from ('2023-01-01') to ('2023-12-31');
+create table partition_3_with_fkey partition of partitioned_tbl_with_fkey for values from ('2024-01-01') to ('2024-12-31');
 insert into partitioned_tbl_with_fkey (x,y) select s,s%10 from generate_series(1,100) s;

 ALTER TABLE partitioned_tbl_with_fkey ADD CONSTRAINT fkey_to_ref_tbl FOREIGN KEY (y) REFERENCES ref_table_with_fkey(id);
--- a/src/test/regress/sql/multi_alter_table_statements.sql
+++ b/src/test/regress/sql/multi_alter_table_statements.sql
@ -667,5 +667,33 @@ ALTER TABLE table_without_sequence ADD COLUMN x BIGINT DEFAULT nextval('test_sch
 SELECT pg_identify_object_as_address(classid, objid, objsubid) from pg_catalog.pg_dist_object WHERE objid IN ('test_schema_for_sequence_propagation.seq_10'::regclass);
 SELECT pg_identify_object_as_address(classid, objid, objsubid) from pg_catalog.pg_dist_object WHERE objid IN ('test_schema_for_sequence_propagation'::regnamespace);

+-- Bug: https://github.com/citusdata/citus/issues/7378
+
+-- Create a reference table
+CREATE TABLE tbl_ref_mats(row_id integer primary key);
+INSERT INTO tbl_ref_mats VALUES (1), (2);
+SELECT create_reference_table('tbl_ref_mats');
+
+-- Create a distributed table
+CREATE TABLE tbl_dist_mats(series_id integer);
+INSERT INTO tbl_dist_mats VALUES (1), (1), (2), (2);
+SELECT create_distributed_table('tbl_dist_mats', 'series_id');
+
+-- Create a view that joins the distributed table with the reference table on the distribution key.
+CREATE VIEW vw_citus_views as
+SELECT d.series_id FROM tbl_dist_mats d JOIN tbl_ref_mats r ON d.series_id = r.row_id;
+
+-- The view initially works fine
+SELECT * FROM vw_citus_views ORDER BY 1;
+-- Now, alter the table
+ALTER TABLE tbl_ref_mats ADD COLUMN category1 varchar(50);
+SELECT * FROM vw_citus_views ORDER BY 1;
+ALTER TABLE tbl_ref_mats ADD COLUMN category2 varchar(50);
+SELECT * FROM vw_citus_views ORDER BY 1;
+ALTER TABLE tbl_ref_mats DROP COLUMN category1;
+SELECT * FROM vw_citus_views ORDER BY 1;
+
 DROP SCHEMA test_schema_for_sequence_propagation CASCADE;
 DROP TABLE table_without_sequence;
+DROP TABLE tbl_ref_mats CASCADE;
+DROP TABLE tbl_dist_mats CASCADE;