diff --git a/src/backend/distributed/planner/multi_master_planner.c b/src/backend/distributed/planner/multi_master_planner.c index a574ccd7f..456a74267 100644 --- a/src/backend/distributed/planner/multi_master_planner.c +++ b/src/backend/distributed/planner/multi_master_planner.c @@ -177,6 +177,20 @@ CitusCustomScanPathPlan(PlannerInfo *root, { CitusCustomScanPath *citusPath = (CitusCustomScanPath *) best_path; + /* + * Columns could have been pruned from the target list by the standard planner. + * A situation in which this might happen is a CASE that is proven to be always the + * same causing the other column to become useless: + * CASE WHEN ... <> NULL + * THEN ... + * ELSE ... + * END + * Since nothing is equal to NULL it will always end up in the else branch. The final + * target list the planner needs from our node is passed in as tlist. By placing that + * as the target list on our scan the internal rows will be projected to this one. + */ + citusPath->remoteScan->scan.plan.targetlist = tlist; + /* clauses might have been added by the planner, need to add them to our scan */ RestrictInfo *restrictInfo = NULL; List **quals = &citusPath->remoteScan->scan.plan.qual; diff --git a/src/test/regress/expected/sqlsmith_failures.out b/src/test/regress/expected/sqlsmith_failures.out new file mode 100644 index 000000000..6a2df9b99 --- /dev/null +++ b/src/test/regress/expected/sqlsmith_failures.out @@ -0,0 +1,151 @@ +-- setup schema used for sqlsmith runs +-- source: https://gist.github.com/will/e8a1e6efd46ac82f1b61d0c0ccab1b52 +CREATE SCHEMA sqlsmith_failures; +SET search_path TO sqlsmith_failures, public; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 1280000; +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 11 AS server_version_above_eleven +\gset +begin; +SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; +create table countries( + id serial primary key + , name text + , code varchar(2) collate "C" unique +); 
+insert into countries(name, code) select 'country-'||i, i::text from generate_series(10,99) i; +select create_reference_table('countries'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$sqlsmith_failures.countries$$) + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +create table orgs ( + id bigserial primary key + , name text + , created_at timestamptz default now() +); +select create_distributed_table('orgs', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +\if :server_version_above_eleven +-- pg12 and above support generated columns +create table users ( + id bigserial + , org_id bigint references orgs(id) + , name text + , created_at timestamptz default now() + , country_id int -- references countries(id) + , score bigint generated always as (id + country_id) stored + , primary key (org_id, id) +); +\else +-- pg11 and below don't have generated columns, use a normal column +create table users ( + id bigserial + , org_id bigint references orgs(id) + , name text + , created_at timestamptz default now() + , country_id int -- references countries(id) + , score bigint + , primary key (org_id, id) +); +\endif +select create_distributed_table('users', 'org_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +alter table users add constraint fk_user_country foreign key (country_id) references countries(id); +create table orders ( + id bigserial + , org_id bigint references orgs(id) + , user_id bigint + , price int + , info jsonb + , primary key (org_id, id) + , foreign key (org_id, user_id) references users(org_id, id) +); +select create_distributed_table('orders', 
'org_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +create table events ( + id bigserial not null + , user_id bigint not null + , org_id bigint not null + , event_time timestamp not null default now() + , event_type int not null default 0 + , payload jsonb + , primary key (user_id, id) +); +create index event_time_idx on events using BRIN (event_time); +create index event_json_idx on events using gin(payload); +select create_distributed_table('events', 'user_id'); -- on purpose don't collocate on correctly on org_id + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +create table local_data( + id bigserial primary key + , val int default ( (random()*100)::int ) +); +-- data loading takes ~30 seconds, lets hope we can skip this for all reproductions. When +-- there is a sqlsmith failure that needs the data we can uncomment the block below. +-- insert into orgs(id, name) select i,'org-'||i from generate_series(1,1000) i; +-- insert into users(id, name, org_id, country_id) select i,'user-'||i, i%1000+1, (i%90)+1 from generate_series(1,100000) i; +-- insert into orders(id, org_id, user_id, price) select i, ((i%100000+1)%1000)+1 , i%100000+1, i/100 from generate_series(1,1000000) i; +-- insert into events(id, org_id, user_id, event_type) select i, ((i%100000+1)%1000)+1 , i%100000+1, i/100 from generate_series(1,1000000) i; +-- insert into local_data(id) select generate_series(1,1000); +commit; +-- SQL SMITH ASSERTION FAILURE https://github.com/citusdata/citus/issues/3809 +-- Root cause: pruned worker columns not projected correctly on coordinator causing an assertion in the postgres standard planner +select + case when pg_catalog.bit_or(cast(cast(coalesce(cast(null as "bit"), cast(null as "bit")) as "bit") as "bit")) over (partition by subq_0.c3 order by subq_0.c0) <> cast(null as "bit") + then subq_0.c3 + else subq_0.c3 + end as c0, + 30 as 
c1, + subq_0.c2 as c2 +from + (select + pg_catalog.websearch_to_tsquery( + cast(pg_catalog.regconfigin(cast(cast(null as cstring) as cstring)) as regconfig), + cast((select type from citus.pg_dist_object limit 1 offset 1) as text) + ) as c0, + sample_0.org_id as c1, + sample_0.id as c2, + sample_0.score as c3, + sample_0.country_id as c4, + sample_0.org_id as c5, + sample_0.org_id as c6 + from + sqlsmith_failures.users as sample_0 tablesample system (7.5) + where sample_0.org_id is not NULL) as subq_0 +where (select pg_catalog.array_agg(id) from sqlsmith_failures.countries) + is not NULL; + c0 | c1 | c2 +--------------------------------------------------------------------- +(0 rows) + +-- cleanup +DROP SCHEMA sqlsmith_failures CASCADE; +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table countries +drop cascades to table orgs +drop cascades to table users +drop cascades to table orders +drop cascades to table events +drop cascades to table local_data diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 583a97ebf..1d6ce622e 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -361,3 +361,8 @@ test: ensure_no_intermediate_data_leak # in the shared memory # -------- test: ensure_no_shared_connection_leak + +# --------- +# run queries generated by sql smith that caused issues in the past +# -------- +test: sqlsmith_failures diff --git a/src/test/regress/sql/sqlsmith_failures.sql b/src/test/regress/sql/sqlsmith_failures.sql new file mode 100644 index 000000000..146147b2f --- /dev/null +++ b/src/test/regress/sql/sqlsmith_failures.sql @@ -0,0 +1,126 @@ +-- setup schema used for sqlsmith runs +-- source: https://gist.github.com/will/e8a1e6efd46ac82f1b61d0c0ccab1b52 +CREATE SCHEMA sqlsmith_failures; +SET search_path TO sqlsmith_failures, public; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 1280000; + +SHOW server_version \gset +SELECT substring(:'server_version', 
'\d+')::int > 11 AS server_version_above_eleven +\gset + +begin; + +SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; + +create table countries( + id serial primary key + , name text + , code varchar(2) collate "C" unique +); +insert into countries(name, code) select 'country-'||i, i::text from generate_series(10,99) i; +select create_reference_table('countries'); + +create table orgs ( + id bigserial primary key + , name text + , created_at timestamptz default now() +); +select create_distributed_table('orgs', 'id'); + +\if :server_version_above_eleven +-- pg12 and above support generated columns +create table users ( + id bigserial + , org_id bigint references orgs(id) + , name text + , created_at timestamptz default now() + , country_id int -- references countries(id) + , score bigint generated always as (id + country_id) stored + , primary key (org_id, id) +); +\else +-- pg11 and below don't have generated columns, use a normal column +create table users ( + id bigserial + , org_id bigint references orgs(id) + , name text + , created_at timestamptz default now() + , country_id int -- references countries(id) + , score bigint + , primary key (org_id, id) +); +\endif + +select create_distributed_table('users', 'org_id'); +alter table users add constraint fk_user_country foreign key (country_id) references countries(id); + +create table orders ( + id bigserial + , org_id bigint references orgs(id) + , user_id bigint + , price int + , info jsonb + , primary key (org_id, id) + , foreign key (org_id, user_id) references users(org_id, id) +); +select create_distributed_table('orders', 'org_id'); + +create table events ( + id bigserial not null + , user_id bigint not null + , org_id bigint not null + , event_time timestamp not null default now() + , event_type int not null default 0 + , payload jsonb + , primary key (user_id, id) +); +create index event_time_idx on events using BRIN (event_time); +create index event_json_idx on events using gin(payload); +select 
create_distributed_table('events', 'user_id'); -- on purpose don't collocate on correctly on org_id + +create table local_data( + id bigserial primary key + , val int default ( (random()*100)::int ) +); + +-- data loading takes ~30 seconds, lets hope we can skip this for all reproductions. When +-- there is a sqlsmith failure that needs the data we can uncomment the block below. + +-- insert into orgs(id, name) select i,'org-'||i from generate_series(1,1000) i; +-- insert into users(id, name, org_id, country_id) select i,'user-'||i, i%1000+1, (i%90)+1 from generate_series(1,100000) i; +-- insert into orders(id, org_id, user_id, price) select i, ((i%100000+1)%1000)+1 , i%100000+1, i/100 from generate_series(1,1000000) i; +-- insert into events(id, org_id, user_id, event_type) select i, ((i%100000+1)%1000)+1 , i%100000+1, i/100 from generate_series(1,1000000) i; +-- insert into local_data(id) select generate_series(1,1000); + +commit; + +-- SQL SMITH ASSERTION FAILURE https://github.com/citusdata/citus/issues/3809 +-- Root cause: pruned worker columns not projected correctly on coordinator causing an assertion in the postgres standard planner +select + case when pg_catalog.bit_or(cast(cast(coalesce(cast(null as "bit"), cast(null as "bit")) as "bit") as "bit")) over (partition by subq_0.c3 order by subq_0.c0) <> cast(null as "bit") + then subq_0.c3 + else subq_0.c3 + end as c0, + 30 as c1, + subq_0.c2 as c2 +from + (select + pg_catalog.websearch_to_tsquery( + cast(pg_catalog.regconfigin(cast(cast(null as cstring) as cstring)) as regconfig), + cast((select type from citus.pg_dist_object limit 1 offset 1) as text) + ) as c0, + sample_0.org_id as c1, + sample_0.id as c2, + sample_0.score as c3, + sample_0.country_id as c4, + sample_0.org_id as c5, + sample_0.org_id as c6 + from + sqlsmith_failures.users as sample_0 tablesample system (7.5) + where sample_0.org_id is not NULL) as subq_0 +where (select pg_catalog.array_agg(id) from sqlsmith_failures.countries) + is not 
NULL; + +-- cleanup +DROP SCHEMA sqlsmith_failures CASCADE;