From b4cc7219ae4739bb591dedd7cf5323edead43f13 Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Thu, 26 Dec 2024 16:52:42 +0300 Subject: [PATCH] Add tests for FORCE_NULL * and FORCE_NOT_NULL * options for COPY FROM (#7812) These options already existed in PG17, and we support them and have tests for them in `multi_copy.sql`. In PG17, their capability was extended to specify ALL columns at once using *. Citus performs the COPY correctly, as is validated by the added tests in this PR. Relevant PG commit: https://github.com/postgres/postgres/commit/f6d4c9cf1 Copy-pasting from Postgres documentation what these options do, such that the reviewer may better understand the tests added: `FORCE_NOT_NULL`: Do not match the specified columns' values against the null string. In the default case where the null string is empty, this means that empty values will be read as zero-length strings rather than nulls, even when they are not quoted. If * is specified, the option will be applied to all columns. This option is allowed only in `COPY FROM`, and only when using `CSV` format. `FORCE_NULL`: Match the specified columns' values against the null string, even if it has been quoted, and if a match is found set the value to `NULL`. In the default case where the null string is empty, this converts a quoted empty string into `NULL`. If * is specified, the option will be applied to all columns. This option is allowed only in `COPY FROM`, and only when using `CSV` format. `FORCE_NULL` and `FORCE_NOT_NULL` can be used simultaneously on the same column. This results in converting quoted null strings to null values and unquoted null strings to empty strings. Explain it to me like I'm a 5-year-old, for a text column: `FORCE_NULL` looks for empty strings and registers them as `NULL` `FORCE_NOT_NULL` looks for null values and registers them as empty strings. --- src/test/regress/expected/pg17.out | 95 ++++++++++++++++++++++++++++++ src/test/regress/sql/pg17.sql | 90 ++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+) diff --git a/src/test/regress/expected/pg17.out b/src/test/regress/expected/pg17.out index 99ed0c3e2..23efee4bd 100644 --- a/src/test/regress/expected/pg17.out +++ b/src/test/regress/expected/pg17.out @@ -1238,6 +1238,101 @@ ERROR: Citus does not support COPY FROM with ON_ERROR option. COPY check_ign_err FROM STDIN WITH (log_verbosity verbose); ERROR: Citus does not support COPY FROM with LOG_VERBOSITY option. -- End of Test for COPY ON_ERROR option +-- Test FORCE_NOT_NULL and FORCE_NULL options +-- FORCE_NULL * and FORCE_NOT_NULL * options for COPY FROM were added in PG17 +-- Same tests as in PG copy2.sql, we just distribute the table first +-- Relevant PG17 commit: https://github.com/postgres/postgres/commit/f6d4c9cf1 +CREATE TABLE forcetest ( + a INT NOT NULL, + b TEXT NOT NULL, + c TEXT, + d TEXT, + e TEXT +); +\pset null NULL +SELECT create_distributed_table('forcetest', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- should succeed with no effect ("b" remains an empty string, "c" remains NULL) +-- expected output for inserted row in test: +-- b | c +--------------------------------------------------------------------- +-- | NULL +--(1 row) +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +COMMIT; +SELECT b, c FROM forcetest WHERE a = 1; + b | c +--------------------------------------------------------------------- + | NULL +(1 row) + +-- should succeed, FORCE_NULL and FORCE_NOT_NULL can be both specified +-- expected output for inserted row in test: +-- c | d +--------------------------------------------------------------------- +-- | NULL +--(1 row) +BEGIN; +COPY forcetest (a, b, c, d) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(c,d), FORCE_NULL(c,d)); +COMMIT; +SELECT c, d FROM forcetest WHERE a = 2; + c | d +--------------------------------------------------------------------- + | NULL +(1 row) + +-- should succeed with no effect ("b" remains an empty string, "c" remains NULL) +-- expected output for inserted row in test: +-- b | c +--------------------------------------------------------------------- +-- | NULL +--(1 row) +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL *, FORCE_NULL *); +COMMIT; +SELECT b, c FROM forcetest WHERE a = 4; + b | c +--------------------------------------------------------------------- + | NULL +(1 row) + +-- should succeed with effect ("b" remains an empty string) +-- expected output for inserted row in test: +-- b | c +--------------------------------------------------------------------- +-- | +--(1 row) +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL *); +COMMIT; +SELECT b, c FROM forcetest WHERE a = 5; + b | c +--------------------------------------------------------------------- + | +(1 row) + +-- should succeed with effect ("c" remains NULL) +-- expected output for inserted row in test: +-- b | c +--------------------------------------------------------------------- +-- b | NULL +--(1 row) +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL *); +COMMIT; +SELECT b, c FROM forcetest WHERE a = 6; + b | c +--------------------------------------------------------------------- + b | NULL +(1 row) + +\pset null '' +-- End of Testing FORCE_NOT_NULL and FORCE_NULL options \set VERBOSITY terse SET client_min_messages TO WARNING; DROP SCHEMA pg17 CASCADE; diff --git a/src/test/regress/sql/pg17.sql b/src/test/regress/sql/pg17.sql index 2018d8b3b..ab8528f4b 100644 --- a/src/test/regress/sql/pg17.sql +++ b/src/test/regress/sql/pg17.sql @@ -621,6 +621,96 @@ COPY check_ign_err FROM STDIN WITH (log_verbosity verbose); -- End of Test for COPY ON_ERROR option +-- Test FORCE_NOT_NULL and FORCE_NULL options +-- FORCE_NULL * and FORCE_NOT_NULL * options for COPY FROM were added in PG17 +-- Same tests as in PG copy2.sql, we just distribute the table first +-- Relevant PG17 commit: https://github.com/postgres/postgres/commit/f6d4c9cf1 + +CREATE TABLE forcetest ( + a INT NOT NULL, + b TEXT NOT NULL, + c TEXT, + d TEXT, + e TEXT +); +\pset null NULL + +SELECT create_distributed_table('forcetest', 'a'); + +-- should succeed with no effect ("b" remains an empty string, "c" remains NULL) +-- expected output for inserted row in test: +-- b | c +-----+------ +-- | NULL +--(1 row) + +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +1,,"" +\. +COMMIT; +SELECT b, c FROM forcetest WHERE a = 1; + +-- should succeed, FORCE_NULL and FORCE_NOT_NULL can be both specified +-- expected output for inserted row in test: +-- c | d +-----+------ +-- | NULL +--(1 row) + +BEGIN; +COPY forcetest (a, b, c, d) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(c,d), FORCE_NULL(c,d)); +2,'a',,"" +\. +COMMIT; +SELECT c, d FROM forcetest WHERE a = 2; + +-- should succeed with no effect ("b" remains an empty string, "c" remains NULL) +-- expected output for inserted row in test: +-- b | c +-----+------ +-- | NULL +--(1 row) + +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL *, FORCE_NULL *); +4,,"" +\. +COMMIT; +SELECT b, c FROM forcetest WHERE a = 4; + +-- should succeed with effect ("b" remains an empty string) +-- expected output for inserted row in test: +-- b | c +-----+--- +-- | +--(1 row) + +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL *); +5,,"" +\. +COMMIT; +SELECT b, c FROM forcetest WHERE a = 5; + +-- should succeed with effect ("c" remains NULL) +-- expected output for inserted row in test: +-- b | c +-----+------ +-- b | NULL +--(1 row) + +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL *); +6,"b","" +\. +COMMIT; +SELECT b, c FROM forcetest WHERE a = 6; + +\pset null '' + +-- End of Testing FORCE_NOT_NULL and FORCE_NULL options + \set VERBOSITY terse SET client_min_messages TO WARNING; DROP SCHEMA pg17 CASCADE;