Add tests for FORCE_NULL * and FORCE_NOT_NULL * options for COPY FROM (#7812)

These options already existed in PG17, and we support them and have
tests for them in `multi_copy.sql`.

In PG17, their capability was extended to specify ALL columns at once
using *.
Citus performs the COPY correctly, as is validated by the added tests in
this PR.

Relevant PG commit:
https://github.com/postgres/postgres/commit/f6d4c9cf1

Copy-pasting from Postgres documentation what these options do, such
that the reviewer may better understand the tests added:

`FORCE_NOT_NULL`: Do not match the specified columns' values against the
null string. In the default case where the null string is empty, this
means that empty values will be read as zero-length strings rather than
nulls, even when they are not quoted. If * is specified, the option will
be applied to all columns. This option is allowed only in `COPY FROM`,
and only when using `CSV` format.

`FORCE_NULL`: Match the specified columns' values against the null
string, even if it has been quoted, and if a match is found set the
value to `NULL`. In the default case where the null string is empty,
this converts a quoted empty string into `NULL`. If * is specified, the
option will be applied to all columns. This option is allowed only in
`COPY FROM`, and only when using `CSV` format.

`FORCE_NULL` and `FORCE_NOT_NULL` can be used simultaneously on the same
column. This results in converting quoted null strings to null values
and unquoted null strings to empty strings.

Explain it to me like I'm a 5-year-old, for a text column:
`FORCE_NULL` looks for empty strings and registers them as `NULL`
`FORCE_NOT_NULL` looks for null values and registers them as empty
strings.
pull/7922/head
Naisila Puka 2024-12-26 16:52:42 +03:00 committed by naisila
parent 5e9f8d838c
commit b7d04038cb
2 changed files with 185 additions and 0 deletions

View File

@ -1238,6 +1238,101 @@ ERROR: Citus does not support COPY FROM with ON_ERROR option.
COPY check_ign_err FROM STDIN WITH (log_verbosity verbose);
ERROR: Citus does not support COPY FROM with LOG_VERBOSITY option.
-- End of Test for COPY ON_ERROR option
-- Test FORCE_NOT_NULL and FORCE_NULL options
-- FORCE_NULL * and FORCE_NOT_NULL * options for COPY FROM were added in PG17
-- Same tests as in PG copy2.sql, we just distribute the table first
-- Relevant PG17 commit: https://github.com/postgres/postgres/commit/f6d4c9cf1
CREATE TABLE forcetest (
a INT NOT NULL,
b TEXT NOT NULL,
c TEXT,
d TEXT,
e TEXT
);
\pset null NULL
SELECT create_distributed_table('forcetest', 'a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- should succeed with no effect ("b" remains an empty string, "c" remains NULL)
-- expected output for inserted row in test:
-- b | c
---------------------------------------------------------------------
-- | NULL
--(1 row)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c));
COMMIT;
SELECT b, c FROM forcetest WHERE a = 1;
b | c
---------------------------------------------------------------------
| NULL
(1 row)
-- should succeed, FORCE_NULL and FORCE_NOT_NULL can be both specified
-- expected output for inserted row in test:
-- c | d
---------------------------------------------------------------------
-- | NULL
--(1 row)
BEGIN;
COPY forcetest (a, b, c, d) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(c,d), FORCE_NULL(c,d));
COMMIT;
SELECT c, d FROM forcetest WHERE a = 2;
c | d
---------------------------------------------------------------------
| NULL
(1 row)
-- should succeed with no effect ("b" remains an empty string, "c" remains NULL)
-- expected output for inserted row in test:
-- b | c
---------------------------------------------------------------------
-- | NULL
--(1 row)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL *, FORCE_NULL *);
COMMIT;
SELECT b, c FROM forcetest WHERE a = 4;
b | c
---------------------------------------------------------------------
| NULL
(1 row)
-- should succeed with effect ("b" remains an empty string)
-- expected output for inserted row in test:
-- b | c
---------------------------------------------------------------------
-- |
--(1 row)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL *);
COMMIT;
SELECT b, c FROM forcetest WHERE a = 5;
b | c
---------------------------------------------------------------------
|
(1 row)
-- should succeed with effect ("c" remains NULL)
-- expected output for inserted row in test:
-- b | c
---------------------------------------------------------------------
-- b | NULL
--(1 row)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL *);
COMMIT;
SELECT b, c FROM forcetest WHERE a = 6;
b | c
---------------------------------------------------------------------
b | NULL
(1 row)
\pset null ''
-- End of Testing FORCE_NOT_NULL and FORCE_NULL options
\set VERBOSITY terse
SET client_min_messages TO WARNING;
DROP SCHEMA pg17 CASCADE;

View File

@ -621,6 +621,96 @@ COPY check_ign_err FROM STDIN WITH (log_verbosity verbose);
-- End of Test for COPY ON_ERROR option
-- Test FORCE_NOT_NULL and FORCE_NULL options
-- FORCE_NULL * and FORCE_NOT_NULL * options for COPY FROM were added in PG17
-- Same tests as in PG copy2.sql, we just distribute the table first
-- Relevant PG17 commit: https://github.com/postgres/postgres/commit/f6d4c9cf1
CREATE TABLE forcetest (
a INT NOT NULL,
b TEXT NOT NULL,
c TEXT,
d TEXT,
e TEXT
);
\pset null NULL
SELECT create_distributed_table('forcetest', 'a');
-- should succeed with no effect ("b" remains an empty string, "c" remains NULL)
-- expected output for inserted row in test:
-- b | c
-----+------
-- | NULL
--(1 row)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c));
1,,""
\.
COMMIT;
SELECT b, c FROM forcetest WHERE a = 1;
-- should succeed, FORCE_NULL and FORCE_NOT_NULL can be both specified
-- expected output for inserted row in test:
-- c | d
-----+------
-- | NULL
--(1 row)
BEGIN;
COPY forcetest (a, b, c, d) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(c,d), FORCE_NULL(c,d));
2,'a',,""
\.
COMMIT;
SELECT c, d FROM forcetest WHERE a = 2;
-- should succeed with no effect ("b" remains an empty string, "c" remains NULL)
-- expected output for inserted row in test:
-- b | c
-----+------
-- | NULL
--(1 row)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL *, FORCE_NULL *);
4,,""
\.
COMMIT;
SELECT b, c FROM forcetest WHERE a = 4;
-- should succeed with effect ("b" remains an empty string)
-- expected output for inserted row in test:
-- b | c
-----+---
-- |
--(1 row)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL *);
5,,""
\.
COMMIT;
SELECT b, c FROM forcetest WHERE a = 5;
-- should succeed with effect ("c" remains NULL)
-- expected output for inserted row in test:
-- b | c
-----+------
-- b | NULL
--(1 row)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL *);
6,"b",""
\.
COMMIT;
SELECT b, c FROM forcetest WHERE a = 6;
\pset null ''
-- End of Testing FORCE_NOT_NULL and FORCE_NULL options
\set VERBOSITY terse
SET client_min_messages TO WARNING;
DROP SCHEMA pg17 CASCADE;