From f5a4a4bc74a0fb3d0e2bfb217fc2c4d849f1a2dc Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Fri, 4 Dec 2020 16:49:38 -0800 Subject: [PATCH] Columnar: Support zstd compression --- configure | 94 +++++++++++++++++++++++ configure.in | 17 ++++ src/backend/columnar/cstore.c | 3 + src/backend/columnar/cstore_compression.c | 61 +++++++++++++++ src/include/citus_config.h.in | 3 + src/include/citus_version.h.in | 3 + src/include/columnar/cstore.h | 1 + src/test/regress/columnar_am_schedule | 2 +- src/test/regress/expected/am_zstd.out | 71 +++++++++++++++++ src/test/regress/expected/am_zstd_0.out | 4 + src/test/regress/sql/am_zstd.sql | 41 ++++++++++ 11 files changed, 299 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/am_zstd.out create mode 100644 src/test/regress/expected/am_zstd_0.out create mode 100644 src/test/regress/sql/am_zstd.sql diff --git a/configure b/configure index 44e87ef69..48839b544 100755 --- a/configure +++ b/configure @@ -630,6 +630,7 @@ CITUS_LDFLAGS CITUS_CPPFLAGS CITUS_CFLAGS GIT_BIN +with_zstd with_lz4 EGREP GREP @@ -692,6 +693,7 @@ enable_option_checking with_extra_version enable_coverage with_lz4 +with_zstd with_libcurl with_reports_hostname ' @@ -1334,6 +1336,7 @@ Optional Packages: --with-extra-version=STRING append STRING to version --with-lz4 use lz4 + --with-zstd use zstd --without-libcurl do not use libcurl for anonymous statistics collection --with-reports-hostname=HOSTNAME @@ -4449,6 +4452,97 @@ Use --without-lz4 to disable lz4 support." "$LINENO" 5 fi +fi + +# +# ZSTD +# + + + +# Check whether --with-zstd was given. +if test "${with_zstd+set}" = set; then : + withval=$with_zstd; + case $withval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? 
"no argument expected for --with-zstd option" "$LINENO" 5 + ;; + esac + +else + with_zstd=no + +fi + + + + +if test "$with_zstd" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZSTD_decompress in -lzstd" >&5 +$as_echo_n "checking for ZSTD_decompress in -lzstd... " >&6; } +if ${ac_cv_lib_zstd_ZSTD_decompress+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lzstd $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char ZSTD_decompress (); +int +main () +{ +return ZSTD_decompress (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_zstd_ZSTD_decompress=yes +else + ac_cv_lib_zstd_ZSTD_decompress=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZSTD_decompress" >&5 +$as_echo "$ac_cv_lib_zstd_ZSTD_decompress" >&6; } +if test "x$ac_cv_lib_zstd_ZSTD_decompress" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBZSTD 1 +_ACEOF + + LIBS="-lzstd $LIBS" + +else + as_fn_error $? "zstd library not found +If you have zstd installed, see config.log for details on the +failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5 +fi + + ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default" +if test "x$ac_cv_header_zstd_h" = xyes; then : + +else + as_fn_error $? "zstd header not found +If you have zstd already installed, see config.log for details on the +failure. It is possible the compiler isn't looking in the proper directory."
"$LINENO" 5 +fi + + fi # diff --git a/configure.in b/configure.in index 4680fa3cb..de008c853 100644 --- a/configure.in +++ b/configure.in @@ -204,6 +204,23 @@ failure. It is possible the compiler isn't looking in the proper directory. Use --without-lz4 to disable lz4 support.])]) fi +# +# ZSTD +# +PGAC_ARG_BOOL(with, zstd, no, + [use zstd]) +AC_SUBST(with_zstd) + +if test "$with_zstd" = yes; then + AC_CHECK_LIB(zstd, ZSTD_decompress, [], + [AC_MSG_ERROR([zstd library not found +If you have zstd installed, see config.log for details on the +failure. It is possible the compiler isn't looking in the proper directory.])]) + AC_CHECK_HEADER(zstd.h, [], [AC_MSG_ERROR([zstd header not found +If you have zstd already installed, see config.log for details on the +failure. It is possible the compiler isn't looking in the proper directory.])]) +fi + # # libcurl # diff --git a/src/backend/columnar/cstore.c b/src/backend/columnar/cstore.c index 36f790c8e..8cef3e00a 100644 --- a/src/backend/columnar/cstore.c +++ b/src/backend/columnar/cstore.c @@ -38,6 +38,9 @@ static const struct config_enum_entry cstore_compression_options[] = { "pglz", COMPRESSION_PG_LZ, false }, #if HAVE_LIBLZ4 { "lz4", COMPRESSION_LZ4, false }, +#endif +#if HAVE_LIBZSTD + { "zstd", COMPRESSION_ZSTD, false }, #endif { NULL, 0, false } }; diff --git a/src/backend/columnar/cstore_compression.c b/src/backend/columnar/cstore_compression.c index cf3902dc6..a9740a6c2 100644 --- a/src/backend/columnar/cstore_compression.c +++ b/src/backend/columnar/cstore_compression.c @@ -21,6 +21,10 @@ #include <lz4.h> #endif +#if HAVE_LIBZSTD +#include <zstd.h> +#endif + /* * The information at the start of the compressed data. This decription is taken * from pg_lzcompress in pre-9.5 version of PostgreSQL.
@@ -81,6 +85,33 @@ CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, } #endif +#if HAVE_LIBZSTD + case COMPRESSION_ZSTD: + { + int maximumLength = ZSTD_compressBound(inputBuffer->len); + int compressionLevel = 3; + + resetStringInfo(outputBuffer); + enlargeStringInfo(outputBuffer, maximumLength); + + size_t compressedSize = ZSTD_compress(outputBuffer->data, + outputBuffer->maxlen, + inputBuffer->data, + inputBuffer->len, + compressionLevel); + + if (ZSTD_isError(compressedSize)) + { + ereport(WARNING, (errmsg("zstd compression failed"), + (errdetail("%s", ZSTD_getErrorName(compressedSize))))); + return false; + } + + outputBuffer->len = compressedSize; + return true; + } +#endif + case COMPRESSION_PG_LZ: { uint64 maximumLength = PGLZ_MAX_OUTPUT(inputBuffer->len) + @@ -159,6 +190,36 @@ DecompressBuffer(StringInfo buffer, } #endif +#if HAVE_LIBZSTD + case COMPRESSION_ZSTD: + { + StringInfo decompressedBuffer = makeStringInfo(); + enlargeStringInfo(decompressedBuffer, decompressedSize); + + size_t zstdDecompressSize = ZSTD_decompress(decompressedBuffer->data, + decompressedSize, + buffer->data, + buffer->len); + if (ZSTD_isError(zstdDecompressSize)) + { + ereport(ERROR, (errmsg("zstd decompression failed"), + (errdetail("%s", ZSTD_getErrorName( + zstdDecompressSize))))); + } + + if (zstdDecompressSize != decompressedSize) + { + ereport(ERROR, (errmsg("unexpected decompressed size"), + errdetail("Expected %zu, received %zu", (size_t) decompressedSize, + zstdDecompressSize))); + } + + decompressedBuffer->len = decompressedSize; + + return decompressedBuffer; + } +#endif + case COMPRESSION_PG_LZ: { StringInfo decompressedBuffer = NULL; diff --git a/src/include/citus_config.h.in b/src/include/citus_config.h.in index e3f39c5c2..428a9d870 100644 --- a/src/include/citus_config.h.in +++ b/src/include/citus_config.h.in @@ -43,6 +43,9 @@ /* Define to 1 if you have the `lz4' library (-llz4). */ #undef HAVE_LIBLZ4 +/* Define to 1 if you have the `zstd' library (-lzstd).
*/ +#undef HAVE_LIBZSTD + /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H diff --git a/src/include/citus_version.h.in b/src/include/citus_version.h.in index 8f7a21327..a9bddd603 100644 --- a/src/include/citus_version.h.in +++ b/src/include/citus_version.h.in @@ -27,6 +27,9 @@ /* Define to 1 if you have the `liblz4' library (-llz4). */ #undef HAVE_LIBLZ4 +/* Define to 1 if you have the `libzstd' library (-lzstd). */ +#undef HAVE_LIBZSTD + /* Base URL for statistics collection and update checks */ #undef REPORTS_BASE_URL diff --git a/src/include/columnar/cstore.h b/src/include/columnar/cstore.h index 3268dbd42..a1a010f34 100644 --- a/src/include/columnar/cstore.h +++ b/src/include/columnar/cstore.h @@ -57,6 +57,7 @@ typedef enum COMPRESSION_NONE = 0, COMPRESSION_PG_LZ = 1, COMPRESSION_LZ4 = 2, + COMPRESSION_ZSTD = 3, COMPRESSION_COUNT } CompressionType; diff --git a/src/test/regress/columnar_am_schedule b/src/test/regress/columnar_am_schedule index 83e162f72..e861e7b59 100644 --- a/src/test/regress/columnar_am_schedule +++ b/src/test/regress/columnar_am_schedule @@ -14,7 +14,7 @@ test: am_update_delete test: am_copyto test: am_alter test: am_alter_set_type -test: am_lz4 +test: am_lz4 am_zstd test: am_rollback test: am_truncate test: am_vacuum diff --git a/src/test/regress/expected/am_zstd.out b/src/test/regress/expected/am_zstd.out new file mode 100644 index 000000000..ac315cdfa --- /dev/null +++ b/src/test/regress/expected/am_zstd.out @@ -0,0 +1,71 @@ +SELECT compression_type_supported('zstd') AS zstd_supported \gset +\if :zstd_supported +\else +\q +\endif +CREATE SCHEMA am_zstd; +SET search_path TO am_zstd; +SET columnar.compression TO 'zstd'; +CREATE TABLE test_zstd (a int, b text, c int) USING columnar; +INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i; +SELECT count(*) FROM test_zstd; + count +--------------------------------------------------------------------- + 10000 +(1 row) + +INSERT
INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i; +SELECT count(*) FROM test_zstd; + count +--------------------------------------------------------------------- + 20001 +(1 row) + +VACUUM VERBOSE test_zstd; +INFO: statistics for "test_zstd": +storage id: xxxxx +total file size: 40960, total data size: 14947 +compression rate: 21.91x +total row count: 20001, stripe count: 2, average rows per stripe: 10000 +chunk count: 9, containing data for dropped columns: 0, zstd compressed: 9 + +SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5; + a | b | c +--------------------------------------------------------------------- + 0 | 0 | 4 + 0 | 1 | 4 + 0 | 10 | 4 + 0 | 11 | 4 + 0 | 12 | 4 +(5 rows) + +-- compare compression rate to pglz +SET columnar.compression TO 'pglz'; +CREATE TABLE test_pglz (LIKE test_zstd) USING columnar; +INSERT INTO test_pglz SELECT * FROM test_zstd; +VACUUM VERBOSE test_pglz; +INFO: statistics for "test_pglz": +storage id: xxxxx +total file size: 57344, total data size: 35986 +compression rate: 9.10x +total row count: 20001, stripe count: 1, average rows per stripe: 20001 +chunk count: 9, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 6 + +-- Other operations +VACUUM FULL test_zstd; +ANALYZE test_zstd; +SELECT count(DISTINCT test_zstd.*) FROM test_zstd; + count +--------------------------------------------------------------------- + 6002 +(1 row) + +TRUNCATE test_zstd; +SELECT count(DISTINCT test_zstd.*) FROM test_zstd; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SET client_min_messages TO WARNING; +DROP SCHEMA am_zstd CASCADE; diff --git a/src/test/regress/expected/am_zstd_0.out b/src/test/regress/expected/am_zstd_0.out new file mode 100644 index 000000000..08128a713 --- /dev/null +++ b/src/test/regress/expected/am_zstd_0.out @@ -0,0 +1,4 @@ +SELECT compression_type_supported('zstd') AS zstd_supported \gset +\if 
:zstd_supported +\else +\q diff --git a/src/test/regress/sql/am_zstd.sql b/src/test/regress/sql/am_zstd.sql new file mode 100644 index 000000000..8e924709a --- /dev/null +++ b/src/test/regress/sql/am_zstd.sql @@ -0,0 +1,41 @@ +SELECT compression_type_supported('zstd') AS zstd_supported \gset +\if :zstd_supported +\else +\q +\endif + +CREATE SCHEMA am_zstd; +SET search_path TO am_zstd; + +SET columnar.compression TO 'zstd'; +CREATE TABLE test_zstd (a int, b text, c int) USING columnar; + +INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i; +SELECT count(*) FROM test_zstd; + +INSERT INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i; +SELECT count(*) FROM test_zstd; + +VACUUM VERBOSE test_zstd; + +SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5; + +-- compare compression rate to pglz +SET columnar.compression TO 'pglz'; +CREATE TABLE test_pglz (LIKE test_zstd) USING columnar; +INSERT INTO test_pglz SELECT * FROM test_zstd; + +VACUUM VERBOSE test_pglz; + +-- Other operations +VACUUM FULL test_zstd; +ANALYZE test_zstd; + +SELECT count(DISTINCT test_zstd.*) FROM test_zstd; + +TRUNCATE test_zstd; + +SELECT count(DISTINCT test_zstd.*) FROM test_zstd; + +SET client_min_messages TO WARNING; +DROP SCHEMA am_zstd CASCADE;