Columnar: Support zstd compression

pull/4388/head
Hadi Moshayedi 2020-12-04 16:49:38 -08:00
parent 3f81ee26fd
commit f5a4a4bc74
11 changed files with 299 additions and 1 deletions

94
configure vendored
View File

@ -630,6 +630,7 @@ CITUS_LDFLAGS
CITUS_CPPFLAGS
CITUS_CFLAGS
GIT_BIN
with_zstd
with_lz4
EGREP
GREP
@ -692,6 +693,7 @@ enable_option_checking
with_extra_version
enable_coverage
with_lz4
with_zstd
with_libcurl
with_reports_hostname
'
@ -1334,6 +1336,7 @@ Optional Packages:
--with-extra-version=STRING
append STRING to version
--with-lz4 use lz4
--with-zstd use zstd
--without-libcurl do not use libcurl for anonymous statistics
collection
--with-reports-hostname=HOSTNAME
@ -4449,6 +4452,97 @@ Use --without-lz4 to disable lz4 support." "$LINENO" 5
fi
fi
#
# ZSTD
#
# Check whether --with-zstd was given.
# The option is boolean: only "yes" and "no" are accepted as values, and
# with_zstd falls back to "no" when the flag was not passed at all.
if test "${with_zstd+set}" = set; then :
withval=$with_zstd;
case $withval in
yes)
:
;;
no)
:
;;
*)
# Anything other than yes/no is rejected and configure aborts.
as_fn_error $? "no argument expected for --with-zstd option" "$LINENO" 5
;;
esac
else
# Flag absent from the command line: default to no.
with_zstd=no
fi
if test "$with_zstd" = yes; then
# Link check for libzstd: try to link a tiny program that references
# ZSTD_decompress with -lzstd added to LIBS. The outcome is kept in
# ac_cv_lib_zstd_ZSTD_decompress and reused if that variable is already set.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZSTD_decompress in -lzstd" >&5
$as_echo_n "checking for ZSTD_decompress in -lzstd... " >&6; }
if ${ac_cv_lib_zstd_ZSTD_decompress+:} false; then :
$as_echo_n "(cached) " >&6
else
# Save LIBS so that the probe's -lzstd can be undone after the link attempt.
ac_check_lib_save_LIBS=$LIBS
LIBS="-lzstd $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char ZSTD_decompress ();
int
main ()
{
return ZSTD_decompress ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_zstd_ZSTD_decompress=yes
else
ac_cv_lib_zstd_ZSTD_decompress=no
fi
# Remove the probe's temporary files and restore the original LIBS.
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZSTD_decompress" >&5
$as_echo "$ac_cv_lib_zstd_ZSTD_decompress" >&6; }
if test "x$ac_cv_lib_zstd_ZSTD_decompress" = xyes; then :
# Library found: record HAVE_LIBZSTD in confdefs.h and link with -lzstd
# from here on.
cat >>confdefs.h <<_ACEOF
#define HAVE_LIBZSTD 1
_ACEOF
LIBS="-lzstd $LIBS"
else
# --with-zstd was requested but the library cannot be linked: abort.
as_fn_error $? "zstd library not found
If you have zstd installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5
fi
# Header check for zstd.h: the result lands in ac_cv_header_zstd_h, and
# configure aborts with guidance when the header is not usable.
ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default"
if test "x$ac_cv_header_zstd_h" = xyes; then :
else
# The message previously said "lz4zstd", a leftover from the lz4 check.
as_fn_error $? "zstd header not found
If you have zstd already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5
fi
fi
#

View File

@ -204,6 +204,23 @@ failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable lz4 support.])])
fi
#
# ZSTD
#
dnl Optional zstd support. The option defaults to no; when it is enabled,
dnl both the zstd library and the zstd.h header must be usable, otherwise
dnl configure stops with an error.
PGAC_ARG_BOOL(with, zstd, no,
              [use zstd])
AC_SUBST(with_zstd)

if test "$with_zstd" = yes; then
  AC_CHECK_LIB(zstd, ZSTD_decompress, [],
               [AC_MSG_ERROR([zstd library not found
If you have zstd installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.])])
  AC_CHECK_HEADER(zstd.h, [], [AC_MSG_ERROR([zstd header not found
If you have zstd already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.])])
fi
#
# libcurl
#

View File

@ -38,6 +38,9 @@ static const struct config_enum_entry cstore_compression_options[] =
{ "pglz", COMPRESSION_PG_LZ, false },
#if HAVE_LIBLZ4
{ "lz4", COMPRESSION_LZ4, false },
#endif
#if HAVE_LIBZSTD
{ "zstd", COMPRESSION_ZSTD, false },
#endif
{ NULL, 0, false }
};

View File

@ -21,6 +21,10 @@
#include <lz4.h>
#endif
#if HAVE_LIBZSTD
#include <zstd.h>
#endif
/*
* The information at the start of the compressed data. This description is taken
* from pg_lzcompress in pre-9.5 version of PostgreSQL.
@ -81,6 +85,33 @@ CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer,
}
#endif
#if HAVE_LIBZSTD
		case COMPRESSION_ZSTD:
		{
			/* fixed zstd compression level passed to ZSTD_compress() */
			const int zstdLevel = 3;

			/* size ZSTD_compressBound() reports for an input of this length */
			int boundLength = ZSTD_compressBound(inputBuffer->len);

			/* start from an empty output buffer with at least that much room */
			resetStringInfo(outputBuffer);
			enlargeStringInfo(outputBuffer, boundLength);

			size_t zstdResult = ZSTD_compress(outputBuffer->data,
											  outputBuffer->maxlen,
											  inputBuffer->data,
											  inputBuffer->len,
											  zstdLevel);

			if (!ZSTD_isError(zstdResult))
			{
				/* success: the result is the number of bytes written */
				outputBuffer->len = zstdResult;
				return true;
			}

			/* failure is reported as a WARNING and signalled by returning false */
			ereport(WARNING, (errmsg("zstd compression failed"),
							  errdetail("%s", ZSTD_getErrorName(zstdResult))));
			return false;
		}
#endif
case COMPRESSION_PG_LZ:
{
uint64 maximumLength = PGLZ_MAX_OUTPUT(inputBuffer->len) +
@ -159,6 +190,36 @@ DecompressBuffer(StringInfo buffer,
}
#endif
#if HAVE_LIBZSTD
		case COMPRESSION_ZSTD:
		{
			/*
			 * Decompress into a freshly allocated StringInfo that has room for
			 * decompressedSize bytes, and return it to the caller.
			 */
			StringInfo decompressedBuffer = makeStringInfo();
			enlargeStringInfo(decompressedBuffer, decompressedSize);

			size_t zstdDecompressSize = ZSTD_decompress(decompressedBuffer->data,
														decompressedSize,
														buffer->data,
														buffer->len);

			/* zstd reports failure through the return value */
			if (ZSTD_isError(zstdDecompressSize))
			{
				ereport(ERROR, (errmsg("zstd decompression failed"),
								errdetail("%s", ZSTD_getErrorName(
											  zstdDecompressSize))));
			}

			/* the produced size must match the size the caller expected */
			if (zstdDecompressSize != decompressedSize)
			{
				/*
				 * Cast to unsigned long long and print with %llu: size_t is
				 * not "long" on every platform, so %ld is not a portable
				 * conversion for these values.
				 */
				ereport(ERROR, (errmsg("unexpected decompressed size"),
								errdetail("Expected %llu, received %llu",
										  (unsigned long long) decompressedSize,
										  (unsigned long long) zstdDecompressSize)));
			}

			decompressedBuffer->len = decompressedSize;
			return decompressedBuffer;
		}
#endif
case COMPRESSION_PG_LZ:
{
StringInfo decompressedBuffer = NULL;

View File

@ -43,6 +43,9 @@
/* Define to 1 if you have the `lz4' library (-llz4). */
#undef HAVE_LIBLZ4
/* Define to 1 if you have the `zstd' library (-lzstd). */
#undef HAVE_LIBZSTD
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H

View File

@ -27,6 +27,9 @@
/* Define to 1 if you have the `liblz4' library (-llz4). */
#undef HAVE_LIBLZ4
/* Define to 1 if you have the `libzstd' library (-lzstd). */
#undef HAVE_LIBZSTD
/* Base URL for statistics collection and update checks */
#undef REPORTS_BASE_URL

View File

@ -57,6 +57,7 @@ typedef enum
COMPRESSION_NONE = 0,
COMPRESSION_PG_LZ = 1,
COMPRESSION_LZ4 = 2,
COMPRESSION_ZSTD = 3,
COMPRESSION_COUNT
} CompressionType;

View File

@ -14,7 +14,7 @@ test: am_update_delete
test: am_copyto
test: am_alter
test: am_alter_set_type
test: am_lz4
test: am_lz4 am_zstd
test: am_rollback
test: am_truncate
test: am_vacuum

View File

@ -0,0 +1,71 @@
SELECT compression_type_supported('zstd') AS zstd_supported \gset
\if :zstd_supported
\else
\q
\endif
CREATE SCHEMA am_zstd;
SET search_path TO am_zstd;
SET columnar.compression TO 'zstd';
CREATE TABLE test_zstd (a int, b text, c int) USING columnar;
INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i;
SELECT count(*) FROM test_zstd;
count
---------------------------------------------------------------------
10000
(1 row)
INSERT INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i;
SELECT count(*) FROM test_zstd;
count
---------------------------------------------------------------------
20001
(1 row)
VACUUM VERBOSE test_zstd;
INFO: statistics for "test_zstd":
storage id: xxxxx
total file size: 40960, total data size: 14947
compression rate: 21.91x
total row count: 20001, stripe count: 2, average rows per stripe: 10000
chunk count: 9, containing data for dropped columns: 0, zstd compressed: 9
SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5;
a | b | c
---------------------------------------------------------------------
0 | 0 | 4
0 | 1 | 4
0 | 10 | 4
0 | 11 | 4
0 | 12 | 4
(5 rows)
-- compare compression rate to pglz
SET columnar.compression TO 'pglz';
CREATE TABLE test_pglz (LIKE test_zstd) USING columnar;
INSERT INTO test_pglz SELECT * FROM test_zstd;
VACUUM VERBOSE test_pglz;
INFO: statistics for "test_pglz":
storage id: xxxxx
total file size: 57344, total data size: 35986
compression rate: 9.10x
total row count: 20001, stripe count: 1, average rows per stripe: 20001
chunk count: 9, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 6
-- Other operations
VACUUM FULL test_zstd;
ANALYZE test_zstd;
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
count
---------------------------------------------------------------------
6002
(1 row)
TRUNCATE test_zstd;
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
count
---------------------------------------------------------------------
0
(1 row)
SET client_min_messages TO WARNING;
DROP SCHEMA am_zstd CASCADE;

View File

@ -0,0 +1,4 @@
SELECT compression_type_supported('zstd') AS zstd_supported \gset
\if :zstd_supported
\else
\q

View File

@ -0,0 +1,41 @@
SELECT compression_type_supported('zstd') AS zstd_supported \gset
\if :zstd_supported
\else
\q
\endif
CREATE SCHEMA am_zstd;
SET search_path TO am_zstd;
SET columnar.compression TO 'zstd';
CREATE TABLE test_zstd (a int, b text, c int) USING columnar;
INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i;
SELECT count(*) FROM test_zstd;
INSERT INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i;
SELECT count(*) FROM test_zstd;
VACUUM VERBOSE test_zstd;
SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5;
-- compare compression rate to pglz
SET columnar.compression TO 'pglz';
CREATE TABLE test_pglz (LIKE test_zstd) USING columnar;
INSERT INTO test_pglz SELECT * FROM test_zstd;
VACUUM VERBOSE test_pglz;
-- Other operations
VACUUM FULL test_zstd;
ANALYZE test_zstd;
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
TRUNCATE test_zstd;
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
SET client_min_messages TO WARNING;
DROP SCHEMA am_zstd CASCADE;