mirror of https://github.com/citusdata/citus.git
Columnar: Support zstd compression
parent
3f81ee26fd
commit
f5a4a4bc74
|
@ -630,6 +630,7 @@ CITUS_LDFLAGS
|
|||
CITUS_CPPFLAGS
|
||||
CITUS_CFLAGS
|
||||
GIT_BIN
|
||||
with_zstd
|
||||
with_lz4
|
||||
EGREP
|
||||
GREP
|
||||
|
@ -692,6 +693,7 @@ enable_option_checking
|
|||
with_extra_version
|
||||
enable_coverage
|
||||
with_lz4
|
||||
with_zstd
|
||||
with_libcurl
|
||||
with_reports_hostname
|
||||
'
|
||||
|
@ -1334,6 +1336,7 @@ Optional Packages:
|
|||
--with-extra-version=STRING
|
||||
append STRING to version
|
||||
--with-lz4 use lz4
|
||||
--with-zstd use zstd
|
||||
--without-libcurl do not use libcurl for anonymous statistics
|
||||
collection
|
||||
--with-reports-hostname=HOSTNAME
|
||||
|
@ -4449,6 +4452,97 @@ Use --without-lz4 to disable lz4 support." "$LINENO" 5
|
|||
fi
|
||||
|
||||
|
||||
fi
|
||||
|
||||
#
|
||||
# ZSTD
|
||||
#
|
||||
|
||||
|
||||
|
||||
# Check whether --with-zstd was given.
|
||||
if test "${with_zstd+set}" = set; then :
|
||||
withval=$with_zstd;
|
||||
case $withval in
|
||||
yes)
|
||||
:
|
||||
;;
|
||||
no)
|
||||
:
|
||||
;;
|
||||
*)
|
||||
as_fn_error $? "no argument expected for --with-zstd option" "$LINENO" 5
|
||||
;;
|
||||
esac
|
||||
|
||||
else
|
||||
with_zstd=no
|
||||
|
||||
fi
|
||||
|
||||
|
||||
|
||||
|
||||
if test "$with_zstd" = yes; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZSTD_decompress in -lzstd" >&5
|
||||
$as_echo_n "checking for ZSTD_decompress in -lzstd... " >&6; }
|
||||
if ${ac_cv_lib_zstd_ZSTD_decompress+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
ac_check_lib_save_LIBS=$LIBS
|
||||
LIBS="-lzstd $LIBS"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
|
||||
/* Override any GCC internal prototype to avoid an error.
|
||||
Use char because int might match the return type of a GCC
|
||||
builtin and then its argument prototype would still apply. */
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
char ZSTD_decompress ();
|
||||
int
|
||||
main ()
|
||||
{
|
||||
return ZSTD_decompress ();
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
ac_cv_lib_zstd_ZSTD_decompress=yes
|
||||
else
|
||||
ac_cv_lib_zstd_ZSTD_decompress=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
LIBS=$ac_check_lib_save_LIBS
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZSTD_decompress" >&5
|
||||
$as_echo "$ac_cv_lib_zstd_ZSTD_decompress" >&6; }
|
||||
if test "x$ac_cv_lib_zstd_ZSTD_decompress" = xyes; then :
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define HAVE_LIBZSTD 1
|
||||
_ACEOF
|
||||
|
||||
LIBS="-lzstd $LIBS"
|
||||
|
||||
else
|
||||
as_fn_error $? "zstd library not found
|
||||
If you have zstd installed, see config.log for details on the
|
||||
failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5
|
||||
fi
|
||||
|
||||
ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default"
|
||||
if test "x$ac_cv_header_zstd_h" = xyes; then :
|
||||
|
||||
else
|
||||
as_fn_error $? "zstd header not found
|
||||
If you have zstd already installed, see config.log for details on the
|
||||
failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5
|
||||
fi
|
||||
|
||||
|
||||
fi
|
||||
|
||||
#
|
||||
|
|
17
configure.in
17
configure.in
|
@ -204,6 +204,23 @@ failure. It is possible the compiler isn't looking in the proper directory.
|
|||
Use --without-lz4 to disable lz4 support.])])
|
||||
fi
|
||||
|
||||
#
|
||||
# ZSTD
|
||||
#
|
||||
PGAC_ARG_BOOL(with, zstd, no,
|
||||
[use zstd])
|
||||
AC_SUBST(with_zstd)
|
||||
|
||||
if test "$with_zstd" = yes; then
|
||||
AC_CHECK_LIB(zstd, ZSTD_decompress, [],
|
||||
[AC_MSG_ERROR([zstd library not found
|
||||
If you have zstd installed, see config.log for details on the
|
||||
failure. It is possible the compiler isn't looking in the proper directory.])])
|
||||
dnl The library check above only proves linking works; also require the zstd.h header.
|
||||
AC_CHECK_HEADER(zstd.h, [], [AC_MSG_ERROR([zstd header not found
|
||||
If you have zstd already installed, see config.log for details on the
|
||||
failure. It is possible the compiler isn't looking in the proper directory.])])
|
||||
fi
|
||||
|
||||
#
|
||||
# libcurl
|
||||
#
|
||||
|
|
|
@ -38,6 +38,9 @@ static const struct config_enum_entry cstore_compression_options[] =
|
|||
{ "pglz", COMPRESSION_PG_LZ, false },
|
||||
#if HAVE_LIBLZ4
|
||||
{ "lz4", COMPRESSION_LZ4, false },
|
||||
#endif
|
||||
#if HAVE_LIBZSTD
|
||||
{ "zstd", COMPRESSION_ZSTD, false },
|
||||
#endif
|
||||
{ NULL, 0, false }
|
||||
};
|
||||
|
|
|
@ -21,6 +21,10 @@
|
|||
#include <lz4.h>
|
||||
#endif
|
||||
|
||||
#if HAVE_LIBZSTD
|
||||
#include <zstd.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The information at the start of the compressed data. This description is taken
|
||||
* from pg_lzcompress in pre-9.5 version of PostgreSQL.
|
||||
|
@ -81,6 +85,33 @@ CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_LIBZSTD
|
||||
case COMPRESSION_ZSTD:
|
||||
{
|
||||
int maximumLength = ZSTD_compressBound(inputBuffer->len);
|
||||
int compressionLevel = 3;
|
||||
|
||||
resetStringInfo(outputBuffer);
|
||||
enlargeStringInfo(outputBuffer, maximumLength);
|
||||
|
||||
size_t compressedSize = ZSTD_compress(outputBuffer->data,
|
||||
outputBuffer->maxlen,
|
||||
inputBuffer->data,
|
||||
inputBuffer->len,
|
||||
compressionLevel);
|
||||
|
||||
if (ZSTD_isError(compressedSize))
|
||||
{
|
||||
ereport(WARNING, (errmsg("zstd compression failed"),
|
||||
(errdetail("%s", ZSTD_getErrorName(compressedSize)))));
|
||||
return false;
|
||||
}
|
||||
|
||||
outputBuffer->len = compressedSize;
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
case COMPRESSION_PG_LZ:
|
||||
{
|
||||
uint64 maximumLength = PGLZ_MAX_OUTPUT(inputBuffer->len) +
|
||||
|
@ -159,6 +190,36 @@ DecompressBuffer(StringInfo buffer,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_LIBZSTD
|
||||
case COMPRESSION_ZSTD:
|
||||
{
|
||||
StringInfo decompressedBuffer = makeStringInfo();
|
||||
enlargeStringInfo(decompressedBuffer, decompressedSize);
|
||||
|
||||
size_t zstdDecompressSize = ZSTD_decompress(decompressedBuffer->data,
|
||||
decompressedSize,
|
||||
buffer->data,
|
||||
buffer->len);
|
||||
if (ZSTD_isError(zstdDecompressSize))
|
||||
{
|
||||
ereport(ERROR, (errmsg("zstd decompression failed"),
|
||||
(errdetail("%s", ZSTD_getErrorName(
|
||||
zstdDecompressSize)))));
|
||||
}
|
||||
|
||||
if (zstdDecompressSize != decompressedSize)
|
||||
{
|
||||
ereport(ERROR, (errmsg("unexpected decompressed size"),
|
||||
errdetail("Expected %ld, received %ld", decompressedSize,
|
||||
zstdDecompressSize)));
|
||||
}
|
||||
|
||||
decompressedBuffer->len = decompressedSize;
|
||||
|
||||
return decompressedBuffer;
|
||||
}
|
||||
#endif
|
||||
|
||||
case COMPRESSION_PG_LZ:
|
||||
{
|
||||
StringInfo decompressedBuffer = NULL;
|
||||
|
|
|
@ -43,6 +43,9 @@
|
|||
/* Define to 1 if you have the `lz4' library (-llz4). */
|
||||
#undef HAVE_LIBLZ4
|
||||
|
||||
/* Define to 1 if you have the `zstd' library (-lzstd). */
|
||||
#undef HAVE_LIBZSTD
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
|
|
|
@ -27,6 +27,9 @@
|
|||
/* Define to 1 if you have the `liblz4' library (-llz4). */
|
||||
#undef HAVE_LIBLZ4
|
||||
|
||||
/* Define to 1 if you have the `libzstd' library (-lzstd). */
|
||||
#undef HAVE_LIBZSTD
|
||||
|
||||
/* Base URL for statistics collection and update checks */
|
||||
#undef REPORTS_BASE_URL
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ typedef enum
|
|||
COMPRESSION_NONE = 0,
|
||||
COMPRESSION_PG_LZ = 1,
|
||||
COMPRESSION_LZ4 = 2,
|
||||
COMPRESSION_ZSTD = 3,
|
||||
|
||||
COMPRESSION_COUNT
|
||||
} CompressionType;
|
||||
|
|
|
@ -14,7 +14,7 @@ test: am_update_delete
|
|||
test: am_copyto
|
||||
test: am_alter
|
||||
test: am_alter_set_type
|
||||
test: am_lz4
|
||||
test: am_lz4 am_zstd
|
||||
test: am_rollback
|
||||
test: am_truncate
|
||||
test: am_vacuum
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
SELECT compression_type_supported('zstd') AS zstd_supported \gset
|
||||
\if :zstd_supported
|
||||
\else
|
||||
\q
|
||||
\endif
|
||||
CREATE SCHEMA am_zstd;
|
||||
SET search_path TO am_zstd;
|
||||
SET columnar.compression TO 'zstd';
|
||||
CREATE TABLE test_zstd (a int, b text, c int) USING columnar;
|
||||
INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i;
|
||||
SELECT count(*) FROM test_zstd;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
10000
|
||||
(1 row)
|
||||
|
||||
INSERT INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i;
|
||||
SELECT count(*) FROM test_zstd;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
20001
|
||||
(1 row)
|
||||
|
||||
VACUUM VERBOSE test_zstd;
|
||||
INFO: statistics for "test_zstd":
|
||||
storage id: xxxxx
|
||||
total file size: 40960, total data size: 14947
|
||||
compression rate: 21.91x
|
||||
total row count: 20001, stripe count: 2, average rows per stripe: 10000
|
||||
chunk count: 9, containing data for dropped columns: 0, zstd compressed: 9
|
||||
|
||||
SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5;
|
||||
a | b | c
|
||||
---------------------------------------------------------------------
|
||||
0 | 0 | 4
|
||||
0 | 1 | 4
|
||||
0 | 10 | 4
|
||||
0 | 11 | 4
|
||||
0 | 12 | 4
|
||||
(5 rows)
|
||||
|
||||
-- compare compression rate to pglz
|
||||
SET columnar.compression TO 'pglz';
|
||||
CREATE TABLE test_pglz (LIKE test_zstd) USING columnar;
|
||||
INSERT INTO test_pglz SELECT * FROM test_zstd;
|
||||
VACUUM VERBOSE test_pglz;
|
||||
INFO: statistics for "test_pglz":
|
||||
storage id: xxxxx
|
||||
total file size: 57344, total data size: 35986
|
||||
compression rate: 9.10x
|
||||
total row count: 20001, stripe count: 1, average rows per stripe: 20001
|
||||
chunk count: 9, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 6
|
||||
|
||||
-- Other operations
|
||||
VACUUM FULL test_zstd;
|
||||
ANALYZE test_zstd;
|
||||
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
6002
|
||||
(1 row)
|
||||
|
||||
TRUNCATE test_zstd;
|
||||
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA am_zstd CASCADE;
|
|
@ -0,0 +1,4 @@
|
|||
SELECT compression_type_supported('zstd') AS zstd_supported \gset
|
||||
\if :zstd_supported
|
||||
\else
|
||||
\q
|
|
@ -0,0 +1,41 @@
|
|||
SELECT compression_type_supported('zstd') AS zstd_supported \gset
|
||||
\if :zstd_supported
|
||||
\else
|
||||
\q
|
||||
\endif
|
||||
|
||||
CREATE SCHEMA am_zstd;
|
||||
SET search_path TO am_zstd;
|
||||
|
||||
SET columnar.compression TO 'zstd';
|
||||
CREATE TABLE test_zstd (a int, b text, c int) USING columnar;
|
||||
|
||||
INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i;
|
||||
SELECT count(*) FROM test_zstd;
|
||||
|
||||
INSERT INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i;
|
||||
SELECT count(*) FROM test_zstd;
|
||||
|
||||
VACUUM VERBOSE test_zstd;
|
||||
|
||||
SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5;
|
||||
|
||||
-- compare compression rate to pglz
|
||||
SET columnar.compression TO 'pglz';
|
||||
CREATE TABLE test_pglz (LIKE test_zstd) USING columnar;
|
||||
INSERT INTO test_pglz SELECT * FROM test_zstd;
|
||||
|
||||
VACUUM VERBOSE test_pglz;
|
||||
|
||||
-- Other operations
|
||||
VACUUM FULL test_zstd;
|
||||
ANALYZE test_zstd;
|
||||
|
||||
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
|
||||
|
||||
TRUNCATE test_zstd;
|
||||
|
||||
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA am_zstd CASCADE;
|
Loading…
Reference in New Issue