mirror of https://github.com/citusdata/citus.git
Columnar: Support zstd compression
parent
3f81ee26fd
commit
f5a4a4bc74
|
@ -630,6 +630,7 @@ CITUS_LDFLAGS
|
||||||
CITUS_CPPFLAGS
|
CITUS_CPPFLAGS
|
||||||
CITUS_CFLAGS
|
CITUS_CFLAGS
|
||||||
GIT_BIN
|
GIT_BIN
|
||||||
|
with_zstd
|
||||||
with_lz4
|
with_lz4
|
||||||
EGREP
|
EGREP
|
||||||
GREP
|
GREP
|
||||||
|
@ -692,6 +693,7 @@ enable_option_checking
|
||||||
with_extra_version
|
with_extra_version
|
||||||
enable_coverage
|
enable_coverage
|
||||||
with_lz4
|
with_lz4
|
||||||
|
with_zstd
|
||||||
with_libcurl
|
with_libcurl
|
||||||
with_reports_hostname
|
with_reports_hostname
|
||||||
'
|
'
|
||||||
|
@ -1334,6 +1336,7 @@ Optional Packages:
|
||||||
--with-extra-version=STRING
|
--with-extra-version=STRING
|
||||||
append STRING to version
|
append STRING to version
|
||||||
--with-lz4 use lz4
|
--with-lz4 use lz4
|
||||||
|
--with-zstd use zstd
|
||||||
--without-libcurl do not use libcurl for anonymous statistics
|
--without-libcurl do not use libcurl for anonymous statistics
|
||||||
collection
|
collection
|
||||||
--with-reports-hostname=HOSTNAME
|
--with-reports-hostname=HOSTNAME
|
||||||
|
@ -4449,6 +4452,97 @@ Use --without-lz4 to disable lz4 support." "$LINENO" 5
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
#
|
||||||
|
# ZSTD
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Check whether --with-zstd was given.
|
||||||
|
if test "${with_zstd+set}" = set; then :
|
||||||
|
withval=$with_zstd;
|
||||||
|
case $withval in
|
||||||
|
yes)
|
||||||
|
:
|
||||||
|
;;
|
||||||
|
no)
|
||||||
|
:
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
as_fn_error $? "no argument expected for --with-zstd option" "$LINENO" 5
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
else
|
||||||
|
with_zstd=no
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if test "$with_zstd" = yes; then
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZSTD_decompress in -lzstd" >&5
|
||||||
|
$as_echo_n "checking for ZSTD_decompress in -lzstd... " >&6; }
|
||||||
|
if ${ac_cv_lib_zstd_ZSTD_decompress+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
ac_check_lib_save_LIBS=$LIBS
|
||||||
|
LIBS="-lzstd $LIBS"
|
||||||
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
/* Override any GCC internal prototype to avoid an error.
|
||||||
|
Use char because int might match the return type of a GCC
|
||||||
|
builtin and then its argument prototype would still apply. */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
char ZSTD_decompress ();
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
return ZSTD_decompress ();
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
if ac_fn_c_try_link "$LINENO"; then :
|
||||||
|
ac_cv_lib_zstd_ZSTD_decompress=yes
|
||||||
|
else
|
||||||
|
ac_cv_lib_zstd_ZSTD_decompress=no
|
||||||
|
fi
|
||||||
|
rm -f core conftest.err conftest.$ac_objext \
|
||||||
|
conftest$ac_exeext conftest.$ac_ext
|
||||||
|
LIBS=$ac_check_lib_save_LIBS
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZSTD_decompress" >&5
|
||||||
|
$as_echo "$ac_cv_lib_zstd_ZSTD_decompress" >&6; }
|
||||||
|
if test "x$ac_cv_lib_zstd_ZSTD_decompress" = xyes; then :
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_LIBZSTD 1
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
LIBS="-lzstd $LIBS"
|
||||||
|
|
||||||
|
else
|
||||||
|
as_fn_error $? "zstd library not found
|
||||||
|
If you have zstd installed, see config.log for details on the
|
||||||
|
failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5
|
||||||
|
fi
|
||||||
|
|
||||||
|
ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default"
|
||||||
|
if test "x$ac_cv_header_zstd_h" = xyes; then :
|
||||||
|
|
||||||
|
else
|
||||||
|
as_fn_error $? "zstd header not found
|
||||||
|
If you have zstd already installed, see config.log for details on the
|
||||||
|
failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
17
configure.in
17
configure.in
|
@ -204,6 +204,23 @@ failure. It is possible the compiler isn't looking in the proper directory.
|
||||||
Use --without-lz4 to disable lz4 support.])])
|
Use --without-lz4 to disable lz4 support.])])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
#
|
||||||
|
# ZSTD
|
||||||
|
#
|
||||||
|
PGAC_ARG_BOOL(with, zstd, no,
|
||||||
|
[use zstd])
|
||||||
|
AC_SUBST(with_zstd)
|
||||||
|
|
||||||
|
if test "$with_zstd" = yes; then
|
||||||
|
AC_CHECK_LIB(zstd, ZSTD_decompress, [],
|
||||||
|
[AC_MSG_ERROR([zstd library not found
|
||||||
|
If you have zstd installed, see config.log for details on the
|
||||||
|
failure. It is possible the compiler isn't looking in the proper directory.])])
|
||||||
|
AC_CHECK_HEADER(zstd.h, [], [AC_MSG_ERROR([zstd header not found
|
||||||
|
If you have zstd already installed, see config.log for details on the
|
||||||
|
failure. It is possible the compiler isn't looking in the proper directory.])])
|
||||||
|
fi
|
||||||
|
|
||||||
#
|
#
|
||||||
# libcurl
|
# libcurl
|
||||||
#
|
#
|
||||||
|
|
|
@ -38,6 +38,9 @@ static const struct config_enum_entry cstore_compression_options[] =
|
||||||
{ "pglz", COMPRESSION_PG_LZ, false },
|
{ "pglz", COMPRESSION_PG_LZ, false },
|
||||||
#if HAVE_LIBLZ4
|
#if HAVE_LIBLZ4
|
||||||
{ "lz4", COMPRESSION_LZ4, false },
|
{ "lz4", COMPRESSION_LZ4, false },
|
||||||
|
#endif
|
||||||
|
#if HAVE_LIBZSTD
|
||||||
|
{ "zstd", COMPRESSION_ZSTD, false },
|
||||||
#endif
|
#endif
|
||||||
{ NULL, 0, false }
|
{ NULL, 0, false }
|
||||||
};
|
};
|
||||||
|
|
|
@ -21,6 +21,10 @@
|
||||||
#include <lz4.h>
|
#include <lz4.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_LIBZSTD
|
||||||
|
#include <zstd.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The information at the start of the compressed data. This description is taken
|
* The information at the start of the compressed data. This description is taken
|
||||||
* from pg_lzcompress in pre-9.5 version of PostgreSQL.
|
* from pg_lzcompress in pre-9.5 version of PostgreSQL.
|
||||||
|
@ -81,6 +85,33 @@ CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_LIBZSTD
|
||||||
|
case COMPRESSION_ZSTD:
|
||||||
|
{
|
||||||
|
int maximumLength = ZSTD_compressBound(inputBuffer->len);
|
||||||
|
int compressionLevel = 3;
|
||||||
|
|
||||||
|
resetStringInfo(outputBuffer);
|
||||||
|
enlargeStringInfo(outputBuffer, maximumLength);
|
||||||
|
|
||||||
|
size_t compressedSize = ZSTD_compress(outputBuffer->data,
|
||||||
|
outputBuffer->maxlen,
|
||||||
|
inputBuffer->data,
|
||||||
|
inputBuffer->len,
|
||||||
|
compressionLevel);
|
||||||
|
|
||||||
|
if (ZSTD_isError(compressedSize))
|
||||||
|
{
|
||||||
|
ereport(WARNING, (errmsg("zstd compression failed"),
|
||||||
|
(errdetail("%s", ZSTD_getErrorName(compressedSize)))));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
outputBuffer->len = compressedSize;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
case COMPRESSION_PG_LZ:
|
case COMPRESSION_PG_LZ:
|
||||||
{
|
{
|
||||||
uint64 maximumLength = PGLZ_MAX_OUTPUT(inputBuffer->len) +
|
uint64 maximumLength = PGLZ_MAX_OUTPUT(inputBuffer->len) +
|
||||||
|
@ -159,6 +190,36 @@ DecompressBuffer(StringInfo buffer,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_LIBZSTD
|
||||||
|
case COMPRESSION_ZSTD:
|
||||||
|
{
|
||||||
|
StringInfo decompressedBuffer = makeStringInfo();
|
||||||
|
enlargeStringInfo(decompressedBuffer, decompressedSize);
|
||||||
|
|
||||||
|
size_t zstdDecompressSize = ZSTD_decompress(decompressedBuffer->data,
|
||||||
|
decompressedSize,
|
||||||
|
buffer->data,
|
||||||
|
buffer->len);
|
||||||
|
if (ZSTD_isError(zstdDecompressSize))
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errmsg("zstd decompression failed"),
|
||||||
|
(errdetail("%s", ZSTD_getErrorName(
|
||||||
|
zstdDecompressSize)))));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zstdDecompressSize != decompressedSize)
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errmsg("unexpected decompressed size"),
|
||||||
|
errdetail("Expected %ld, received %ld", decompressedSize,
|
||||||
|
zstdDecompressSize)));
|
||||||
|
}
|
||||||
|
|
||||||
|
decompressedBuffer->len = decompressedSize;
|
||||||
|
|
||||||
|
return decompressedBuffer;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
case COMPRESSION_PG_LZ:
|
case COMPRESSION_PG_LZ:
|
||||||
{
|
{
|
||||||
StringInfo decompressedBuffer = NULL;
|
StringInfo decompressedBuffer = NULL;
|
||||||
|
|
|
@ -43,6 +43,9 @@
|
||||||
/* Define to 1 if you have the `lz4' library (-llz4). */
|
/* Define to 1 if you have the `lz4' library (-llz4). */
|
||||||
#undef HAVE_LIBLZ4
|
#undef HAVE_LIBLZ4
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `zstd' library (-lzstd). */
|
||||||
|
#undef HAVE_LIBZSTD
|
||||||
|
|
||||||
/* Define to 1 if you have the <memory.h> header file. */
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
#undef HAVE_MEMORY_H
|
#undef HAVE_MEMORY_H
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,9 @@
|
||||||
/* Define to 1 if you have the `liblz4' library (-llz4). */
|
/* Define to 1 if you have the `liblz4' library (-llz4). */
|
||||||
#undef HAVE_LIBLZ4
|
#undef HAVE_LIBLZ4
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `libzstd' library (-lzstd). */
|
||||||
|
#undef HAVE_LIBZSTD
|
||||||
|
|
||||||
/* Base URL for statistics collection and update checks */
|
/* Base URL for statistics collection and update checks */
|
||||||
#undef REPORTS_BASE_URL
|
#undef REPORTS_BASE_URL
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,7 @@ typedef enum
|
||||||
COMPRESSION_NONE = 0,
|
COMPRESSION_NONE = 0,
|
||||||
COMPRESSION_PG_LZ = 1,
|
COMPRESSION_PG_LZ = 1,
|
||||||
COMPRESSION_LZ4 = 2,
|
COMPRESSION_LZ4 = 2,
|
||||||
|
COMPRESSION_ZSTD = 3,
|
||||||
|
|
||||||
COMPRESSION_COUNT
|
COMPRESSION_COUNT
|
||||||
} CompressionType;
|
} CompressionType;
|
||||||
|
|
|
@ -14,7 +14,7 @@ test: am_update_delete
|
||||||
test: am_copyto
|
test: am_copyto
|
||||||
test: am_alter
|
test: am_alter
|
||||||
test: am_alter_set_type
|
test: am_alter_set_type
|
||||||
test: am_lz4
|
test: am_lz4 am_zstd
|
||||||
test: am_rollback
|
test: am_rollback
|
||||||
test: am_truncate
|
test: am_truncate
|
||||||
test: am_vacuum
|
test: am_vacuum
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
SELECT compression_type_supported('zstd') AS zstd_supported \gset
|
||||||
|
\if :zstd_supported
|
||||||
|
\else
|
||||||
|
\q
|
||||||
|
\endif
|
||||||
|
CREATE SCHEMA am_zstd;
|
||||||
|
SET search_path TO am_zstd;
|
||||||
|
SET columnar.compression TO 'zstd';
|
||||||
|
CREATE TABLE test_zstd (a int, b text, c int) USING columnar;
|
||||||
|
INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i;
|
||||||
|
SELECT count(*) FROM test_zstd;
|
||||||
|
count
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
10000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i;
|
||||||
|
SELECT count(*) FROM test_zstd;
|
||||||
|
count
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
20001
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
VACUUM VERBOSE test_zstd;
|
||||||
|
INFO: statistics for "test_zstd":
|
||||||
|
storage id: xxxxx
|
||||||
|
total file size: 40960, total data size: 14947
|
||||||
|
compression rate: 21.91x
|
||||||
|
total row count: 20001, stripe count: 2, average rows per stripe: 10000
|
||||||
|
chunk count: 9, containing data for dropped columns: 0, zstd compressed: 9
|
||||||
|
|
||||||
|
SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5;
|
||||||
|
a | b | c
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
0 | 0 | 4
|
||||||
|
0 | 1 | 4
|
||||||
|
0 | 10 | 4
|
||||||
|
0 | 11 | 4
|
||||||
|
0 | 12 | 4
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- compare compression rate to pglz
|
||||||
|
SET columnar.compression TO 'pglz';
|
||||||
|
CREATE TABLE test_pglz (LIKE test_zstd) USING columnar;
|
||||||
|
INSERT INTO test_pglz SELECT * FROM test_zstd;
|
||||||
|
VACUUM VERBOSE test_pglz;
|
||||||
|
INFO: statistics for "test_pglz":
|
||||||
|
storage id: xxxxx
|
||||||
|
total file size: 57344, total data size: 35986
|
||||||
|
compression rate: 9.10x
|
||||||
|
total row count: 20001, stripe count: 1, average rows per stripe: 20001
|
||||||
|
chunk count: 9, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 6
|
||||||
|
|
||||||
|
-- Other operations
|
||||||
|
VACUUM FULL test_zstd;
|
||||||
|
ANALYZE test_zstd;
|
||||||
|
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
|
||||||
|
count
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
6002
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE test_zstd;
|
||||||
|
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
|
||||||
|
count
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
0
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SET client_min_messages TO WARNING;
|
||||||
|
DROP SCHEMA am_zstd CASCADE;
|
|
@ -0,0 +1,4 @@
|
||||||
|
SELECT compression_type_supported('zstd') AS zstd_supported \gset
|
||||||
|
\if :zstd_supported
|
||||||
|
\else
|
||||||
|
\q
|
|
@ -0,0 +1,41 @@
|
||||||
|
SELECT compression_type_supported('zstd') AS zstd_supported \gset
|
||||||
|
\if :zstd_supported
|
||||||
|
\else
|
||||||
|
\q
|
||||||
|
\endif
|
||||||
|
|
||||||
|
CREATE SCHEMA am_zstd;
|
||||||
|
SET search_path TO am_zstd;
|
||||||
|
|
||||||
|
SET columnar.compression TO 'zstd';
|
||||||
|
CREATE TABLE test_zstd (a int, b text, c int) USING columnar;
|
||||||
|
|
||||||
|
INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i;
|
||||||
|
SELECT count(*) FROM test_zstd;
|
||||||
|
|
||||||
|
INSERT INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i;
|
||||||
|
SELECT count(*) FROM test_zstd;
|
||||||
|
|
||||||
|
VACUUM VERBOSE test_zstd;
|
||||||
|
|
||||||
|
SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5;
|
||||||
|
|
||||||
|
-- compare compression rate to pglz
|
||||||
|
SET columnar.compression TO 'pglz';
|
||||||
|
CREATE TABLE test_pglz (LIKE test_zstd) USING columnar;
|
||||||
|
INSERT INTO test_pglz SELECT * FROM test_zstd;
|
||||||
|
|
||||||
|
VACUUM VERBOSE test_pglz;
|
||||||
|
|
||||||
|
-- Other operations
|
||||||
|
VACUUM FULL test_zstd;
|
||||||
|
ANALYZE test_zstd;
|
||||||
|
|
||||||
|
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
|
||||||
|
|
||||||
|
TRUNCATE test_zstd;
|
||||||
|
|
||||||
|
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
|
||||||
|
|
||||||
|
SET client_min_messages TO WARNING;
|
||||||
|
DROP SCHEMA am_zstd CASCADE;
|
Loading…
Reference in New Issue