Columnar: enable zstd & lz4 compilation by default (#4402)

* Columnar: enable zstd & lz4 compilation by default

* Make zstd & lz4 tests more consistent

* Don't require lz4 & zstd for postgres 11

Co-authored-by: Nils Dijk <nils@citusdata.com>
pull/4383/merge
Hadi Moshayedi 2020-12-21 12:11:58 -08:00 committed by GitHub
parent cceaf31e4c
commit dde0323b57
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 292 additions and 282 deletions

382
configure vendored
View File

@ -692,10 +692,10 @@ ac_user_opts='
enable_option_checking
with_extra_version
enable_coverage
with_lz4
with_zstd
with_libcurl
with_reports_hostname
with_lz4
with_zstd
'
ac_precious_vars='build_alias
host_alias
@ -1335,13 +1335,13 @@ Optional Packages:
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
--with-extra-version=STRING
append STRING to version
--with-lz4 use lz4
--with-zstd use zstd
--without-libcurl do not use libcurl for anonymous statistics
collection
--with-reports-hostname=HOSTNAME
Use HOSTNAME as hostname for statistics collection
and update checks
--without-lz4 do not use lz4
--without-zstd do not use zstd
Some influential environment variables:
PG_CONFIG Location to find pg_config for target PostgreSQL instalation
@ -4361,190 +4361,6 @@ if test "$enable_coverage" = yes; then
CITUS_LDFLAGS="$CITUS_LDFLAGS --coverage"
fi
#
# LZ4
#
# Check whether --with-lz4 was given.
if test "${with_lz4+set}" = set; then :
withval=$with_lz4;
case $withval in
yes)
:
;;
no)
:
;;
*)
as_fn_error $? "no argument expected for --with-lz4 option" "$LINENO" 5
;;
esac
else
with_lz4=no
fi
if test "$with_lz4" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LZ4_compress_default in -llz4" >&5
$as_echo_n "checking for LZ4_compress_default in -llz4... " >&6; }
if ${ac_cv_lib_lz4_LZ4_compress_default+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-llz4 $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char LZ4_compress_default ();
int
main ()
{
return LZ4_compress_default ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_lz4_LZ4_compress_default=yes
else
ac_cv_lib_lz4_LZ4_compress_default=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lz4_LZ4_compress_default" >&5
$as_echo "$ac_cv_lib_lz4_LZ4_compress_default" >&6; }
if test "x$ac_cv_lib_lz4_LZ4_compress_default" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_LIBLZ4 1
_ACEOF
LIBS="-llz4 $LIBS"
else
as_fn_error $? "lz4 library not found
If you have lz4 installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable zlib support." "$LINENO" 5
fi
ac_fn_c_check_header_mongrel "$LINENO" "lz4.h" "ac_cv_header_lz4_h" "$ac_includes_default"
if test "x$ac_cv_header_lz4_h" = xyes; then :
else
as_fn_error $? "lz4 header not found
If you have lz4 already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable lz4 support." "$LINENO" 5
fi
fi
#
# ZSTD
#
# Check whether --with-zstd was given.
if test "${with_zstd+set}" = set; then :
withval=$with_zstd;
case $withval in
yes)
:
;;
no)
:
;;
*)
as_fn_error $? "no argument expected for --with-zstd option" "$LINENO" 5
;;
esac
else
with_zstd=no
fi
if test "$with_zstd" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZSTD_decompress in -lzstd" >&5
$as_echo_n "checking for ZSTD_decompress in -lzstd... " >&6; }
if ${ac_cv_lib_zstd_ZSTD_decompress+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lzstd $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char ZSTD_decompress ();
int
main ()
{
return ZSTD_decompress ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_zstd_ZSTD_decompress=yes
else
ac_cv_lib_zstd_ZSTD_decompress=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZSTD_decompress" >&5
$as_echo "$ac_cv_lib_zstd_ZSTD_decompress" >&6; }
if test "x$ac_cv_lib_zstd_ZSTD_decompress" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_LIBZSTD 1
_ACEOF
LIBS="-lzstd $LIBS"
else
as_fn_error $? "zstd library not found
If you have zstd installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5
fi
ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default"
if test "x$ac_cv_header_zstd_h" = xyes; then :
else
as_fn_error $? "zstd header not found
If you have lz4zstd already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory." "$LINENO" 5
fi
fi
#
# libcurl
#
@ -4681,6 +4497,196 @@ else
$as_echo "$as_me: postgres version does not support table access methodds" >&6;}
fi;
# Require lz4 & zstd only if we are compiling columnar
#
# The comparison below uses the POSIX "=" operator on purpose: "==" is a
# bash extension that makes test fail under dash and other strict /bin/sh
# implementations, which would silently skip both library checks.
if test "$HAS_TABLEAM" = 'yes'; then
#
# LZ4
#
# Check whether --with-lz4 was given.
# Only a bare --with-lz4 / --without-lz4 is accepted; any other argument
# is rejected. When the option is absent lz4 defaults to enabled.
if test "${with_lz4+set}" = set; then :
withval=$with_lz4;
case $withval in
yes)
:
;;
no)
:
;;
*)
as_fn_error $? "no argument expected for --with-lz4 option" "$LINENO" 5
;;
esac
else
with_lz4=yes
fi
# With lz4 enabled, both the library and its header are mandatory:
# configure aborts if either one cannot be found.
if test "$with_lz4" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LZ4_compress_default in -llz4" >&5
$as_echo_n "checking for LZ4_compress_default in -llz4... " >&6; }
if ${ac_cv_lib_lz4_LZ4_compress_default+:} false; then :
$as_echo_n "(cached) " >&6
else
# Link a tiny program against -llz4, then restore LIBS afterwards.
ac_check_lib_save_LIBS=$LIBS
LIBS="-llz4 $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char LZ4_compress_default ();
int
main ()
{
return LZ4_compress_default ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_lz4_LZ4_compress_default=yes
else
ac_cv_lib_lz4_LZ4_compress_default=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lz4_LZ4_compress_default" >&5
$as_echo "$ac_cv_lib_lz4_LZ4_compress_default" >&6; }
if test "x$ac_cv_lib_lz4_LZ4_compress_default" = xyes; then :
# Library found: record HAVE_LIBLZ4 and link against it from here on.
cat >>confdefs.h <<_ACEOF
#define HAVE_LIBLZ4 1
_ACEOF
LIBS="-llz4 $LIBS"
else
as_fn_error $? "lz4 library not found
If you have lz4 installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable lz4 support." "$LINENO" 5
fi
ac_fn_c_check_header_mongrel "$LINENO" "lz4.h" "ac_cv_header_lz4_h" "$ac_includes_default"
if test "x$ac_cv_header_lz4_h" = xyes; then :
else
as_fn_error $? "lz4 header not found
If you have lz4 already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable lz4 support." "$LINENO" 5
fi
fi
#
# ZSTD
#
# Check whether --with-zstd was given.
# Same rules as for lz4: no option argument allowed, enabled by default.
if test "${with_zstd+set}" = set; then :
withval=$with_zstd;
case $withval in
yes)
:
;;
no)
:
;;
*)
as_fn_error $? "no argument expected for --with-zstd option" "$LINENO" 5
;;
esac
else
with_zstd=yes
fi
# With zstd enabled, both the library and its header are mandatory:
# configure aborts if either one cannot be found.
if test "$with_zstd" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZSTD_decompress in -lzstd" >&5
$as_echo_n "checking for ZSTD_decompress in -lzstd... " >&6; }
if ${ac_cv_lib_zstd_ZSTD_decompress+:} false; then :
$as_echo_n "(cached) " >&6
else
# Link a tiny program against -lzstd, then restore LIBS afterwards.
ac_check_lib_save_LIBS=$LIBS
LIBS="-lzstd $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char ZSTD_decompress ();
int
main ()
{
return ZSTD_decompress ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_zstd_ZSTD_decompress=yes
else
ac_cv_lib_zstd_ZSTD_decompress=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZSTD_decompress" >&5
$as_echo "$ac_cv_lib_zstd_ZSTD_decompress" >&6; }
if test "x$ac_cv_lib_zstd_ZSTD_decompress" = xyes; then :
# Library found: record HAVE_LIBZSTD and link against it from here on.
cat >>confdefs.h <<_ACEOF
#define HAVE_LIBZSTD 1
_ACEOF
LIBS="-lzstd $LIBS"
else
as_fn_error $? "zstd library not found
If you have zstd installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-zstd to disable zstd support." "$LINENO" 5
fi
ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default"
if test "x$ac_cv_header_zstd_h" = xyes; then :
else
as_fn_error $? "zstd header not found
If you have zstd already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-zstd to disable zstd support." "$LINENO" 5
fi
fi
fi # test "$HAS_TABLEAM" = 'yes'
# Check if git is installed, when installed the gitref of the checkout will be baked in the application
# Extract the first word of "git", so it can be a program name with args.
set dummy git; ac_word=$2

View File

@ -185,42 +185,6 @@ if test "$enable_coverage" = yes; then
CITUS_LDFLAGS="$CITUS_LDFLAGS --coverage"
fi
#
# LZ4
#
PGAC_ARG_BOOL(with, lz4, no,
[use lz4])
AC_SUBST(with_lz4)
if test "$with_lz4" = yes; then
AC_CHECK_LIB(lz4, LZ4_compress_default, [],
[AC_MSG_ERROR([lz4 library not found
If you have lz4 installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable zlib support.])])
AC_CHECK_HEADER(lz4.h, [], [AC_MSG_ERROR([lz4 header not found
If you have lz4 already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable lz4 support.])])
fi
#
# ZSTD
#
PGAC_ARG_BOOL(with, zstd, no,
[use zstd])
AC_SUBST(with_zstd)
if test "$with_zstd" = yes; then
AC_CHECK_LIB(zstd, ZSTD_decompress, [],
[AC_MSG_ERROR([zstd library not found
If you have zstd installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.])])
AC_CHECK_HEADER(zstd.h, [], [AC_MSG_ERROR([zstd header not found
If you have lz4zstd already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.])])
fi
#
# libcurl
#
@ -255,6 +219,48 @@ else
AC_MSG_NOTICE([postgres version does not support table access methodds])
fi;
# Require lz4 & zstd only if we are compiling columnar
#
# The comparison below uses the POSIX "=" operator on purpose: "==" is a
# bash extension that makes test fail under dash and other strict /bin/sh
# implementations, which would silently skip both library checks.
if test "$HAS_TABLEAM" = 'yes'; then
#
# LZ4
#
# Enabled by default; the help text describes the --without-lz4 form.
PGAC_ARG_BOOL(with, lz4, yes,
[do not use lz4])
AC_SUBST(with_lz4)
# With lz4 enabled, a missing library or header aborts configure.
if test "$with_lz4" = yes; then
AC_CHECK_LIB(lz4, LZ4_compress_default, [],
[AC_MSG_ERROR([lz4 library not found
If you have lz4 installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable lz4 support.])])
AC_CHECK_HEADER(lz4.h, [], [AC_MSG_ERROR([lz4 header not found
If you have lz4 already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-lz4 to disable lz4 support.])])
fi
#
# ZSTD
#
# Enabled by default; the help text describes the --without-zstd form.
PGAC_ARG_BOOL(with, zstd, yes,
[do not use zstd])
AC_SUBST(with_zstd)
# With zstd enabled, a missing library or header aborts configure.
if test "$with_zstd" = yes; then
AC_CHECK_LIB(zstd, ZSTD_decompress, [],
[AC_MSG_ERROR([zstd library not found
If you have zstd installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-zstd to disable zstd support.])])
AC_CHECK_HEADER(zstd.h, [], [AC_MSG_ERROR([zstd header not found
If you have zstd already installed, see config.log for details on the
failure. It is possible the compiler isn't looking in the proper directory.
Use --without-zstd to disable zstd support.])])
fi
fi # test "$HAS_TABLEAM" = 'yes'
# Check if git is installed, when installed the gitref of the checkout will be baked in the application
AC_PATH_PROG(GIT_BIN, git)
AC_CHECK_FILE(.git,[HAS_DOTGIT=yes], [HAS_DOTGIT=])

View File

@ -21,14 +21,7 @@ SELECT count(*) FROM test_lz4;
20001
(1 row)
VACUUM VERBOSE test_lz4;
INFO: statistics for "test_lz4":
storage id: xxxxx
total file size: 73728, total data size: 45729
compression rate: 7.16x
total row count: 20001, stripe count: 2, average rows per stripe: 10000
chunk count: 9, containing data for dropped columns: 0, lz4 compressed: 9
SELECT pg_relation_size('test_lz4') AS size_lz4 \gset
SELECT DISTINCT * FROM test_lz4 ORDER BY a, b, c LIMIT 5;
a | b | c
---------------------------------------------------------------------
@ -43,13 +36,13 @@ SELECT DISTINCT * FROM test_lz4 ORDER BY a, b, c LIMIT 5;
SET columnar.compression TO 'pglz';
CREATE TABLE test_pglz (LIKE test_lz4) USING columnar;
INSERT INTO test_pglz SELECT * FROM test_lz4;
VACUUM VERBOSE test_pglz;
INFO: statistics for "test_pglz":
storage id: xxxxx
total file size: 57344, total data size: 35986
compression rate: 9.10x
total row count: 20001, stripe count: 1, average rows per stripe: 20001
chunk count: 9, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 6
SELECT pg_relation_size('test_pglz') AS size_pglz \gset
-- verify that pglz & lz4 resulted in different compression ratios
SELECT :size_pglz <> :size_lz4;
?column?
---------------------------------------------------------------------
t
(1 row)
-- Other operations
VACUUM FULL test_lz4;

View File

@ -7,7 +7,7 @@ CREATE SCHEMA am_zstd;
SET search_path TO am_zstd;
SET columnar.compression TO 'zstd';
CREATE TABLE test_zstd (a int, b text, c int) USING columnar;
INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i;
INSERT INTO test_zstd SELECT i % 1000, (i % 10)::text, 4 FROM generate_series(1, 10000) i;
SELECT count(*) FROM test_zstd;
count
---------------------------------------------------------------------
@ -21,27 +21,21 @@ SELECT count(*) FROM test_zstd;
20001
(1 row)
VACUUM VERBOSE test_zstd;
INFO: statistics for "test_zstd":
storage id: xxxxx
total file size: 40960, total data size: 14945
compression rate: 21.91x
total row count: 20001, stripe count: 2, average rows per stripe: 10000
chunk count: 9, containing data for dropped columns: 0, zstd compressed: 9
CREATE TABLE test_none (LIKE test_zstd) USING columnar;
INSERT INTO test_none SELECT * FROM test_zstd;
SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5;
a | b | c
a | b | c
---------------------------------------------------------------------
0 | 0 | 4
0 | 1 | 4
0 | 10 | 4
0 | 11 | 4
0 | 12 | 4
0 | 0 | 4
1 | 1 | 4
2 | 2 | 4
3 | 3 | 4
4 | 4 | 4
(5 rows)
VACUUM FULL test_zstd;
SELECT pg_relation_size('test_zstd') AS size_comp_level_default \gset
-- change compression level
-- for this particular usecase, higher compression levels
-- don't improve compression ratio
SELECT alter_columnar_table_set('test_zstd', compression_level => 19);
alter_columnar_table_set
---------------------------------------------------------------------
@ -49,33 +43,32 @@ SELECT alter_columnar_table_set('test_zstd', compression_level => 19);
(1 row)
VACUUM FULL test_zstd;
VACUUM VERBOSE test_zstd;
INFO: statistics for "test_zstd":
storage id: xxxxx
total file size: 32768, total data size: 15201
compression rate: 21.55x
total row count: 20001, stripe count: 1, average rows per stripe: 20001
chunk count: 9, containing data for dropped columns: 0, zstd compressed: 9
SELECT pg_relation_size('test_zstd') AS size_comp_level_19 \gset
-- verify that higher compression level compressed better
SELECT :size_comp_level_default > :size_comp_level_19 AS size_changed;
size_changed
---------------------------------------------------------------------
t
(1 row)
-- compare compression rate to pglz
SET columnar.compression TO 'pglz';
CREATE TABLE test_pglz (LIKE test_zstd) USING columnar;
INSERT INTO test_pglz SELECT * FROM test_zstd;
VACUUM VERBOSE test_pglz;
INFO: statistics for "test_pglz":
storage id: xxxxx
total file size: 57344, total data size: 35986
compression rate: 9.10x
total row count: 20001, stripe count: 1, average rows per stripe: 20001
chunk count: 9, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 6
SELECT pg_relation_size('test_pglz') AS size_pglz \gset
-- verify that zstd compressed better than pglz
SELECT :size_pglz > :size_comp_level_default;
?column?
---------------------------------------------------------------------
t
(1 row)
-- Other operations
VACUUM FULL test_zstd;
ANALYZE test_zstd;
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
count
---------------------------------------------------------------------
6002
6001
(1 row)
TRUNCATE test_zstd;

View File

@ -16,7 +16,7 @@ SELECT count(*) FROM test_lz4;
INSERT INTO test_lz4 SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i;
SELECT count(*) FROM test_lz4;
VACUUM VERBOSE test_lz4;
SELECT pg_relation_size('test_lz4') AS size_lz4 \gset
SELECT DISTINCT * FROM test_lz4 ORDER BY a, b, c LIMIT 5;
@ -25,7 +25,10 @@ SET columnar.compression TO 'pglz';
CREATE TABLE test_pglz (LIKE test_lz4) USING columnar;
INSERT INTO test_pglz SELECT * FROM test_lz4;
VACUUM VERBOSE test_pglz;
SELECT pg_relation_size('test_pglz') AS size_pglz \gset
-- verify that pglz & lz4 resulted in different compression ratios
SELECT :size_pglz <> :size_lz4;
-- Other operations
VACUUM FULL test_lz4;

View File

@ -10,34 +10,43 @@ SET search_path TO am_zstd;
SET columnar.compression TO 'zstd';
CREATE TABLE test_zstd (a int, b text, c int) USING columnar;
INSERT INTO test_zstd SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 10000) i;
INSERT INTO test_zstd SELECT i % 1000, (i % 10)::text, 4 FROM generate_series(1, 10000) i;
SELECT count(*) FROM test_zstd;
INSERT INTO test_zstd SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 11000) i;
SELECT count(*) FROM test_zstd;
VACUUM VERBOSE test_zstd;
CREATE TABLE test_none (LIKE test_zstd) USING columnar;
INSERT INTO test_none SELECT * FROM test_zstd;
SELECT DISTINCT * FROM test_zstd ORDER BY a, b, c LIMIT 5;
VACUUM FULL test_zstd;
SELECT pg_relation_size('test_zstd') AS size_comp_level_default \gset
-- change compression level
-- for this particular usecase, higher compression levels
-- don't improve compression ratio
SELECT alter_columnar_table_set('test_zstd', compression_level => 19);
VACUUM FULL test_zstd;
VACUUM VERBOSE test_zstd;
SELECT pg_relation_size('test_zstd') AS size_comp_level_19 \gset
-- verify that higher compression level compressed better
SELECT :size_comp_level_default > :size_comp_level_19 AS size_changed;
-- compare compression rate to pglz
SET columnar.compression TO 'pglz';
CREATE TABLE test_pglz (LIKE test_zstd) USING columnar;
INSERT INTO test_pglz SELECT * FROM test_zstd;
VACUUM VERBOSE test_pglz;
SELECT pg_relation_size('test_pglz') AS size_pglz \gset
-- verify that zstd compressed better than pglz
SELECT :size_pglz > :size_comp_level_default;
-- Other operations
VACUUM FULL test_zstd;
ANALYZE test_zstd;
ANALYZE test_zstd;
SELECT count(DISTINCT test_zstd.*) FROM test_zstd;
TRUNCATE test_zstd;