diff --git a/src/backend/columnar/.circleci/build.sh b/src/backend/columnar/.circleci/build.sh new file mode 100755 index 000000000..6a9f14c74 --- /dev/null +++ b/src/backend/columnar/.circleci/build.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -euxo pipefail +IFS=$'\n\t' + +status=0 + +basedir="$(pwd)" +installdir="${basedir}/install-${PG_MAJOR}" + +make install DESTDIR="${installdir}" +pushd "${installdir}" +find . -type f -print > "${basedir}/files.lst" +cat "${basedir}/files.lst" +tar cvf "${basedir}/install-${PG_MAJOR}.tar" $(cat "${basedir}/files.lst") +popd diff --git a/src/backend/columnar/.circleci/config.yml b/src/backend/columnar/.circleci/config.yml new file mode 100644 index 000000000..645211182 --- /dev/null +++ b/src/backend/columnar/.circleci/config.yml @@ -0,0 +1,138 @@ +version: 2.1 +orbs: + codecov: codecov/codecov@1.1.1 + +jobs: + check-style: + docker: + - image: 'citus/stylechecker:latest' + steps: + - checkout + - run: + name: 'Check Style' + command: | + citus_indent --check + - run: + name: 'Check if whitespace fixing changed anything, install editorconfig if it did' + command: | + git diff --exit-code + + build-11: + docker: + - image: 'citus/extbuilder:11.9' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=11 .circleci/build.sh + - persist_to_workspace: + root: . + paths: + - install-11.tar + + build-12: + docker: + - image: 'citus/extbuilder:12.4' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=12 .circleci/build.sh + - persist_to_workspace: + root: . + paths: + - install-12.tar + + build-13: + docker: + - image: 'citus/extbuilder:13.0' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=13 .circleci/build.sh + - persist_to_workspace: + root: . 
+ paths: + - install-13.tar + + test-11_checkinstall: + docker: + - image: 'citus/exttester:11.9' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_11,installcheck' + + test-12_checkinstall: + docker: + - image: 'citus/exttester:12.4' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_12,installcheck' + + test-13_checkinstall: + docker: + - image: 'citus/exttester:13.0' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . 
+ - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_13,installcheck' + +workflows: + version: 2 + build_and_test: + jobs: + + - check-style + + - build-11 + - build-12 + - build-13 + + - test-11_checkinstall: + requires: [build-11] + - test-12_checkinstall: + requires: [build-12] + - test-13_checkinstall: + requires: [build-13] diff --git a/src/backend/columnar/.circleci/run_test.sh b/src/backend/columnar/.circleci/run_test.sh new file mode 100755 index 000000000..f9e183b56 --- /dev/null +++ b/src/backend/columnar/.circleci/run_test.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -euxo pipefail +IFS=$'\n\t' + +status=0 + +export PGPORT=${PGPORT:-55432} + +function cleanup { + pg_ctl -D /tmp/postgres stop + rm -rf /tmp/postgres +} +trap cleanup EXIT + +rm -rf /tmp/postgres +initdb -E unicode /tmp/postgres +echo "shared_preload_libraries = 'cstore_fdw'" >> /tmp/postgres/postgresql.conf +pg_ctl -D /tmp/postgres -o "-p ${PGPORT}" -l /tmp/postgres_logfile start || status=$? +if [ "${status}" -ne 0 ]; then cat /tmp/postgres_logfile; fi + +make "${@}" || status=$? 
+diffs="regression.diffs" + +if test -f "${diffs}"; then cat "${diffs}"; fi + +exit $status diff --git a/src/backend/columnar/.gitattributes b/src/backend/columnar/.gitattributes new file mode 100644 index 000000000..215ae1909 --- /dev/null +++ b/src/backend/columnar/.gitattributes @@ -0,0 +1,26 @@ +* whitespace=space-before-tab,trailing-space +*.[chly] whitespace=space-before-tab,trailing-space,indent-with-non-tab,tabwidth=4 +*.dsl whitespace=space-before-tab,trailing-space,tab-in-indent +*.patch -whitespace +*.pl whitespace=space-before-tab,trailing-space,tabwidth=4 +*.po whitespace=space-before-tab,trailing-space,tab-in-indent,-blank-at-eof +*.sgml whitespace=space-before-tab,trailing-space,tab-in-indent,-blank-at-eol +*.x[ms]l whitespace=space-before-tab,trailing-space,tab-in-indent + +# Avoid confusing ASCII underlines with leftover merge conflict markers +README conflict-marker-size=32 +README.* conflict-marker-size=32 + +# Certain data files that contain special whitespace, and other special cases +*.data -whitespace + +# Test output files that contain extra whitespace +*.out -whitespace +src/test/regress/output/*.source -whitespace + +# These files are maintained or generated elsewhere. We take them as is. +configure -whitespace + +# all C files (implementation and header) use our style... +*.[ch] citus-style + diff --git a/src/backend/columnar/.gitignore b/src/backend/columnar/.gitignore new file mode 100644 index 000000000..6b3554f3b --- /dev/null +++ b/src/backend/columnar/.gitignore @@ -0,0 +1,68 @@ +# ===== +# = C = +# ===== + +# Object files +*.o +*.ko +*.obj +*.elf +*.bc + +# Libraries +*.lib +*.a + +# Shared objects (inc. 
Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.app +*.i*86 +*.x86_64 +*.hex + +# ======== +# = Gcov = +# ======== + +# gcc coverage testing tool files + +*.gcno +*.gcda +*.gcov + +# ==================== +# = Project-Specific = +# ==================== + +/data/*.cstore +/data/*.footer + +/sql/*block_filtering.sql +/sql/*copyto.sql +/sql/*create.sql +/sql/*data_types.sql +/sql/*load.sql + +/expected/*block_filtering.out +/expected/*copyto.out +/expected/*create.out +/expected/*data_types.out +/expected/*load.out +/results/* +/.deps/* +/regression.diffs +/regression.out +.vscode + +*.pb-c.* + +# ignore files that could be created by circleci automation +files.lst +install-*.tar +install-*/ diff --git a/src/backend/columnar/LICENSE b/src/backend/columnar/LICENSE new file mode 100644 index 000000000..ad410e113 --- /dev/null +++ b/src/backend/columnar/LICENSE @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/src/backend/columnar/META.json b/src/backend/columnar/META.json new file mode 100644 index 000000000..19e819daa --- /dev/null +++ b/src/backend/columnar/META.json @@ -0,0 +1,45 @@ +{ + "name": "cstore_fdw", + "abstract": "Columnar Store for PostgreSQL", + "description": "PostgreSQL extension which implements a Columnar Store.", + "version": "1.7.0", + "maintainer": "Murat Tuncer ", + "license": "apache_2_0", + "provides": { + "cstore_fdw": { + "abstract": "Foreign Data Wrapper for Columnar Store Tables", + "file": "cstore_fdw--1.7.sql", + "docfile": "README.md", + "version": "1.7.0" + } + }, + "prereqs": { + "runtime": { + "requires": { + "PostgreSQL": "9.3.0" + } + } + }, + "resources": { + "bugtracker": { + "web": "http://github.com/citusdata/cstore_fdw/issues/" + }, + "repository": { + "url": "git://github.com/citusdata/cstore_fdw.git", + "web": "https://github.com/citusdata/cstore_fdw/", + "type": "git" + } + }, + "generated_by": "Murat Tuncer", + "meta-spec": { + "version": "1.0.0", + "url": "http://pgxn.org/meta/spec.txt" + }, + "tags": [ + "orc", + "fdw", + "foreign data wrapper", + "cstore_fdw", + "columnar store" + ] +} diff --git a/src/backend/columnar/Makefile b/src/backend/columnar/Makefile new file mode 100644 index 000000000..6be7bbd45 --- /dev/null +++ b/src/backend/columnar/Makefile @@ -0,0 +1,102 @@ +# cstore_fdw/Makefile +# +# 
Copyright (c) 2016 Citus Data, Inc. +# + +MODULE_big = cstore_fdw + +VER := $(lastword $(shell pg_config --version)) +VER_WORDS = $(subst ., ,$(VER)) +MVER = $(firstword $(VER_WORDS)) + +# error for versions earlier than 10 so that lex comparison will work +ifneq ($(shell printf '%02d' $(MVER)),$(MVER)) +$(error version $(VER) not supported) +endif + +# lexicographic comparison of version number +ifeq ($(lastword $(sort 12 $(MVER))),$(MVER)) + USE_TABLEAM = yes + USE_FDW = yes +else ifeq ($(lastword $(sort 11 $(MVER))),$(MVER)) + USE_TABLEAM = no + USE_FDW = yes +else +$(error version $(VER) is not supported) +endif + +PG_CFLAGS = -std=c11 -Wshadow -Werror +OBJS = cstore.o cstore_writer.o cstore_reader.o \ + cstore_compression.o mod.o cstore_metadata_tables.o + +EXTENSION = cstore_fdw +DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ + cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ + cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql + +REGRESS = extension_create +ISOLATION = create +EXTRA_CLEAN = sql/fdw_block_filtering.sql sql/fdw_create.sql sql/fdw_data_types.sql sql/fdw_load.sql \ + sql/fdw_copyto.sql expected/fdw_block_filtering.out expected/fdw_create.out \ + expected/fdw_data_types.out expected/fdw_load.out expected/fdw_copyto.out \ + sql/am_block_filtering.sql sql/am_create.sql sql/am_data_types.sql sql/am_load.sql \ + sql/am_copyto.sql expected/am_block_filtering.out \ + expected/am_data_types.out expected/am_load.out expected/am_copyto.out + +ifeq ($(USE_FDW),yes) + PG_CFLAGS += -DUSE_FDW + OBJS += cstore_fdw.o + REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types \ + fdw_functions fdw_block_filtering fdw_drop fdw_insert \ + fdw_copyto fdw_alter fdw_rollback fdw_truncate fdw_clean +endif + +ifeq ($(USE_TABLEAM),yes) + PG_CFLAGS += -DUSE_TABLEAM + OBJS += cstore_tableam.o cstore_customscan.o + REGRESS += am_create am_load am_query am_analyze am_data_types 
am_functions \ + am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean \ + am_block_filtering am_join am_trigger am_tableoptions + ISOLATION += am_write_concurrency am_vacuum_vs_insert +endif + +ifeq ($(enable_coverage),yes) + PG_CPPFLAGS += --coverage + SHLIB_LINK += --coverage + EXTRA_CLEAN += *.gcno +endif + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Darwin) + PG_CPPFLAGS += -I/usr/local/include +endif + +# +# Users need to specify their Postgres installation path through pg_config. For +# example: /usr/local/pgsql/bin/pg_config or /usr/lib/postgresql/9.3/bin/pg_config +# + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) + +# command for getting postgres source directory is taken from citus/configure.in +POSTGRES_SRCDIR=$(shell grep ^abs_top_srcdir $(shell dirname $(shell $(PG_CONFIG) --pgxs))/../Makefile.global|cut -d ' ' -f3-) +PGXS_ISOLATION_TESTER=$(top_builddir)/src/test/isolation/pg_isolation_regress + +# If postgres installation doesn't include pg_isolation_regress, try using the +# one in postgres source directory. +ifeq (,$(wildcard $(PGXS_ISOLATION_TESTER))) + pg_isolation_regress_installcheck = \ + $(POSTGRES_SRCDIR)/src/test/isolation/pg_isolation_regress \ + --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) +else + pg_isolation_regress_installcheck = \ + $(PGXS_ISOLATION_TESTER) \ + --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) +endif + +installcheck: + +reindent: + citus_indent . diff --git a/src/backend/columnar/README.md b/src/backend/columnar/README.md new file mode 100644 index 000000000..1a20f3abe --- /dev/null +++ b/src/backend/columnar/README.md @@ -0,0 +1,373 @@ +cstore_fdw +========== + +[![Build Status](https://travis-ci.org/citusdata/cstore_fdw.svg?branch=master)][status] +[![Coverage](http://img.shields.io/coveralls/citusdata/cstore_fdw/master.svg)][coverage] + +Cstore_fdw is an open source columnar store extension for PostgreSQL. 
Columnar stores provide notable benefits for analytics use cases where data is loaded in batches. Cstore_fdw’s columnar nature delivers performance by only reading relevant data from disk, and it may compress data 6x-10x to reduce space requirements for data archival. + +Cstore_fdw is developed by [Citus Data](https://www.citusdata.com) and can be used in combination with [Citus](https://github.com/citusdata/citus), a postgres extension that intelligently distributes your data and queries across many nodes so your database can scale and your queries are fast. If you have any questions about how Citus can help you scale or how to use Citus in combination with cstore_fdw, [please let us know](https://www.citusdata.com/about/contact_us/). + +Join the [Mailing List][mailing-list] to stay on top of the latest developments for Cstore_fdw. + + +Introduction +------------ + +This extension uses a format for its data layout that is inspired by ORC, +the Optimized Row Columnar format. Like ORC, the cstore format improves +upon RCFile developed at Facebook, and brings the following benefits: + +* Compression: Reduces in-memory and on-disk data size by 2-4x. Can be extended + to support different codecs. +* Column projections: Only reads column data relevant to the query. Improves + performance for I/O bound queries. +* Skip indexes: Stores min/max statistics for row groups, and uses them to skip + over unrelated rows. + +Further, we used the Postgres foreign data wrapper APIs and type representations +with this extension. This brings: + +* Support for 40+ Postgres data types. The user can also create new types and + use them. +* Statistics collection. PostgreSQL's query optimizer uses these stats to + evaluate different query plans and pick the best one. +* Simple setup. Create foreign table and copy data. Run SQL. + + +Building +-------- + +cstore\_fdw depends on protobuf-c for serializing and deserializing table metadata. 
+So we need to install these packages first: + + # Fedora 17+, CentOS, and Amazon Linux + sudo yum install protobuf-c-devel + + # Ubuntu 10.4+ + sudo apt-get install protobuf-c-compiler + sudo apt-get install libprotobuf-c0-dev + + # Ubuntu 18.4+ + sudo apt-get install protobuf-c-compiler + sudo apt-get install libprotobuf-c-dev + + # Mac OS X + brew install protobuf-c + +**Note.** In CentOS 5, 6, and 7, you may need to install or update EPEL 5, 6, or 7 repositories. + See [this page](https://support.rackspace.com/how-to/install-epel-and-additional-repositories-on-centos-and-red-hat/) +for instructions. + +**Note.** In Amazon Linux, the EPEL repository is installed by default, but not +enabled. See [these instructions](http://aws.amazon.com/amazon-linux-ami/faqs/#epel) +for how to enable it. + +Once you have protobuf-c installed on your machine, you are ready to build +cstore\_fdw. For this, you need to include the pg\_config directory path in +your make command. This path is typically the same as your PostgreSQL +installation's bin/ directory path. For example: + + PATH=/usr/local/pgsql/bin/:$PATH make + sudo PATH=/usr/local/pgsql/bin/:$PATH make install + +**Note.** cstore_fdw requires PostgreSQL version from 9.3 to 12. It doesn't +support earlier versions of PostgreSQL. + + +Usage +----- + +Before using cstore\_fdw, you need to add it to ```shared_preload_libraries``` +in your ```postgresql.conf``` and restart Postgres: + + shared_preload_libraries = 'cstore_fdw' # (change requires restart) + +The following parameters can be set on a cstore foreign table object. + +* filename (optional): The absolute path to the location for storing table data. + If you don't specify the filename option, cstore\_fdw will automatically + choose the $PGDATA/cstore\_fdw directory to store the files. If specified the + value of this parameter will be used as a prefix for all files created to + store table data. 
For example, the value ```/cstore_fdw/my_table``` could result in + the files ```/cstore_fdw/my_table``` and ```/cstore_fdw/my_table.footer``` being used + to manage table data. +* compression (optional): The compression used for compressing value streams. + Valid options are ```none``` and ```pglz```. The default is ```none```. +* stripe\_row\_count (optional): Number of rows per stripe. The default is + ```150000```. Reducing this decreases the amount of memory used for loading data + and querying, but also decreases the performance. +* block\_row\_count (optional): Number of rows per column block. The default is + ```10000```. cstore\_fdw compresses, creates skip indexes, and reads from disk + at the block granularity. Increasing this value helps with compression and results + in fewer reads from disk. However, higher values also reduce the probability of + skipping over unrelated row blocks. + + +To load or append data into a cstore table, you have two options: + +* You can use the [```COPY``` command][copy-command] to load or append data from + a file, a program, or STDIN. +* You can use the ```INSERT INTO cstore_table SELECT ...``` syntax to load or + append data from another table. + +You can use the [```ANALYZE``` command][analyze-command] to collect statistics +about the table. These statistics help the query planner determine the +most efficient execution plan for each query. + +**Note.** We currently don't support updating tables using DELETE and UPDATE +commands. We also don't support single row inserts. 
+ + +Updating from earlier versions to 1.7 +--------------------------------------- + +To update an existing cstore_fdw installation from versions earlier than 1.6 +you can take the following steps: + +* Download and install cstore_fdw version 1.6 using instructions from the "Building" + section, +* Restart the PostgreSQL server, +* Run ```ALTER EXTENSION cstore_fdw UPDATE;``` + + +Example +------- + +As an example, we demonstrate loading and querying data to/from a column store +table from scratch here. Let's start with downloading and decompressing the data +files. + + wget http://examples.citusdata.com/customer_reviews_1998.csv.gz + wget http://examples.citusdata.com/customer_reviews_1999.csv.gz + + gzip -d customer_reviews_1998.csv.gz + gzip -d customer_reviews_1999.csv.gz + +Then, let's log into Postgres, and run the following commands to create a column +store foreign table: + +```SQL +-- load extension first time after install +CREATE EXTENSION cstore_fdw; + +-- create server object +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + +-- create foreign table +CREATE FOREIGN TABLE customer_reviews +( + customer_id TEXT, + review_date DATE, + review_rating INTEGER, + review_votes INTEGER, + review_helpful_votes INTEGER, + product_id CHAR(10), + product_title TEXT, + product_sales_rank BIGINT, + product_group TEXT, + product_category TEXT, + product_subcategory TEXT, + similar_product_ids CHAR(10)[] +) +SERVER cstore_server +OPTIONS(compression 'pglz'); +``` + +Next, we load data into the table: + +```SQL +\COPY customer_reviews FROM 'customer_reviews_1998.csv' WITH CSV; +\COPY customer_reviews FROM 'customer_reviews_1999.csv' WITH CSV; +``` + +**Note.** If you are getting ```ERROR: cannot copy to foreign table +"customer_reviews"``` when trying to run the COPY commands, double check that you +have added cstore\_fdw to ```shared_preload_libraries``` in ```postgresql.conf``` +and restarted Postgres. 
+ +Next, we collect data distribution statistics about the table. This is optional, +but usually very helpful: + +```SQL +ANALYZE customer_reviews; +``` + +Finally, let's run some example SQL queries on the column store table. + +```SQL +-- Find all reviews a particular customer made on the Dune series in 1998. +SELECT + customer_id, review_date, review_rating, product_id, product_title +FROM + customer_reviews +WHERE + customer_id ='A27T7HVDXA3K2A' AND + product_title LIKE '%Dune%' AND + review_date >= '1998-01-01' AND + review_date <= '1998-12-31'; + +-- Do we have a correlation between a book's title's length and its review ratings? +SELECT + width_bucket(length(product_title), 1, 50, 5) title_length_bucket, + round(avg(review_rating), 2) AS review_average, + count(*) +FROM + customer_reviews +WHERE + product_group = 'Book' +GROUP BY + title_length_bucket +ORDER BY + title_length_bucket; +``` + + +Usage with Citus +---------------- + +The example above illustrated how to load data into a PostgreSQL database running +on a single host. However, sometimes your data is too large to analyze effectively +on a single host. Citus is a product built by Citus Data that allows you to run +a distributed PostgreSQL database to analyze your data using the power of multiple +hosts. You can easily install and run other PostgreSQL extensions and foreign data +wrappers—including cstore_fdw—alongside Citus. + +You can create a cstore_fdw table and distribute it using the +```create_distributed_table()``` UDF just like any other table. You can load data +using the ```copy``` command as you would do in single node PostgreSQL. + +Using Skip Indexes +------------------ + +cstore_fdw partitions each column into multiple blocks. Skip indexes store minimum +and maximum values for each of these blocks. While scanning the table, if min/max +values of the block contradict the WHERE clause, then the block is completely +skipped. 
This way, the query processes less data and hence finishes faster. + +To use skip indexes more efficiently, you should load the data after sorting it +on a column that is commonly used in the WHERE clause. This ensures that there is +a minimum overlap between blocks and the chance of them being skipped is higher. + +In practice, the data generally has an inherent dimension (for example a time field) +on which it is naturally sorted. Usually, the queries also have a filter clause on +that column (for example you want to query only the last week's data), and hence you +don't need to sort the data in such cases. + + +Uninstalling cstore_fdw +----------------------- + +Before uninstalling the extension, first you need to drop all the cstore tables: + + postgres=# DROP FOREIGN TABLE cstore_table_1; + ... + postgres=# DROP FOREIGN TABLE cstore_table_n; + +Then, you should drop the cstore server and extension: + + postgres=# DROP SERVER cstore_server; + postgres=# DROP EXTENSION cstore_fdw; + +cstore\_fdw automatically creates some directories inside the PostgreSQL's data +directory to store its files. To remove them, you can run: + + $ rm -rf $PGDATA/cstore_fdw + +Then, you should remove cstore\_fdw from ```shared_preload_libraries``` in +your ```postgresql.conf```: + + shared_preload_libraries = '' # (change requires restart) + +Finally, to uninstall the extension you can run the following command in the +extension's source code directory. 
This will clean up all the files copied during +the installation: + + $ sudo PATH=/usr/local/pgsql/bin/:$PATH make uninstall + + +Changeset +--------- +### Version 1.7.0 +* (Fix) Add support for PostgreSQL 12 +* (Fix) Support count(t.*) from t type queries +* (Fix) Build failures for MacOS 10.14+ +* (Fix) Make foreign scan parallel safe +* (Fix) Add support for PostgreSQL 11 COPY +### Version 1.6.2 +* (Fix) Add support for PostgreSQL 11 +### Version 1.6.1 +* (Fix) Fix crash during truncate (Cstore crashing server when enabled, not used) +* (Fix) No such file or directory warning when attempting to drop database +### Version 1.6 +* (Feature) Added support for PostgreSQL 10. +* (Fix) Removed table files when a schema, extension or database is dropped. +* (Fix) Removed unused code fragments. +* (Fix) Fixed incorrect initialization of stripe buffers. +* (Fix) Checked user access rights when executing truncate. +* (Fix) Made copy command cancellable. +* (Fix) Fixed namespace issue regarding drop table. + +### Version 1.5.1 +* (Fix) Verify cstore_fdw server on CREATE FOREIGN TABLE command + +### Version 1.5 +* (Feature) Added support for PostgreSQL 9.6. +* (Fix) Removed table data when cstore_fdw table is indirectly dropped. +* (Fix) Removed unused code fragments. +* (Fix) Fixed column selection logic to return columns used in expressions. +* (Fix) Prevented alter table command from changing column type to incompatible types. + +### Version 1.4.1 + +* (Fix) Compatibility fix for Citus [copy command][copy-command]. + +### Version 1.4 + +* (Feature) Added support for ```TRUNCATE TABLE``` +* (Fix) Added support for PostgreSQL 9.5 + +### Version 1.3 + +* (Feature) Added support for ```ALTER TABLE ADD COLUMN``` and ```ALTER TABLE DROP COLUMN```. +* (Feature) Added column list support in ```COPY FROM```. +* (Optimization) Improve row count estimation, which results in better plans. +* (Fix) Fix the deadlock issue during concurrent inserts. 
+* (Fix) Return correct result when using whole row references. + +### Version 1.2 + +* (Feature) Added support for ```COPY TO```. +* (Feature) Added support for ```INSERT INTO cstore_table SELECT ...```. +* (Optimization) Improved memory usage. +* (Fix) Dropping multiple cstore tables in a single command cleans-up files + of all them. + +### Version 1.1 + +* (Feature) Make filename option optional, and use a default directory inside + $PGDATA to manage cstore tables. +* (Feature) Automatically delete files on DROP FOREIGN TABLE. +* (Fix) Return empty table if no data has been loaded. Previously, cstore_fdw + errored out. +* (Fix) Fix overestimating relation column counts when planning. +* (Feature) Added cstore\_table\_size(tablename) for getting the size of a cstore + table in bytes. + + +Copyright +--------- + +Copyright (c) 2017 Citus Data, Inc. + +This module is free software; you can redistribute it and/or modify it under the +Apache v2.0 License. + +For all types of questions and comments about the wrapper, please contact us at +engage @ citusdata.com. + +[status]: https://travis-ci.org/citusdata/cstore_fdw +[mailing-list]: https://groups.google.com/forum/#!forum/cstore-users +[coverage]: https://coveralls.io/r/citusdata/cstore_fdw +[copy-command]: http://www.postgresql.org/docs/current/static/sql-copy.html +[analyze-command]: http://www.postgresql.org/docs/current/static/sql-analyze.html diff --git a/src/backend/columnar/TODO.md b/src/backend/columnar/TODO.md new file mode 100644 index 000000000..179fbc8c7 --- /dev/null +++ b/src/backend/columnar/TODO.md @@ -0,0 +1,41 @@ +To see the list of features and bug-fixes planned for next releases, see our +[development roadmap][roadmap]. 
+ +Requested Features +------------------ + +* Improve write performance +* Improve read performance +* Add checksum logic +* Add new compression methods +* Enable INSERT/DELETE/UPDATE +* Enable users other than superuser to safely create columnar tables (permissions) +* Transactional semantics +* Add config setting to make pg\_fsync() optional + + +Known Issues +------------ + +* Copy command ignores NOT NULL constraints. +* Planning functions don't take into account average column width. +* Planning functions don't correctly take into account block skipping benefits. +* On 32-bit platforms, when file size is outside the 32-bit signed range, EXPLAIN + command prints incorrect file size. +* If two different columnar tables are configured to point to the same file, + writes to the underlying file aren't protected from each other. +* When a data load is in progress, concurrent reads on the table overestimate the + page count. +* We have a minor memory leak in CStoreEndWrite. We need to also free the + comparisonFunctionArray. +* block\_filtering test fails on Ubuntu because the "da\_DK" locale is not enabled + by default. +* We don't yet incorporate the compression method's impact on disk I/O into cost + estimates. +* CitusDB integration errors: +* Concurrent staging cstore\_fdw tables doesn't work. +* Setting a default value for column with ALTER TABLE has limited support for + existing rows. + +[roadmap]: https://github.com/citusdata/cstore_fdw/wiki/Roadmap + diff --git a/src/backend/columnar/cstore.c b/src/backend/columnar/cstore.c new file mode 100644 index 000000000..a724a62a0 --- /dev/null +++ b/src/backend/columnar/cstore.c @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * cstore.c + * + * This file contains... + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include +#include + +#include "miscadmin.h" +#include "utils/guc.h" +#include "utils/rel.h" + +#include "cstore.h" + +/* Default values for option parameters */ +#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE +#define DEFAULT_STRIPE_ROW_COUNT 150000 +#define DEFAULT_BLOCK_ROW_COUNT 10000 + +int cstore_compression = DEFAULT_COMPRESSION_TYPE; +int cstore_stripe_row_count = DEFAULT_STRIPE_ROW_COUNT; +int cstore_block_row_count = DEFAULT_BLOCK_ROW_COUNT; + +static const struct config_enum_entry cstore_compression_options[] = +{ + { "none", COMPRESSION_NONE, false }, + { "pglz", COMPRESSION_PG_LZ, false }, + { NULL, 0, false } +}; + +void +cstore_init() +{ + DefineCustomEnumVariable("cstore.compression", + "Compression type for cstore.", + NULL, + &cstore_compression, + DEFAULT_COMPRESSION_TYPE, + cstore_compression_options, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("cstore.stripe_row_count", + "Maximum number of tuples per stripe.", + NULL, + &cstore_stripe_row_count, + DEFAULT_STRIPE_ROW_COUNT, + STRIPE_ROW_COUNT_MINIMUM, + STRIPE_ROW_COUNT_MAXIMUM, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("cstore.block_row_count", + "Maximum number of rows per block.", + NULL, + &cstore_block_row_count, + DEFAULT_BLOCK_ROW_COUNT, + BLOCK_ROW_COUNT_MINIMUM, + BLOCK_ROW_COUNT_MAXIMUM, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); +} + + +/* ParseCompressionType converts a string to a compression type. 
*/ +CompressionType +ParseCompressionType(const char *compressionTypeString) +{ + CompressionType compressionType = COMPRESSION_TYPE_INVALID; + Assert(compressionTypeString != NULL); + + if (strncmp(compressionTypeString, COMPRESSION_STRING_NONE, NAMEDATALEN) == 0) + { + compressionType = COMPRESSION_NONE; + } + else if (strncmp(compressionTypeString, COMPRESSION_STRING_PG_LZ, NAMEDATALEN) == 0) + { + compressionType = COMPRESSION_PG_LZ; + } + + return compressionType; +} diff --git a/src/backend/columnar/cstore.h b/src/backend/columnar/cstore.h new file mode 100644 index 000000000..35598cd41 --- /dev/null +++ b/src/backend/columnar/cstore.h @@ -0,0 +1,351 @@ +/*------------------------------------------------------------------------- + * + * cstore.h + * + * Type and function declarations for CStore + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_H +#define CSTORE_H + +#include "fmgr.h" +#include "lib/stringinfo.h" +#include "nodes/parsenodes.h" +#include "storage/bufpage.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" +#include "utils/snapmgr.h" + +/* Defines for valid option names */ +#define OPTION_NAME_COMPRESSION_TYPE "compression" +#define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" +#define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" + +/* Limits for option parameters */ +#define STRIPE_ROW_COUNT_MINIMUM 1000 +#define STRIPE_ROW_COUNT_MAXIMUM 10000000 +#define BLOCK_ROW_COUNT_MINIMUM 1000 +#define BLOCK_ROW_COUNT_MAXIMUM 100000 + +/* String representations of compression types */ +#define COMPRESSION_STRING_NONE "none" +#define COMPRESSION_STRING_PG_LZ "pglz" + +/* CStore file signature */ +#define CSTORE_MAGIC_NUMBER "citus_cstore" +#define CSTORE_VERSION_MAJOR 1 +#define CSTORE_VERSION_MINOR 7 + +/* miscellaneous defines */ +#define CSTORE_FDW_NAME "cstore_fdw" +#define CSTORE_TUPLE_COST_MULTIPLIER 10 +#define 
CSTORE_POSTSCRIPT_SIZE_LENGTH 1 +#define CSTORE_POSTSCRIPT_SIZE_MAX 256 + +/* Enumeration for cstore file's compression method */ +typedef enum +{ + COMPRESSION_TYPE_INVALID = -1, + COMPRESSION_NONE = 0, + COMPRESSION_PG_LZ = 1, + + COMPRESSION_COUNT +} CompressionType; + + +/* + * CStoreOptions holds the option values to be used when reading or writing + * a cstore file. To resolve these values, we first check foreign table's options, + * and if not present, we then fall back to the default values specified above. + */ +typedef struct CStoreOptions +{ + CompressionType compressionType; + uint64 stripeRowCount; + uint32 blockRowCount; +} CStoreOptions; + + +/* + * StripeMetadata represents information about a stripe. This information is + * stored in the cstore file's footer. + */ +typedef struct StripeMetadata +{ + uint64 fileOffset; + uint64 dataLength; + uint32 columnCount; + uint32 blockCount; + uint32 blockRowCount; + uint64 rowCount; + uint64 id; +} StripeMetadata; + + +/* DataFileMetadata represents the metadata of a cstore file. */ +typedef struct DataFileMetadata +{ + List *stripeMetadataList; + uint64 blockRowCount; + uint64 stripeRowCount; + CompressionType compression; +} DataFileMetadata; + + +/* ColumnBlockSkipNode contains statistics for a ColumnBlockData. */ +typedef struct ColumnBlockSkipNode +{ + /* statistics about values of a column block */ + bool hasMinMax; + Datum minimumValue; + Datum maximumValue; + uint64 rowCount; + + /* + * Offsets and sizes of value and exists streams in the column data. + * These enable us to skip reading suppressed row blocks, and start reading + * a block without reading previous blocks. + */ + uint64 valueBlockOffset; + uint64 valueLength; + uint64 existsBlockOffset; + uint64 existsLength; + + CompressionType valueCompressionType; +} ColumnBlockSkipNode; + + +/* + * StripeSkipList can be used for skipping row blocks. It contains a column block + * skip node for each block of each column. 
blockSkipNodeArray[column][block] + * is the entry for the specified column block. + */ +typedef struct StripeSkipList +{ + ColumnBlockSkipNode **blockSkipNodeArray; + uint32 columnCount; + uint32 blockCount; +} StripeSkipList; + + +/* + * BlockData represents a block of data for multiple columns. valueArray stores + * the values of data, and existsArray stores whether a value is present. + * valueBuffer is used to store (uncompressed) serialized values + * referenced by Datum's in valueArray. It is only used for by-reference Datum's. + * There is a one-to-one correspondence between valueArray and existsArray. + */ +typedef struct BlockData +{ + uint32 rowCount; + uint32 columnCount; + + /* + * Following are indexed by [column][row]. If a column is not projected, + * then existsArray[column] and valueArray[column] are NULL. + */ + bool **existsArray; + Datum **valueArray; + + /* valueBuffer keeps actual data for type-by-reference datums from valueArray. */ + StringInfo *valueBufferArray; +} BlockData; + + +/* + * ColumnBlockBuffers represents a block of serialized data in a column. + * valueBuffer stores the serialized values of data, and existsBuffer stores + * serialized value of presence information. valueCompressionType contains + * compression type if valueBuffer is compressed. Finally rowCount has + * the number of rows in this block. + */ +typedef struct ColumnBlockBuffers +{ + StringInfo existsBuffer; + StringInfo valueBuffer; + CompressionType valueCompressionType; +} ColumnBlockBuffers; + + +/* + * ColumnBuffers represents data buffers for a column in a row stripe. Each + * column is made of multiple column blocks. + */ +typedef struct ColumnBuffers +{ + ColumnBlockBuffers **blockBuffersArray; +} ColumnBuffers; + + +/* StripeBuffers represents data for a row stripe in a cstore file. 
*/ +typedef struct StripeBuffers +{ + uint32 columnCount; + uint32 rowCount; + ColumnBuffers **columnBuffersArray; +} StripeBuffers; + + +/* TableReadState represents state of a cstore file read operation. */ +typedef struct TableReadState +{ + DataFileMetadata *datafileMetadata; + StripeMetadata *currentStripeMetadata; + TupleDesc tupleDescriptor; + Relation relation; + + /* + * List of Var pointers for columns in the query. We use this both for + * getting vector of projected columns, and also when we want to build + * base constraint to find selected row blocks. + */ + List *projectedColumnList; + + List *whereClauseList; + MemoryContext stripeReadContext; + StripeBuffers *stripeBuffers; + uint32 readStripeCount; + uint64 stripeReadRowCount; + BlockData *blockData; + int32 deserializedBlockIndex; +} TableReadState; + + +/* TableWriteState represents state of a cstore file write operation. */ +typedef struct TableWriteState +{ + CompressionType compressionType; + TupleDesc tupleDescriptor; + FmgrInfo **comparisonFunctionArray; + Relation relation; + + MemoryContext stripeWriteContext; + StripeBuffers *stripeBuffers; + StripeSkipList *stripeSkipList; + uint32 stripeMaxRowCount; + uint32 blockRowCount; + BlockData *blockData; + + /* + * compressionBuffer buffer is used as temporary storage during + * data value compression operation. It is kept here to minimize + * memory allocations. It lives in stripeWriteContext and gets + * deallocated when memory context is reset. 
+ */ + StringInfo compressionBuffer; +} TableWriteState; + +extern int cstore_compression; +extern int cstore_stripe_row_count; +extern int cstore_block_row_count; + +extern void cstore_init(void); + +extern CompressionType ParseCompressionType(const char *compressionTypeString); + +/* Function declarations for writing to a cstore file */ +extern TableWriteState * CStoreBeginWrite(Relation relation, + CompressionType compressionType, + uint64 stripeMaxRowCount, + uint32 blockRowCount, + TupleDesc tupleDescriptor); +extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreEndWrite(TableWriteState *state); + +/* Function declarations for reading from a cstore file */ +extern TableReadState * CStoreBeginRead(Relation relation, + TupleDesc tupleDescriptor, + List *projectedColumnList, List *qualConditions); +extern bool CStoreReadFinished(TableReadState *state); +extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreRescan(TableReadState *readState); +extern void CStoreEndRead(TableReadState *state); + +/* Function declarations for common functions */ +extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, + int16 procedureId); +extern BlockData * CreateEmptyBlockData(uint32 columnCount, bool *columnMask, + uint32 blockRowCount); +extern void FreeBlockData(BlockData *blockData); +extern uint64 CStoreTableRowCount(Relation relation); +extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, + CompressionType compressionType); +extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); +extern char * CompressionTypeStr(CompressionType type); + +/* cstore_metadata_tables.c */ +extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); +extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int + stripeRowCount, CompressionType compression); +extern void 
UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int + stripeRowCount, CompressionType compression); +extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); +extern uint64 GetHighestUsedAddress(Oid relfilenode); +extern StripeMetadata ReserveStripe(Relation rel, uint64 size, + uint64 rowCount, uint64 columnCount, + uint64 blockCount, uint64 blockRowCount); +extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, + StripeSkipList *stripeSkipList, + TupleDesc tupleDescriptor); +extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe, + TupleDesc tupleDescriptor, + uint32 blockCount); + +typedef struct SmgrAddr +{ + BlockNumber blockno; + uint32 offset; +} SmgrAddr; + +/* + * Map logical offsets (as tracked in the metadata) to a physical page and + * offset where the data is kept. + */ +static inline SmgrAddr +logical_to_smgr(uint64 logicalOffset) +{ + uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; + SmgrAddr addr; + + addr.blockno = logicalOffset / bytes_per_page; + addr.offset = SizeOfPageHeaderData + (logicalOffset % bytes_per_page); + + return addr; +} + + +/* + * Map a physical page and offset address to a logical address. + */ +static inline uint64 +smgr_to_logical(SmgrAddr addr) +{ + uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; + return bytes_per_page * addr.blockno + addr.offset - SizeOfPageHeaderData; +} + + +/* + * Get the first usable address of next block. 
 + */ +static inline SmgrAddr +next_block_start(SmgrAddr addr) +{ + SmgrAddr result = { + .blockno = addr.blockno + 1, + .offset = SizeOfPageHeaderData + }; + + return result; +} + + +#endif /* CSTORE_H */ diff --git a/src/backend/columnar/cstore_compression.c b/src/backend/columnar/cstore_compression.c new file mode 100644 index 000000000..f36d8dd04 --- /dev/null +++ b/src/backend/columnar/cstore_compression.c @@ -0,0 +1,196 @@ +/*------------------------------------------------------------------------- + * + * cstore_compression.c + * + * This file contains compression/decompression function definitions + * used in cstore_fdw. + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#if PG_VERSION_NUM >= 90500 +#include "common/pg_lzcompress.h" +#else +#include "utils/pg_lzcompress.h" +#endif + +#include "cstore.h" + + +#if PG_VERSION_NUM >= 90500 + +/* + * The information at the start of the compressed data. This description is taken + * from pg_lzcompress in pre-9.5 version of PostgreSQL. + */ +typedef struct CStoreCompressHeader +{ + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int32 rawsize; +} CStoreCompressHeader; + +/* + * Utilities for manipulation of header information for compressed data + */ + +#define CSTORE_COMPRESS_HDRSZ ((int32) sizeof(CStoreCompressHeader)) +#define CSTORE_COMPRESS_RAWSIZE(ptr) (((CStoreCompressHeader *) (ptr))->rawsize) +#define CSTORE_COMPRESS_RAWDATA(ptr) (((char *) (ptr)) + CSTORE_COMPRESS_HDRSZ) +#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = \ + (len)) + +#else + +#define CSTORE_COMPRESS_HDRSZ (0) +#define CSTORE_COMPRESS_RAWSIZE(ptr) (PGLZ_RAW_SIZE((PGLZ_Header *) buffer->data)) +#define CSTORE_COMPRESS_RAWDATA(ptr) (((PGLZ_Header *) (ptr))) +#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = \ + (len)) + +#endif + + +/* + * CompressBuffer compresses the given buffer with the given compression type + * outputBuffer enlarged to contain compressed data. The function returns true + * if compression is done, returns false if compression is not done. + * outputBuffer is valid only if the function returns true. 
+ */ +bool +CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, + CompressionType compressionType) +{ + uint64 maximumLength = PGLZ_MAX_OUTPUT(inputBuffer->len) + CSTORE_COMPRESS_HDRSZ; + bool compressionResult = false; +#if PG_VERSION_NUM >= 90500 + int32 compressedByteCount = 0; +#endif + + if (compressionType != COMPRESSION_PG_LZ) + { + return false; + } + + resetStringInfo(outputBuffer); + enlargeStringInfo(outputBuffer, maximumLength); + +#if PG_VERSION_NUM >= 90500 + compressedByteCount = pglz_compress((const char *) inputBuffer->data, + inputBuffer->len, + CSTORE_COMPRESS_RAWDATA(outputBuffer->data), + PGLZ_strategy_always); + if (compressedByteCount >= 0) + { + CSTORE_COMPRESS_SET_RAWSIZE(outputBuffer->data, inputBuffer->len); + SET_VARSIZE_COMPRESSED(outputBuffer->data, + compressedByteCount + CSTORE_COMPRESS_HDRSZ); + compressionResult = true; + } +#else + + compressionResult = pglz_compress(inputBuffer->data, inputBuffer->len, + CSTORE_COMPRESS_RAWDATA(outputBuffer->data), + PGLZ_strategy_always); +#endif + + if (compressionResult) + { + outputBuffer->len = VARSIZE(outputBuffer->data); + } + + return compressionResult; +} + + +/* + * DecompressBuffer decompresses the given buffer with the given compression + * type. This function returns the buffer as-is when no compression is applied. 
+ */ +StringInfo +DecompressBuffer(StringInfo buffer, CompressionType compressionType) +{ + StringInfo decompressedBuffer = NULL; + + Assert(compressionType == COMPRESSION_NONE || compressionType == COMPRESSION_PG_LZ); + + if (compressionType == COMPRESSION_NONE) + { + /* in case of no compression, return buffer */ + decompressedBuffer = buffer; + } + else if (compressionType == COMPRESSION_PG_LZ) + { + uint32 compressedDataSize = VARSIZE(buffer->data) - CSTORE_COMPRESS_HDRSZ; + uint32 decompressedDataSize = CSTORE_COMPRESS_RAWSIZE(buffer->data); + char *decompressedData = NULL; +#if PG_VERSION_NUM >= 90500 + int32 decompressedByteCount = 0; +#endif + + if (compressedDataSize + CSTORE_COMPRESS_HDRSZ != buffer->len) + { + ereport(ERROR, (errmsg("cannot decompress the buffer"), + errdetail("Expected %u bytes, but received %u bytes", + compressedDataSize, buffer->len))); + } + + decompressedData = palloc0(decompressedDataSize); + +#if PG_VERSION_NUM >= 90500 + +#if PG_VERSION_NUM >= 120000 + decompressedByteCount = pglz_decompress(CSTORE_COMPRESS_RAWDATA(buffer->data), + compressedDataSize, decompressedData, + decompressedDataSize, true); +#else + decompressedByteCount = pglz_decompress(CSTORE_COMPRESS_RAWDATA(buffer->data), + compressedDataSize, decompressedData, + decompressedDataSize); +#endif + + if (decompressedByteCount < 0) + { + ereport(ERROR, (errmsg("cannot decompress the buffer"), + errdetail("compressed data is corrupted"))); + } +#else + pglz_decompress((PGLZ_Header *) buffer->data, decompressedData); +#endif + + decompressedBuffer = palloc0(sizeof(StringInfoData)); + decompressedBuffer->data = decompressedData; + decompressedBuffer->len = decompressedDataSize; + decompressedBuffer->maxlen = decompressedDataSize; + } + + return decompressedBuffer; +} + + +/* + * CompressionTypeStr returns string representation of a compression type. 
+ */ +char * +CompressionTypeStr(CompressionType type) +{ + switch (type) + { + case COMPRESSION_NONE: + { + return "none"; + } + + case COMPRESSION_PG_LZ: + { + return "pglz"; + } + + default: + return "unknown"; + } +} diff --git a/src/backend/columnar/cstore_customscan.c b/src/backend/columnar/cstore_customscan.c new file mode 100644 index 000000000..7c163e5c9 --- /dev/null +++ b/src/backend/columnar/cstore_customscan.c @@ -0,0 +1,433 @@ +/*------------------------------------------------------------------------- + * + * cstore_customscan.c + * + * This file contains the implementation of a postgres custom scan that + * we use to push down the projections into the table access methods. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/skey.h" +#include "nodes/extensible.h" +#include "nodes/pg_list.h" +#include "nodes/plannodes.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/restrictinfo.h" +#include "utils/relcache.h" + +#include "cstore.h" +#include "cstore_customscan.h" +#include "cstore_tableam.h" + +typedef struct CStoreScanPath +{ + CustomPath custom_path; + + /* place for local state during planning */ +} CStoreScanPath; + +typedef struct CStoreScanScan +{ + CustomScan custom_scan; + + /* place for local state during execution */ +} CStoreScanScan; + +typedef struct CStoreScanState +{ + CustomScanState custom_scanstate; + + List *qual; +} CStoreScanState; + + +static void CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, + RangeTblEntry *rte); +static Path * CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte); +static Cost CStoreScanCost(RangeTblEntry *rte); +static Plan * CStoreScanPath_PlanCustomPath(PlannerInfo *root, + RelOptInfo *rel, + struct CustomPath *best_path, + List *tlist, + List *clauses, + List *custom_plans); + +static Node * 
CStoreScan_CreateCustomScanState(CustomScan *cscan); + +static void CStoreScan_BeginCustomScan(CustomScanState *node, EState *estate, int eflags); +static TupleTableSlot * CStoreScan_ExecCustomScan(CustomScanState *node); +static void CStoreScan_EndCustomScan(CustomScanState *node); +static void CStoreScan_ReScanCustomScan(CustomScanState *node); + +/* saved hook value in case of unload */ +static set_rel_pathlist_hook_type PreviousSetRelPathlistHook = NULL; + +static bool EnableCStoreCustomScan = true; + + +const struct CustomPathMethods CStoreScanPathMethods = { + .CustomName = "CStoreScan", + .PlanCustomPath = CStoreScanPath_PlanCustomPath, +}; + +const struct CustomScanMethods CStoreScanScanMethods = { + .CustomName = "CStoreScan", + .CreateCustomScanState = CStoreScan_CreateCustomScanState, +}; + +const struct CustomExecMethods CStoreExecuteMethods = { + .CustomName = "CStoreScan", + + .BeginCustomScan = CStoreScan_BeginCustomScan, + .ExecCustomScan = CStoreScan_ExecCustomScan, + .EndCustomScan = CStoreScan_EndCustomScan, + .ReScanCustomScan = CStoreScan_ReScanCustomScan, + + .ExplainCustomScan = NULL, +}; + + +/* + * cstore_customscan_init installs the hook required to intercept the postgres planner and + * provide extra paths for cstore tables + */ +void +cstore_customscan_init() +{ + PreviousSetRelPathlistHook = set_rel_pathlist_hook; + set_rel_pathlist_hook = CStoreSetRelPathlistHook; + + /* register customscan specific GUC's */ + DefineCustomBoolVariable( + "cstore.enable_custom_scan", + gettext_noop("Enables the use of a custom scan to push projections and quals " + "into the storage layer"), + NULL, + &EnableCStoreCustomScan, + true, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); +} + + +static void +clear_paths(RelOptInfo *rel) +{ + rel->pathlist = NULL; + rel->partial_pathlist = NULL; + rel->cheapest_startup_path = NULL; + rel->cheapest_total_path = NULL; + rel->cheapest_unique_path = NULL; +} + + +static void 
+CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, + RangeTblEntry *rte) +{ + Relation relation; + + /* call into previous hook if assigned */ + if (PreviousSetRelPathlistHook) + { + PreviousSetRelPathlistHook(root, rel, rti, rte); + } + + if (!EnableCStoreCustomScan) + { + /* custom scans are disabled, use normal table access method api instead */ + return; + } + + if (!OidIsValid(rte->relid) || rte->rtekind != RTE_RELATION) + { + /* some calls to the pathlist hook don't have a valid relation set. Do nothing */ + return; + } + + /* + * Here we want to inspect if this relation pathlist hook is accessing a cstore table. + * If that is the case we want to insert an extra path that pushes down the projection + * into the scan of the table to minimize the data read. + */ + relation = RelationIdGetRelation(rte->relid); + if (relation->rd_tableam == GetCstoreTableAmRoutine()) + { + Path *customPath = CreateCStoreScanPath(rel, rte); + + ereport(DEBUG1, (errmsg("pathlist hook for cstore table am"))); + + /* we propose a new path that will be the only path for scanning this relation */ + clear_paths(rel); + add_path(rel, customPath); + } + RelationClose(relation); +} + + +static Path * +CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte) +{ + CStoreScanPath *cspath = (CStoreScanPath *) newNode(sizeof(CStoreScanPath), + T_CustomPath); + CustomPath *cpath; + Path *path; + + /* + * populate custom path information + */ + cpath = &cspath->custom_path; + cpath->methods = &CStoreScanPathMethods; + + /* + * populate generic path information + */ + path = &cpath->path; + path->pathtype = T_CustomScan; + path->parent = rel; + path->pathtarget = rel->reltarget; + + /* + * Add cost estimates for a cstore table scan, row count is the rows estimated by + * postgres' planner. 
+ */ + path->rows = rel->rows; + path->startup_cost = 0; + path->total_cost = path->startup_cost + CStoreScanCost(rte); + + return (Path *) cspath; +} + + +/* + * CStoreScanCost calculates the cost of scanning the cstore table. The cost is estimated + * by using all stripe metadata to estimate based on the columns to read how many pages + * need to be read. + */ +static Cost +CStoreScanCost(RangeTblEntry *rte) +{ + Relation rel = RelationIdGetRelation(rte->relid); + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + uint32 maxColumnCount = 0; + uint64 totalStripeSize = 0; + ListCell *stripeMetadataCell = NULL; + + RelationClose(rel); + rel = NULL; + + foreach(stripeMetadataCell, metadata->stripeMetadataList) + { + StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); + totalStripeSize += stripeMetadata->dataLength; + maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount); + } + + { + Bitmapset *attr_needed = rte->selectedCols; + double numberOfColumnsRead = bms_num_members(attr_needed); + double selectionRatio = numberOfColumnsRead / (double) maxColumnCount; + Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio; + return scanCost; + } +} + + +static Plan * +CStoreScanPath_PlanCustomPath(PlannerInfo *root, + RelOptInfo *rel, + struct CustomPath *best_path, + List *tlist, + List *clauses, + List *custom_plans) +{ + CStoreScanScan *plan = (CStoreScanScan *) newNode(sizeof(CStoreScanScan), + T_CustomScan); + + CustomScan *cscan = &plan->custom_scan; + cscan->methods = &CStoreScanScanMethods; + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + clauses = extract_actual_clauses(clauses, false); + + cscan->scan.plan.targetlist = list_copy(tlist); + cscan->scan.plan.qual = clauses; + cscan->scan.scanrelid = best_path->path.parent->relid; + + return (Plan *) plan; +} + + +static Node * +CStoreScan_CreateCustomScanState(CustomScan *cscan) +{ + CStoreScanState 
*cstorescanstate = (CStoreScanState *) newNode( + sizeof(CStoreScanState), T_CustomScanState); + + CustomScanState *cscanstate = &cstorescanstate->custom_scanstate; + cscanstate->methods = &CStoreExecuteMethods; + + cstorescanstate->qual = cscan->scan.plan.qual; + + return (Node *) cscanstate; +} + + +static void +CStoreScan_BeginCustomScan(CustomScanState *cscanstate, EState *estate, int eflags) +{ + /* scan slot is already initialized */ +} + + +static Bitmapset * +CStoreAttrNeeded(ScanState *ss) +{ + TupleTableSlot *slot = ss->ss_ScanTupleSlot; + int natts = slot->tts_tupleDescriptor->natts; + Bitmapset *attr_needed = NULL; + Plan *plan = ss->ps.plan; + int flags = PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS; + List *vars = list_concat(pull_var_clause((Node *) plan->targetlist, flags), + pull_var_clause((Node *) plan->qual, flags)); + ListCell *lc; + + foreach(lc, vars) + { + Var *var = lfirst(lc); + + if (var->varattno == 0) + { + elog(DEBUG1, "Need attribute: all"); + + /* all attributes are required, we don't need to add more so break*/ + attr_needed = bms_add_range(attr_needed, 0, natts - 1); + break; + } + + elog(DEBUG1, "Need attribute: %d", var->varattno); + attr_needed = bms_add_member(attr_needed, var->varattno - 1); + } + + return attr_needed; +} + + +static TupleTableSlot * +CStoreScanNext(CStoreScanState *cstorescanstate) +{ + CustomScanState *node = (CustomScanState *) cstorescanstate; + TableScanDesc scandesc; + EState *estate; + ScanDirection direction; + TupleTableSlot *slot; + + /* + * get information from the estate and scan state + */ + scandesc = node->ss.ss_currentScanDesc; + estate = node->ss.ps.state; + direction = estate->es_direction; + slot = node->ss.ss_ScanTupleSlot; + + if (scandesc == NULL) + { + /* the cstore access method does not use the flags, they are specific to heap */ + uint32 flags = 0; + Bitmapset *attr_needed = CStoreAttrNeeded(&node->ss); + + /* + * We reach here if the scan is not 
parallel, or if we're serially + * executing a scan that was planned to be parallel. + */ + scandesc = cstore_beginscan_extended(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL, NULL, flags, attr_needed, + cstorescanstate->qual); + bms_free(attr_needed); + + node->ss.ss_currentScanDesc = scandesc; + } + + /* + * get the next tuple from the table + */ + if (table_scan_getnextslot(scandesc, direction, slot)) + { + return slot; + } + return NULL; +} + + +/* + * SeqRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +CStoreScanRecheck(CStoreScanState *node, TupleTableSlot *slot) +{ + return true; +} + + +static TupleTableSlot * +CStoreScan_ExecCustomScan(CustomScanState *node) +{ + return ExecScan(&node->ss, + (ExecScanAccessMtd) CStoreScanNext, + (ExecScanRecheckMtd) CStoreScanRecheck); +} + + +static void +CStoreScan_EndCustomScan(CustomScanState *node) +{ + TableScanDesc scanDesc; + + /* + * get information from node + */ + scanDesc = node->ss.ss_currentScanDesc; + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * clean out the tuple table + */ + if (node->ss.ps.ps_ResultTupleSlot) + { + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + } + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * close heap scan + */ + if (scanDesc != NULL) + { + table_endscan(scanDesc); + } +} + + +static void +CStoreScan_ReScanCustomScan(CustomScanState *node) +{ + TableScanDesc scanDesc = node->ss.ss_currentScanDesc; + if (scanDesc != NULL) + { + table_rescan(node->ss.ss_currentScanDesc, NULL); + } +} diff --git a/src/backend/columnar/cstore_customscan.h b/src/backend/columnar/cstore_customscan.h new file mode 100644 index 000000000..9e388e13f --- /dev/null +++ b/src/backend/columnar/cstore_customscan.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * + * cstore_customscan.h + * + * Forward declarations of functions to hookup the custom scan feature of + 
* cstore. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_FDW_CSTORE_CUSTOMSCAN_H +#define CSTORE_FDW_CSTORE_CUSTOMSCAN_H + +void cstore_customscan_init(void); + + +#endif /*CSTORE_FDW_CSTORE_CUSTOMSCAN_H */ diff --git a/src/backend/columnar/cstore_fdw--1.0--1.1.sql b/src/backend/columnar/cstore_fdw--1.0--1.1.sql new file mode 100644 index 000000000..9e8029638 --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.0--1.1.sql @@ -0,0 +1,26 @@ +/* cstore_fdw/cstore_fdw--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION UPDATE +\echo Use "ALTER EXTENSION cstore_fdw UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION cstore_ddl_event_end_trigger() +RETURNS event_trigger +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE EVENT TRIGGER cstore_ddl_event_end +ON ddl_command_end +EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); + +CREATE FUNCTION cstore_table_size(relation regclass) +RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +-- cstore_fdw creates directories to store files for tables with automatically +-- determined filename during the CREATE SERVER statement. Since this feature +-- was newly added in v1.1, servers created with v1.0 did not create them. So, +-- we create a server with v1.1 to ensure that the required directories are +-- created to allow users to create automatically managed tables with old servers. 
+CREATE SERVER cstore_server_for_updating_1_0_to_1_1 FOREIGN DATA WRAPPER cstore_fdw; +DROP SERVER cstore_server_for_updating_1_0_to_1_1; diff --git a/src/backend/columnar/cstore_fdw--1.1--1.2.sql b/src/backend/columnar/cstore_fdw--1.1--1.2.sql new file mode 100644 index 000000000..6cabb8c5e --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.1--1.2.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.1--1.2.sql */ + +-- No new functions or definitions were added in 1.2 diff --git a/src/backend/columnar/cstore_fdw--1.2--1.3.sql b/src/backend/columnar/cstore_fdw--1.2--1.3.sql new file mode 100644 index 000000000..3ad187d09 --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.2--1.3.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.2--1.3.sql */ + +-- No new functions or definitions were added in 1.3 diff --git a/src/backend/columnar/cstore_fdw--1.3--1.4.sql b/src/backend/columnar/cstore_fdw--1.3--1.4.sql new file mode 100644 index 000000000..3b7b0f150 --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.3--1.4.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.3--1.4.sql */ + +-- No new functions or definitions were added in 1.4 diff --git a/src/backend/columnar/cstore_fdw--1.4--1.5.sql b/src/backend/columnar/cstore_fdw--1.4--1.5.sql new file mode 100644 index 000000000..55bbb0b2a --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.4--1.5.sql @@ -0,0 +1,28 @@ +/* cstore_fdw/cstore_fdw--1.4--1.5.sql */ + +CREATE FUNCTION cstore_clean_table_resources(oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION cstore_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + AS $csdt$ +DECLARE v_obj record; +BEGIN + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP + + IF v_obj.object_type NOT IN ('table', 'foreign table') THEN + CONTINUE; + END IF; + + PERFORM cstore_clean_table_resources(v_obj.objid); + + END LOOP; +END; +$csdt$; + +CREATE EVENT TRIGGER cstore_drop_event + ON SQL_DROP + EXECUTE PROCEDURE 
cstore_drop_trigger(); diff --git a/src/backend/columnar/cstore_fdw--1.5--1.6.sql b/src/backend/columnar/cstore_fdw--1.5--1.6.sql new file mode 100644 index 000000000..c8f7e8097 --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.5--1.6.sql @@ -0,0 +1,19 @@ +/* cstore_fdw/cstore_fdw--1.5--1.6.sql */ + +CREATE OR REPLACE FUNCTION cstore_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + AS $csdt$ +DECLARE v_obj record; +BEGIN + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP + + IF v_obj.object_type NOT IN ('table', 'foreign table') THEN + CONTINUE; + END IF; + + PERFORM public.cstore_clean_table_resources(v_obj.objid); + + END LOOP; +END; +$csdt$; diff --git a/src/backend/columnar/cstore_fdw--1.6--1.7.sql b/src/backend/columnar/cstore_fdw--1.6--1.7.sql new file mode 100644 index 000000000..c7f56f059 --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.6--1.7.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.6--1.6.sql */ + +-- No new functions or definitions were added in 1.7 diff --git a/src/backend/columnar/cstore_fdw--1.7--1.8.sql b/src/backend/columnar/cstore_fdw--1.7--1.8.sql new file mode 100644 index 000000000..81cbadfb4 --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.7--1.8.sql @@ -0,0 +1,35 @@ +/* cstore_fdw/cstore_fdw--1.7--1.8.sql */ + +DO $proc$ +BEGIN + +IF version() ~ '12' or version() ~ '13' THEN + EXECUTE $$ + CREATE FUNCTION cstore_tableam_handler(internal) + RETURNS table_am_handler + LANGUAGE C + AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; + + CREATE ACCESS METHOD cstore_tableam + TYPE TABLE HANDLER cstore_tableam_handler; + + CREATE FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int DEFAULT NULL, + stripe_row_count int DEFAULT NULL, + compression name DEFAULT null) + RETURNS void + LANGUAGE C + AS 'MODULE_PATHNAME', 'alter_cstore_table_set'; + + CREATE FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool DEFAULT false, + 
stripe_row_count bool DEFAULT false, + compression bool DEFAULT false) + RETURNS void + LANGUAGE C + AS 'MODULE_PATHNAME', 'alter_cstore_table_reset'; + $$; +END IF; +END$proc$; diff --git a/src/backend/columnar/cstore_fdw--1.7.sql b/src/backend/columnar/cstore_fdw--1.7.sql new file mode 100644 index 000000000..1f874ce60 --- /dev/null +++ b/src/backend/columnar/cstore_fdw--1.7.sql @@ -0,0 +1,88 @@ +/* cstore_fdw/cstore_fdw--1.7.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION cstore_fdw" to load this file. \quit + +CREATE FUNCTION cstore_fdw_handler() +RETURNS fdw_handler +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION cstore_fdw_validator(text[], oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER cstore_fdw +HANDLER cstore_fdw_handler +VALIDATOR cstore_fdw_validator; + +CREATE FUNCTION cstore_ddl_event_end_trigger() +RETURNS event_trigger +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE EVENT TRIGGER cstore_ddl_event_end +ON ddl_command_end +EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); + +CREATE FUNCTION public.cstore_table_size(relation regclass) +RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE TABLE cstore_data_files ( + relfilenode oid NOT NULL, + block_row_count int NOT NULL, + stripe_row_count int NOT NULL, + compression name NOT NULL, + version_major bigint NOT NULL, + version_minor bigint NOT NULL, + PRIMARY KEY (relfilenode) +) WITH (user_catalog_table = true); + +COMMENT ON TABLE cstore_data_files IS 'CStore data file wide metadata'; + +CREATE TABLE cstore_stripes ( + relfilenode oid NOT NULL, + stripe bigint NOT NULL, + file_offset bigint NOT NULL, + data_length bigint NOT NULL, + column_count int NOT NULL, + block_count int NOT NULL, + block_row_count int NOT NULL, + row_count bigint NOT NULL, + PRIMARY KEY (relfilenode, stripe), + FOREIGN KEY (relfilenode) REFERENCES cstore_data_files(relfilenode) ON DELETE 
CASCADE INITIALLY DEFERRED +) WITH (user_catalog_table = true); + +COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata'; + +CREATE TABLE cstore_skipnodes ( + relfilenode oid NOT NULL, + stripe bigint NOT NULL, + attr int NOT NULL, + block int NOT NULL, + row_count bigint NOT NULL, + minimum_value bytea, + maximum_value bytea, + value_stream_offset bigint NOT NULL, + value_stream_length bigint NOT NULL, + exists_stream_offset bigint NOT NULL, + exists_stream_length bigint NOT NULL, + value_compression_type int NOT NULL, + PRIMARY KEY (relfilenode, stripe, attr, block), + FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED +) WITH (user_catalog_table = true); + +COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata'; + +CREATE VIEW cstore_options AS +SELECT c.oid::regclass regclass, + d.block_row_count, + d.stripe_row_count, + d.compression +FROM pg_class c +JOIN cstore.cstore_data_files d USING(relfilenode); + +COMMENT ON VIEW cstore_options IS 'CStore per table settings'; diff --git a/src/backend/columnar/cstore_fdw.c b/src/backend/columnar/cstore_fdw.c new file mode 100644 index 000000000..c2497fd27 --- /dev/null +++ b/src/backend/columnar/cstore_fdw.c @@ -0,0 +1,2246 @@ +/*------------------------------------------------------------------------- + * + * cstore_fdw.c + * + * This file contains the function definitions for scanning, analyzing, and + * copying into cstore_fdw foreign tables. Note that this file uses the API + * provided by cstore_reader and cstore_writer for reading and writing cstore + * files. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "access/heapam.h" +#include "access/reloptions.h" +#if PG_VERSION_NUM >= 130000 +#include "access/heaptoast.h" +#else +#include "access/tuptoaster.h" +#endif +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_namespace.h" +#include "catalog/storage.h" +#include "commands/copy.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/explain.h" +#include "commands/extension.h" +#include "commands/vacuum.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#if PG_VERSION_NUM < 120000 +#include "optimizer/cost.h" +#endif +#include "optimizer/pathnode.h" +#include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" +#if PG_VERSION_NUM >= 120000 +#include "access/heapam.h" +#include "optimizer/optimizer.h" +#else +#include "optimizer/var.h" +#endif +#include "parser/parser.h" +#include "parser/parse_coerce.h" +#include "parser/parse_type.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#if PG_VERSION_NUM < 120000 +#include "utils/rel.h" +#endif +#if PG_VERSION_NUM >= 120000 +#include "utils/snapmgr.h" +#else +#include "utils/tqual.h" +#endif +#include "utils/syscache.h" + +#include "cstore.h" +#include "cstore_fdw.h" +#include "cstore_version_compat.h" + +/* table containing information about how to partition distributed tables */ +#define CITUS_EXTENSION_NAME "citus" +#define CITUS_PARTITION_TABLE_NAME "pg_dist_partition" + +/* human-readable names for addressing columns of the pg_dist_partition table */ 
+#define ATTR_NUM_PARTITION_RELATION_ID 1 +#define ATTR_NUM_PARTITION_TYPE 2 +#define ATTR_NUM_PARTITION_KEY 3 + +/* + * CStoreValidOption keeps an option name and a context. When an option is passed + * into cstore_fdw objects (server and foreign table), we compare this option's + * name and context against those of valid options. + */ +typedef struct CStoreValidOption +{ + const char *optionName; + Oid optionContextId; +} CStoreValidOption; + +#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" + +/* Array of options that are valid for cstore_fdw */ +static const uint32 ValidOptionCount = 3; +static const CStoreValidOption ValidOptionArray[] = +{ + /* foreign table options */ + { OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId }, + { OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId }, + { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } +}; + +static object_access_hook_type prevObjectAccessHook = NULL; + +/* local functions forward declarations */ +#if PG_VERSION_NUM >= 130000 +static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + QueryCompletion *queryCompletion); +#elif PG_VERSION_NUM >= 100000 +static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, char *completionTag); +#else +static void CStoreProcessUtility(Node *parseTree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + DestReceiver *destReceiver, char *completionTag); +#endif +static bool CopyCStoreTableStatement(CopyStmt *copyStatement); +static void CheckSuperuserPrivilegesForCopy(const CopyStmt *copyStatement); +static void CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, + char 
*completionTag); +static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, + const char *queryString); +static uint64 CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString); +static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); +static List * FindCStoreTables(List *tableList); +static List * OpenRelationsForTruncate(List *cstoreTableList); +static void FdwNewRelFileNode(Relation relation); +static void TruncateCStoreTables(List *cstoreRelationList); +static bool IsCStoreFdwTable(Oid relationId); +static bool IsCStoreServer(ForeignServer *server); +static bool DistributedTable(Oid relationId); +static bool DistributedWorkerCopy(CopyStmt *copyStatement); +static StringInfo OptionNamesString(Oid currentContextId); +static HeapTuple GetSlotHeapTuple(TupleTableSlot *tts); +static CStoreOptions * CStoreGetOptions(Oid foreignTableId); +static char * CStoreGetOptionValue(Oid foreignTableId, const char *optionName); +static void ValidateForeignTableOptions(char *compressionTypeString, + char *stripeRowCountString, + char *blockRowCountString); +static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId); +static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId); +#if PG_VERSION_NUM >= 90500 +static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId, ForeignPath *bestPath, + List *targetList, List *scanClauses, + Plan *outerPlan); +#else +static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId, ForeignPath *bestPath, + List *targetList, List *scanClauses); +#endif +static double TupleCountEstimate(Relation relation, RelOptInfo *baserel); +static BlockNumber PageCount(Relation relation); +static List * ColumnList(RelOptInfo *baserel, Oid foreignTableId); +static void CStoreExplainForeignScan(ForeignScanState *scanState, + ExplainState *explainState); +static void 
CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags); +static TupleTableSlot * CStoreIterateForeignScan(ForeignScanState *scanState); +static void CStoreEndForeignScan(ForeignScanState *scanState); +static void CStoreReScanForeignScan(ForeignScanState *scanState); +static bool CStoreAnalyzeForeignTable(Relation relation, + AcquireSampleRowsFunc *acquireSampleRowsFunc, + BlockNumber *totalPageCount); +static int CStoreAcquireSampleRows(Relation relation, int logLevel, + HeapTuple *sampleRows, int targetRowCount, + double *totalRowCount, double *totalDeadRowCount); +static List * CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan, + Index resultRelation, int subplanIndex); +static void CStoreBeginForeignModify(ModifyTableState *modifyTableState, + ResultRelInfo *relationInfo, List *fdwPrivate, + int subplanIndex, int executorflags); +static void CStoreBeginForeignInsert(ModifyTableState *modifyTableState, + ResultRelInfo *relationInfo); +static TupleTableSlot * CStoreExecForeignInsert(EState *executorState, + ResultRelInfo *relationInfo, + TupleTableSlot *tupleSlot, + TupleTableSlot *planSlot); +static void CStoreEndForeignModify(EState *executorState, ResultRelInfo *relationInfo); +static void CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo); +#if PG_VERSION_NUM >= 90600 +static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +#endif +static void cstore_fdw_initrel(Relation rel); +static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); +static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode); +static void CStoreFdwObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, + int subId, + void *arg); + +PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); +PG_FUNCTION_INFO_V1(cstore_table_size); +PG_FUNCTION_INFO_V1(cstore_fdw_handler); +PG_FUNCTION_INFO_V1(cstore_fdw_validator); + + +/* saved hook value in case of unload */ 
+static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; + + +/* + * Called when the module is loaded. In this function we save the + * previous utility hook, and then install our hook to pre-intercept calls to + * the copy command. + */ +void +cstore_fdw_init() +{ + PreviousProcessUtilityHook = (ProcessUtility_hook != NULL) ? + ProcessUtility_hook : standard_ProcessUtility; + ProcessUtility_hook = CStoreProcessUtility; + prevObjectAccessHook = object_access_hook; + object_access_hook = CStoreFdwObjectAccessHook; +} + + +/* + * Called when the module is unloaded. This function uninstalls the + * extension's hooks. + */ +void +cstore_fdw_finish() +{ + ProcessUtility_hook = PreviousProcessUtilityHook; +} + + +/* + * cstore_ddl_event_end_trigger is the event trigger function which is called on + * ddl_command_end event. This function creates required directories after the + * CREATE SERVER statement and valid data and footer files after the CREATE FOREIGN + * TABLE statement. + */ +Datum +cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) +{ + EventTriggerData *triggerData = NULL; + Node *parseTree = NULL; + + /* error if event trigger manager did not call this function */ + if (!CALLED_AS_EVENT_TRIGGER(fcinfo)) + { + ereport(ERROR, (errmsg("trigger not fired by event trigger manager"))); + } + + triggerData = (EventTriggerData *) fcinfo->context; + parseTree = triggerData->parsetree; + + if (nodeTag(parseTree) == T_CreateForeignTableStmt) + { + CreateForeignTableStmt *createStatement = (CreateForeignTableStmt *) parseTree; + char *serverName = createStatement->servername; + + bool missingOK = false; + ForeignServer *server = GetForeignServerByName(serverName, missingOK); + if (IsCStoreServer(server)) + { + Oid relationId = RangeVarGetRelid(createStatement->base.relation, + AccessShareLock, false); + Relation relation = cstore_fdw_open(relationId, AccessExclusiveLock); + CStoreOptions *options = CStoreGetOptions(relationId); + 
InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount, + options->stripeRowCount, options->compressionType); + heap_close(relation, AccessExclusiveLock); + } + } + + PG_RETURN_NULL(); +} + + +/* + * CStoreProcessUtility is the hook for handling utility commands. This function + * customizes the behaviour of "COPY cstore_table" and "DROP FOREIGN TABLE + * cstore_table" commands. For all other utility statements, the function calls + * the previous utility hook or the standard utility command via macro + * CALL_PREVIOUS_UTILITY. + */ +#if PG_VERSION_NUM >= 130000 +static void +CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, QueryCompletion *queryCompletion) +#elif PG_VERSION_NUM >= 100000 +static void +CStoreProcessUtility(PlannedStmt * plannedStatement, const char * queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment * queryEnvironment, + DestReceiver * destReceiver, char * completionTag) +#else +static void +CStoreProcessUtility(Node * parseTree, const char * queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + DestReceiver * destReceiver, char * completionTag) +#endif +{ +#if PG_VERSION_NUM >= 130000 + char *completionTag = NULL; +#endif +#if PG_VERSION_NUM >= 100000 + Node *parseTree = plannedStatement->utilityStmt; +#endif + + if (nodeTag(parseTree) == T_CopyStmt) + { + CopyStmt *copyStatement = (CopyStmt *) parseTree; + + if (CopyCStoreTableStatement(copyStatement)) + { + CStoreProcessCopyCommand(copyStatement, queryString, completionTag); + } + else + { + CALL_PREVIOUS_UTILITY(); + } + } + else if (nodeTag(parseTree) == T_TruncateStmt) + { + TruncateStmt *truncateStatement = (TruncateStmt *) parseTree; + List *allTablesList = truncateStatement->relations; + List *cstoreTablesList = 
FindCStoreTables(allTablesList); + List *otherTablesList = list_difference(allTablesList, cstoreTablesList); + List *cstoreRelationList = OpenRelationsForTruncate(cstoreTablesList); + ListCell *cstoreRelationCell = NULL; + + if (otherTablesList != NIL) + { + truncateStatement->relations = otherTablesList; + + CALL_PREVIOUS_UTILITY(); + + /* restore the former relation list. Our + * replacement could be freed but still needed + * in a cached plan. A truncate can be cached + * if run from a pl/pgSQL function */ + truncateStatement->relations = allTablesList; + } + + TruncateCStoreTables(cstoreRelationList); + + foreach(cstoreRelationCell, cstoreRelationList) + { + Relation relation = (Relation) lfirst(cstoreRelationCell); + heap_close(relation, AccessExclusiveLock); + } + } + else if (nodeTag(parseTree) == T_AlterTableStmt) + { + AlterTableStmt *alterTable = (AlterTableStmt *) parseTree; + CStoreProcessAlterTableCommand(alterTable); + CALL_PREVIOUS_UTILITY(); + } + else if (nodeTag(parseTree) == T_DropdbStmt) + { + /* let postgres handle error checking and dropping of the database */ + CALL_PREVIOUS_UTILITY(); + } + + /* handle other utility statements */ + else + { + CALL_PREVIOUS_UTILITY(); + } +} + + +/* + * CopyCStoreTableStatement check whether the COPY statement is a "COPY cstore_table FROM + * ..." or "COPY cstore_table TO ...." statement. If it is then the function returns + * true. The function returns false otherwise. 
+ */ +static bool +CopyCStoreTableStatement(CopyStmt *copyStatement) +{ + bool copyCStoreTableStatement = false; + + if (copyStatement->relation != NULL) + { + Oid relationId = RangeVarGetRelid(copyStatement->relation, + AccessShareLock, true); + bool cstoreTable = IsCStoreFdwTable(relationId); + if (cstoreTable) + { + bool distributedTable = DistributedTable(relationId); + bool distributedCopy = DistributedWorkerCopy(copyStatement); + + if (distributedTable || distributedCopy) + { + /* let COPY on distributed tables fall through to Citus */ + copyCStoreTableStatement = false; + } + else + { + copyCStoreTableStatement = true; + } + } + } + + return copyCStoreTableStatement; +} + + +/* + * CheckSuperuserPrivilegesForCopy checks if superuser privilege is required by + * copy operation and reports error if user does not have superuser rights. + */ +static void +CheckSuperuserPrivilegesForCopy(const CopyStmt *copyStatement) +{ + /* + * We disallow copy from file or program except to superusers. These checks + * are based on the checks in DoCopy() function of copy.c. + */ + if (copyStatement->filename != NULL && !superuser()) + { + if (copyStatement->is_program) + { + ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from a program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + else + { + ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + } +} + + +/* + * CStoreProcessCopyCommand handles COPY FROM/TO ... statements. + * It determines the copy direction and forwards execution to appropriate function. 
+ */ +static void +CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, + char *completionTag) +{ + uint64 processedCount = 0; + + if (copyStatement->is_from) + { + processedCount = CopyIntoCStoreTable(copyStatement, queryString); + } + else + { + processedCount = CopyOutCStoreTable(copyStatement, queryString); + } + + if (completionTag != NULL) + { + snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "COPY " UINT64_FORMAT, + processedCount); + } +} + + +/* + * CopyIntoCStoreTable handles a "COPY cstore_table FROM" statement. This + * function uses the COPY command's functions to read and parse rows from + * the data source specified in the COPY statement. The function then writes + * each row to the file specified in the cstore foreign table options. Finally, + * the function returns the number of copied rows. + */ +static uint64 +CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) +{ + uint64 processedRowCount = 0; + Relation relation = NULL; + Oid relationId = InvalidOid; + TupleDesc tupleDescriptor = NULL; + uint32 columnCount = 0; + CopyState copyState = NULL; + bool nextRowFound = true; + Datum *columnValues = NULL; + bool *columnNulls = NULL; + TableWriteState *writeState = NULL; + CStoreOptions *cstoreOptions = NULL; + MemoryContext tupleContext = NULL; + + /* Only superuser can copy from or to local file */ + CheckSuperuserPrivilegesForCopy(copyStatement); + + Assert(copyStatement->relation != NULL); + + /* + * Open and lock the relation. We acquire RowExclusiveLock to allow + * concurrent reads and writes. 
+ */ + relation = cstore_fdw_openrv(copyStatement->relation, RowExclusiveLock); + relationId = RelationGetRelid(relation); + + /* allocate column values and nulls arrays */ + tupleDescriptor = RelationGetDescr(relation); + columnCount = tupleDescriptor->natts; + columnValues = palloc0(columnCount * sizeof(Datum)); + columnNulls = palloc0(columnCount * sizeof(bool)); + + cstoreOptions = CStoreGetOptions(relationId); + + /* + * We create a new memory context called tuple context, and read and write + * each row's values within this memory context. After each read and write, + * we reset the memory context. That way, we immediately release memory + * allocated for each row, and don't bloat memory usage with large input + * files. + */ + tupleContext = AllocSetContextCreate(CurrentMemoryContext, + "CStore COPY Row Memory Context", + ALLOCSET_DEFAULT_SIZES); + + /* init state to read from COPY data source */ +#if (PG_VERSION_NUM >= 100000) + { + ParseState *pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + copyState = BeginCopyFrom(pstate, relation, copyStatement->filename, + copyStatement->is_program, + NULL, + copyStatement->attlist, + copyStatement->options); + free_parsestate(pstate); + } +#else + copyState = BeginCopyFrom(relation, copyStatement->filename, + copyStatement->is_program, + copyStatement->attlist, + copyStatement->options); +#endif + + /* init state to write to the cstore file */ + writeState = CStoreBeginWrite(relation, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, + tupleDescriptor); + + while (nextRowFound) + { + /* read the next row in tupleContext */ + MemoryContext oldContext = MemoryContextSwitchTo(tupleContext); +#if PG_VERSION_NUM >= 120000 + nextRowFound = NextCopyFrom(copyState, NULL, columnValues, columnNulls); +#else + nextRowFound = NextCopyFrom(copyState, NULL, columnValues, columnNulls, NULL); +#endif + MemoryContextSwitchTo(oldContext); + + /* write the row to 
the cstore file */ + if (nextRowFound) + { + CStoreWriteRow(writeState, columnValues, columnNulls); + processedRowCount++; + } + + MemoryContextReset(tupleContext); + + CHECK_FOR_INTERRUPTS(); + } + + /* end read/write sessions and close the relation */ + EndCopyFrom(copyState); + CStoreEndWrite(writeState); + heap_close(relation, RowExclusiveLock); + + return processedRowCount; +} + + +/* + * CopyFromCStoreTable handles a "COPY cstore_table TO ..." statement. Statement + * is converted to "COPY (SELECT * FROM cstore_table) TO ..." and forwarded to + * postgres native COPY handler. Function returns number of files copied to external + * stream. Copying selected columns from cstore table is not currently supported. + */ +static uint64 +CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString) +{ + uint64 processedCount = 0; + RangeVar *relation = NULL; + char *qualifiedName = NULL; + List *queryList = NIL; + Node *rawQuery = NULL; + + StringInfo newQuerySubstring = makeStringInfo(); + + if (copyStatement->attlist != NIL) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("copy column list is not supported"), + errhint("use 'copy (select from ) to " + "...' instead"))); + } + + relation = copyStatement->relation; + qualifiedName = quote_qualified_identifier(relation->schemaname, + relation->relname); + appendStringInfo(newQuerySubstring, "select * from %s", qualifiedName); + queryList = raw_parser(newQuerySubstring->data); + + /* take the first parse tree */ + rawQuery = linitial(queryList); + + /* + * Set the relation field to NULL so that COPY command works on + * query field instead. + */ + copyStatement->relation = NULL; + +#if (PG_VERSION_NUM >= 100000) + + /* + * raw_parser returns list of RawStmt* in PG 10+ we need to + * extract actual query from it. 
+ */ + { + ParseState *pstate = make_parsestate(NULL); + RawStmt *rawStatement = (RawStmt *) rawQuery; + + pstate->p_sourcetext = newQuerySubstring->data; + copyStatement->query = rawStatement->stmt; + + DoCopy(pstate, copyStatement, -1, -1, &processedCount); + free_parsestate(pstate); + } +#else + copyStatement->query = rawQuery; + + DoCopy(copyStatement, queryString, &processedCount); +#endif + + return processedCount; +} + + +/* + * CStoreProcessAlterTableCommand checks if given alter table statement is + * compatible with underlying data structure. Currently it only checks alter + * column type. The function errors out if current column type can not be safely + * converted to requested column type. This check is more restrictive than + * PostgreSQL's because we can not change existing data. + */ +static void +CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) +{ + ObjectType objectType = alterStatement->relkind; + RangeVar *relationRangeVar = alterStatement->relation; + Oid relationId = InvalidOid; + List *commandList = alterStatement->cmds; + ListCell *commandCell = NULL; + + /* we are only interested in foreign table changes */ + if (objectType != OBJECT_TABLE && objectType != OBJECT_FOREIGN_TABLE) + { + return; + } + + relationId = RangeVarGetRelid(relationRangeVar, AccessShareLock, true); + if (!IsCStoreFdwTable(relationId)) + { + return; + } + + foreach(commandCell, commandList) + { + AlterTableCmd *alterCommand = (AlterTableCmd *) lfirst(commandCell); + if (alterCommand->subtype == AT_AlterColumnType) + { + char *columnName = alterCommand->name; + ColumnDef *columnDef = (ColumnDef *) alterCommand->def; + Oid targetTypeId = typenameTypeId(NULL, columnDef->typeName); + char *typeName = TypeNameToString(columnDef->typeName); + AttrNumber attributeNumber = get_attnum(relationId, columnName); + Oid currentTypeId = InvalidOid; + + if (attributeNumber <= 0) + { + /* let standard utility handle this */ + continue; + } + + currentTypeId = 
get_atttype(relationId, attributeNumber); + + /* + * We are only interested in implicit coersion type compatibility. + * Erroring out here to prevent further processing. + */ + if (!can_coerce_type(1, ¤tTypeId, &targetTypeId, COERCION_IMPLICIT)) + { + ereport(ERROR, (errmsg("Column %s cannot be cast automatically to " + "type %s", columnName, typeName))); + } + } + } +} + + +/* FindCStoreTables returns list of CStore tables from given table list */ +static List * +FindCStoreTables(List *tableList) +{ + List *cstoreTableList = NIL; + ListCell *relationCell = NULL; + foreach(relationCell, tableList) + { + RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); + Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); + if (IsCStoreFdwTable(relationId) && !DistributedTable(relationId)) + { + cstoreTableList = lappend(cstoreTableList, rangeVar); + } + } + + return cstoreTableList; +} + + +/* + * OpenRelationsForTruncate opens and locks relations for tables to be truncated. + * + * It also performs a permission checks to see if the user has truncate privilege + * on tables. 
 */
static List *
OpenRelationsForTruncate(List *cstoreTableList)
{
	ListCell *relationCell = NULL;
	List *relationIdList = NIL;	/* OIDs seen so far, used for de-duplication */
	List *relationList = NIL;	/* opened Relation handles to return */
	foreach(relationCell, cstoreTableList)
	{
		RangeVar *rangeVar = (RangeVar *) lfirst(relationCell);

		/* AccessExclusiveLock: TRUNCATE must block all concurrent access */
		Relation relation = cstore_fdw_openrv(rangeVar, AccessExclusiveLock);
		Oid relationId = relation->rd_id;
		AclResult aclresult = pg_class_aclcheck(relationId, GetUserId(),
												ACL_TRUNCATE);
		if (aclresult != ACLCHECK_OK)
		{
			aclcheck_error(aclresult, ACLCHECK_OBJECT_TABLE, get_rel_name(relationId));
		}

		/* check if this relation is repeated */
		if (list_member_oid(relationIdList, relationId))
		{
			/* already opened once; release this duplicate handle and its lock */
			heap_close(relation, AccessExclusiveLock);
		}
		else
		{
			relationIdList = lappend_oid(relationIdList, relationId);
			relationList = lappend(relationList, relation);
		}
	}

	return relationList;
}


/*
 * TruncateCStoreTables truncates the given cstore relations by assigning each
 * a fresh relfilenode and re-initializing its data-file metadata. Callers are
 * expected to have opened and locked the relations (see
 * OpenRelationsForTruncate).
 */
static void
TruncateCStoreTables(List *cstoreRelationList)
{
	ListCell *relationCell = NULL;
	foreach(relationCell, cstoreRelationList)
	{
		Relation relation = (Relation) lfirst(relationCell);
		Oid relationId = relation->rd_id;
		CStoreOptions *options = CStoreGetOptions(relationId);

		Assert(IsCStoreFdwTable(relationId));

		/* point the relation at new, empty storage ... */
		FdwNewRelFileNode(relation);

		/* ... and create empty metadata for it with the table's options */
		InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount,
								   options->stripeRowCount, options->compressionType);
	}
}


/*
 * Version 11 and earlier already assign a relfilenode for foreign
 * tables. Version 12 and later do not, so we need to create one manually.
+ */ +static void +FdwNewRelFileNode(Relation relation) +{ + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; + + pg_class = heap_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + { + elog(ERROR, "could not find tuple for relation %u", + RelationGetRelid(relation)); + } + classform = (Form_pg_class) GETSTRUCT(tuple); + + if (true) + { + char persistence = relation->rd_rel->relpersistence; + Relation tmprel; + Oid tablespace; + Oid filenode; + + /* + * Upgrade to AccessExclusiveLock, and hold until the end of the + * transaction. This shouldn't happen during a read, but it's hard to + * prove that because it happens lazily. + */ + tmprel = heap_open(relation->rd_id, AccessExclusiveLock); + heap_close(tmprel, NoLock); + + if (OidIsValid(relation->rd_rel->relfilenode)) + { + RelationDropStorage(relation); + DeleteDataFileMetadataRowIfExists(relation->rd_rel->relfilenode); + } + + if (OidIsValid(relation->rd_rel->reltablespace)) + { + tablespace = relation->rd_rel->reltablespace; + } + else + { + tablespace = MyDatabaseTableSpace; + } + + filenode = GetNewRelFileNode(tablespace, NULL, persistence); + + classform->relfilenode = filenode; + classform->relpages = 0; /* it's empty until further notice */ + classform->reltuples = 0; + classform->relallvisible = 0; + classform->relfrozenxid = InvalidTransactionId; + classform->relminmxid = InvalidTransactionId; + + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + CommandCounterIncrement(); + + relation->rd_node.spcNode = tablespace; + relation->rd_node.dbNode = MyDatabaseId; + relation->rd_node.relNode = filenode; + } + + heap_freetuple(tuple); + heap_close(pg_class, RowExclusiveLock); +} + + +static void +FdwCreateStorage(Relation relation) +{ + Assert(OidIsValid(relation->rd_rel->relfilenode)); + RelationOpenSmgr(relation); + if (!smgrexists(relation->rd_smgr, MAIN_FORKNUM)) + { +#if 
PG_VERSION_NUM >= 120000 + SMgrRelation srel; + srel = RelationCreateStorage(relation->rd_node, + relation->rd_rel->relpersistence); + smgrclose(srel); +#else + RelationCreateStorage(relation->rd_node, + relation->rd_rel->relpersistence); +#endif + } +} + + +/* + * IsCStoreFdwTable checks if the given table name belongs to a foreign columnar store + * table. If it does, the function returns true. Otherwise, it returns false. + */ +bool +IsCStoreFdwTable(Oid relationId) +{ + bool cstoreTable = false; + char relationKind = 0; + + if (relationId == InvalidOid) + { + return false; + } + + relationKind = get_rel_relkind(relationId); + if (relationKind == RELKIND_FOREIGN_TABLE) + { + ForeignTable *foreignTable = GetForeignTable(relationId); + ForeignServer *server = GetForeignServer(foreignTable->serverid); + if (IsCStoreServer(server)) + { + cstoreTable = true; + } + } + + return cstoreTable; +} + + +/* + * IsCStoreServer checks if the given foreign server belongs to cstore_fdw. If it + * does, the function returns true. Otherwise, it returns false. + */ +static bool +IsCStoreServer(ForeignServer *server) +{ + ForeignDataWrapper *foreignDataWrapper = GetForeignDataWrapper(server->fdwid); + bool cstoreServer = false; + + char *foreignWrapperName = foreignDataWrapper->fdwname; + if (strncmp(foreignWrapperName, CSTORE_FDW_NAME, NAMEDATALEN) == 0) + { + cstoreServer = true; + } + + return cstoreServer; +} + + +/* + * DistributedTable checks if the given relationId is the OID of a distributed table, + * which may also be a cstore_fdw table, but in that case COPY should be handled by + * Citus. 
 */
static bool
DistributedTable(Oid relationId)
{
	bool distributedTable = false;
	Oid partitionOid = InvalidOid;
	Relation heapRelation = NULL;
	TableScanDesc scanDesc = NULL;
	const int scanKeyCount = 1;
	ScanKeyData scanKey[1];
	HeapTuple heapTuple = NULL;

	bool missingOK = true;
	Oid extensionOid = get_extension_oid(CITUS_EXTENSION_NAME, missingOK);
	if (extensionOid == InvalidOid)
	{
		/* if the citus extension isn't created, no tables are distributed */
		return false;
	}

	partitionOid = get_relname_relid(CITUS_PARTITION_TABLE_NAME, PG_CATALOG_NAMESPACE);
	if (partitionOid == InvalidOid)
	{
		/* the pg_dist_partition table does not exist */
		return false;
	}

	/* scan pg_dist_partition for a row whose logicalrelid equals relationId */
	heapRelation = heap_open(partitionOid, AccessShareLock);

	ScanKeyInit(&scanKey[0], ATTR_NUM_PARTITION_RELATION_ID, InvalidStrategy,
				F_OIDEQ, ObjectIdGetDatum(relationId));

	/*
	 * NOTE(review): uses SnapshotSelf — presumably so rows inserted earlier
	 * in the same command are visible; confirm against callers.
	 */
	scanDesc = table_beginscan(heapRelation, SnapshotSelf, scanKeyCount, scanKey);

	/* any matching tuple means the relation is distributed */
	heapTuple = heap_getnext(scanDesc, ForwardScanDirection);

	distributedTable = HeapTupleIsValid(heapTuple);

	table_endscan(scanDesc);
	relation_close(heapRelation, AccessShareLock);

	return distributedTable;
}


/*
 * DistributedWorkerCopy returns whether the Citus-specific master_host option is
 * present in the COPY options.
 */
static bool
DistributedWorkerCopy(CopyStmt *copyStatement)
{
	ListCell *optionCell = NULL;
	foreach(optionCell, copyStatement->options)
	{
		DefElem *defel = (DefElem *) lfirst(optionCell);
		if (strncmp(defel->defname, "master_host", NAMEDATALEN) == 0)
		{
			return true;
		}
	}

	return false;
}


/*
 * cstore_table_size returns the total on-disk size of a cstore table in bytes.
 * The result includes the sizes of data file and footer file.
 * (As implemented, the size is computed from the relation's main-fork block
 * count reported by the storage manager, multiplied by BLCKSZ.)
 */
Datum
cstore_table_size(PG_FUNCTION_ARGS)
{
	Oid relationId = PG_GETARG_OID(0);
	bool cstoreTable = IsCStoreFdwTable(relationId);
	Relation relation;
	BlockNumber nblocks;

	if (!cstoreTable)
	{
		ereport(ERROR, (errmsg("relation is not a cstore table")));
	}

	relation = cstore_fdw_open(relationId, AccessShareLock);
	RelationOpenSmgr(relation);
	nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
	heap_close(relation, AccessShareLock);
	PG_RETURN_INT64(nblocks * BLCKSZ);
}


/*
 * cstore_fdw_handler creates and returns a struct with pointers to foreign
 * table callback functions.
 */
Datum
cstore_fdw_handler(PG_FUNCTION_ARGS)
{
	FdwRoutine *fdwRoutine = makeNode(FdwRoutine);

	/* planning and scanning callbacks */
	fdwRoutine->GetForeignRelSize = CStoreGetForeignRelSize;
	fdwRoutine->GetForeignPaths = CStoreGetForeignPaths;
	fdwRoutine->GetForeignPlan = CStoreGetForeignPlan;
	fdwRoutine->ExplainForeignScan = CStoreExplainForeignScan;
	fdwRoutine->BeginForeignScan = CStoreBeginForeignScan;
	fdwRoutine->IterateForeignScan = CStoreIterateForeignScan;
	fdwRoutine->ReScanForeignScan = CStoreReScanForeignScan;
	fdwRoutine->EndForeignScan = CStoreEndForeignScan;
	fdwRoutine->AnalyzeForeignTable = CStoreAnalyzeForeignTable;

	/* modification (INSERT) callbacks */
	fdwRoutine->PlanForeignModify = CStorePlanForeignModify;
	fdwRoutine->BeginForeignModify = CStoreBeginForeignModify;
	fdwRoutine->ExecForeignInsert = CStoreExecForeignInsert;
	fdwRoutine->EndForeignModify = CStoreEndForeignModify;

#if PG_VERSION_NUM >= 110000
	fdwRoutine->BeginForeignInsert = CStoreBeginForeignInsert;
	fdwRoutine->EndForeignInsert = CStoreEndForeignInsert;
#endif

#if PG_VERSION_NUM >= 90600
	fdwRoutine->IsForeignScanParallelSafe = CStoreIsForeignScanParallelSafe;
#endif

	PG_RETURN_POINTER(fdwRoutine);
}


/*
 * cstore_fdw_validator validates options given to one of the following commands:
 * foreign data wrapper, server, user mapping, or foreign table.
This function
 * errors out if the given option name or its value is considered invalid.
 */
Datum
cstore_fdw_validator(PG_FUNCTION_ARGS)
{
	Datum optionArray = PG_GETARG_DATUM(0);
	Oid optionContextId = PG_GETARG_OID(1);
	List *optionList = untransformRelOptions(optionArray);
	ListCell *optionCell = NULL;
	char *compressionTypeString = NULL;
	char *stripeRowCountString = NULL;
	char *blockRowCountString = NULL;

	foreach(optionCell, optionList)
	{
		DefElem *optionDef = (DefElem *) lfirst(optionCell);
		char *optionName = optionDef->defname;
		bool optionValid = false;

		/* check the option against the table of known (context, name) pairs */
		int32 optionIndex = 0;
		for (optionIndex = 0; optionIndex < ValidOptionCount; optionIndex++)
		{
			const CStoreValidOption *validOption = &(ValidOptionArray[optionIndex]);

			if ((optionContextId == validOption->optionContextId) &&
				(strncmp(optionName, validOption->optionName, NAMEDATALEN) == 0))
			{
				optionValid = true;
				break;
			}
		}

		/* if invalid option, display an informative error message */
		if (!optionValid)
		{
			StringInfo optionNamesString = OptionNamesString(optionContextId);

			ereport(ERROR, (errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
							errmsg("invalid option \"%s\"", optionName),
							errhint("Valid options in this context are: %s",
									optionNamesString->data)));
		}

		/* remember value strings for the range checks done below */
		if (strncmp(optionName, OPTION_NAME_COMPRESSION_TYPE, NAMEDATALEN) == 0)
		{
			compressionTypeString = defGetString(optionDef);
		}
		else if (strncmp(optionName, OPTION_NAME_STRIPE_ROW_COUNT, NAMEDATALEN) == 0)
		{
			stripeRowCountString = defGetString(optionDef);
		}
		else if (strncmp(optionName, OPTION_NAME_BLOCK_ROW_COUNT, NAMEDATALEN) == 0)
		{
			blockRowCountString = defGetString(optionDef);
		}
	}

	/* value validation only applies to foreign-table-level options */
	if (optionContextId == ForeignTableRelationId)
	{
		ValidateForeignTableOptions(compressionTypeString,
									stripeRowCountString, blockRowCountString);
	}

	PG_RETURN_VOID();
}


/*
 * OptionNamesString finds all options that are valid for the current context,
 * and concatenates
these option names in a comma separated string. The function
 * is unchanged from mongo_fdw.
 */
static StringInfo
OptionNamesString(Oid currentContextId)
{
	StringInfo optionNamesString = makeStringInfo();
	bool firstOptionAppended = false;

	int32 optionIndex = 0;
	for (optionIndex = 0; optionIndex < ValidOptionCount; optionIndex++)
	{
		const CStoreValidOption *validOption = &(ValidOptionArray[optionIndex]);

		/* if option belongs to current context, append option name */
		if (currentContextId == validOption->optionContextId)
		{
			/* separator goes before every name except the first */
			if (firstOptionAppended)
			{
				appendStringInfoString(optionNamesString, ", ");
			}

			appendStringInfoString(optionNamesString, validOption->optionName);
			firstOptionAppended = true;
		}
	}

	return optionNamesString;
}


/*
 * GetSlotHeapTuple abstracts getting HeapTuple from TupleTableSlot between versions.
 * On PG 12+ the slot no longer exposes a stored tuple directly, so a copy is
 * materialized via the slot's ops table; earlier versions return the slot's
 * tuple pointer as-is.
 */
static HeapTuple
GetSlotHeapTuple(TupleTableSlot *tts)
{
#if PG_VERSION_NUM >= 120000
	return tts->tts_ops->copy_heap_tuple(tts);
#else
	return tts->tts_tuple;
#endif
}


/*
 * CStoreGetOptions returns the option values to be used when reading and writing
 * the cstore file. To resolve these values, the function checks options for the
 * foreign table, and if not present, falls back to default values. This function
 * errors out if given option values are considered invalid.
 */
static CStoreOptions *
CStoreGetOptions(Oid foreignTableId)
{
	CStoreOptions *cstoreOptions = NULL;

	/* start from GUC-provided defaults; per-table options override below */
	CompressionType compressionType = cstore_compression;
	int32 stripeRowCount = cstore_stripe_row_count;
	int32 blockRowCount = cstore_block_row_count;
	char *compressionTypeString = NULL;
	char *stripeRowCountString = NULL;
	char *blockRowCountString = NULL;

	compressionTypeString = CStoreGetOptionValue(foreignTableId,
												 OPTION_NAME_COMPRESSION_TYPE);
	stripeRowCountString = CStoreGetOptionValue(foreignTableId,
												OPTION_NAME_STRIPE_ROW_COUNT);
	blockRowCountString = CStoreGetOptionValue(foreignTableId,
											   OPTION_NAME_BLOCK_ROW_COUNT);

	/* errors out on invalid values before any parsing below */
	ValidateForeignTableOptions(compressionTypeString,
								stripeRowCountString, blockRowCountString);

	/* parse provided options */
	if (compressionTypeString != NULL)
	{
		compressionType = ParseCompressionType(compressionTypeString);
	}
	if (stripeRowCountString != NULL)
	{
		stripeRowCount = pg_atoi(stripeRowCountString, sizeof(int32), 0);
	}
	if (blockRowCountString != NULL)
	{
		blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0);
	}

	cstoreOptions = palloc0(sizeof(CStoreOptions));
	cstoreOptions->compressionType = compressionType;
	cstoreOptions->stripeRowCount = stripeRowCount;
	cstoreOptions->blockRowCount = blockRowCount;

	return cstoreOptions;
}


/*
 * CStoreGetOptionValue walks over foreign table and foreign server options, and
 * looks for the option with the given name. If found, the function returns the
 * option's value. This function is unchanged from mongo_fdw.
 */
static char *
CStoreGetOptionValue(Oid foreignTableId, const char *optionName)
{
	ForeignTable *foreignTable = NULL;
	ForeignServer *foreignServer = NULL;
	List *optionList = NIL;
	ListCell *optionCell = NULL;
	char *optionValue = NULL;

	foreignTable = GetForeignTable(foreignTableId);
	foreignServer = GetForeignServer(foreignTable->serverid);

	/* table options are checked before server options (first match wins) */
	optionList = list_concat(optionList, foreignTable->options);
	optionList = list_concat(optionList, foreignServer->options);

	foreach(optionCell, optionList)
	{
		DefElem *optionDef = (DefElem *) lfirst(optionCell);
		char *optionDefName = optionDef->defname;

		if (strncmp(optionDefName, optionName, NAMEDATALEN) == 0)
		{
			optionValue = defGetString(optionDef);
			break;
		}
	}

	return optionValue;
}


/*
 * ValidateForeignTableOptions verifies if given options are valid cstore_fdw
 * foreign table options. This function errors out if given option value is
 * considered invalid. NULL arguments mean "option not provided" and are skipped.
 */
static void
ValidateForeignTableOptions(char *compressionTypeString,
							char *stripeRowCountString, char *blockRowCountString)
{
	/* check if the provided compression type is valid */
	if (compressionTypeString != NULL)
	{
		CompressionType compressionType = ParseCompressionType(compressionTypeString);
		if (compressionType == COMPRESSION_TYPE_INVALID)
		{
			ereport(ERROR, (errmsg("invalid compression type"),
							errhint("Valid options are: %s",
									COMPRESSION_STRING_DELIMITED_LIST)));
		}
	}

	/* check if the provided stripe row count has correct format and range */
	if (stripeRowCountString != NULL)
	{
		/* pg_atoi() errors out if the given string is not a valid 32-bit integer */
		int32 stripeRowCount = pg_atoi(stripeRowCountString, sizeof(int32), 0);
		if (stripeRowCount < STRIPE_ROW_COUNT_MINIMUM ||
			stripeRowCount > STRIPE_ROW_COUNT_MAXIMUM)
		{
			ereport(ERROR, (errmsg("invalid stripe row count"),
							errhint("Stripe row count must be an integer between "
									"%d and %d", STRIPE_ROW_COUNT_MINIMUM,
									STRIPE_ROW_COUNT_MAXIMUM)));
		}
	}

	/* check if the provided block row count has correct format and range */
	if (blockRowCountString != NULL)
	{
		/* pg_atoi() errors out if the given string is not a valid 32-bit integer */
		int32 blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0);
		if (blockRowCount < BLOCK_ROW_COUNT_MINIMUM ||
			blockRowCount > BLOCK_ROW_COUNT_MAXIMUM)
		{
			ereport(ERROR, (errmsg("invalid block row count"),
							errhint("Block row count must be an integer between "
									"%d and %d", BLOCK_ROW_COUNT_MINIMUM,
									BLOCK_ROW_COUNT_MAXIMUM)));
		}
	}
}


/*
 * CStoreGetForeignRelSize obtains relation size estimates for a foreign table and
 * puts its estimate for row count into baserel->rows.
 */
static void
CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId)
{
	Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock);
	double tupleCountEstimate = TupleCountEstimate(relation, baserel);

	/* scale the raw tuple count by the selectivity of the WHERE clauses */
	double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo,
												   0, JOIN_INNER, NULL);

	double outputRowCount = clamp_row_est(tupleCountEstimate * rowSelectivity);
	baserel->rows = outputRowCount;
	heap_close(relation, AccessShareLock);
}


/*
 * CStoreGetForeignPaths creates possible access paths for a scan on the foreign
 * table. We currently have one possible access path. This path filters out row
 * blocks that are refuted by where clauses, and only returns values for the
 * projected columns.
 */
static void
CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId)
{
	Path *foreignScanPath = NULL;
	Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock);

	/*
	 * We skip reading columns that are not in query. Here we assume that all
	 * columns in relation have the same width, and estimate the number pages
	 * that will be read by query.
	 *
	 * Ideally, we should also take into account the row blocks that will be
	 * suppressed. But for that we need to know which columns are used for
	 * sorting. If we wrongly assume that we are sorted by a specific column
	 * and underestimate the page count, planner may choose nested loop join
	 * in a place it shouldn't be used. Choosing merge join or hash join is
	 * usually safer than nested loop join, so we take the more conservative
	 * approach and assume all rows in the columnar store file will be read.
	 * We intend to fix this in later version by improving the row sampling
	 * algorithm and using the correlation statistics to detect which columns
	 * are in stored in sorted order.
	 */
	List *queryColumnList = ColumnList(baserel, foreignTableId);
	uint32 queryColumnCount = list_length(queryColumnList);
	BlockNumber relationPageCount = PageCount(relation);
	uint32 relationColumnCount = RelationGetNumberOfAttributes(relation);

	/* I/O cost is proportional to the fraction of columns the query touches */
	double queryColumnRatio = (double) queryColumnCount / relationColumnCount;
	double queryPageCount = relationPageCount * queryColumnRatio;
	double totalDiskAccessCost = seq_page_cost * queryPageCount;

	double tupleCountEstimate = TupleCountEstimate(relation, baserel);

	/*
	 * We estimate costs almost the same way as cost_seqscan(), thus assuming
	 * that I/O costs are equivalent to a regular table file of the same size.
	 */
	double filterCostPerTuple = baserel->baserestrictcost.per_tuple;
	double cpuCostPerTuple = cpu_tuple_cost + filterCostPerTuple;
	double totalCpuCost = cpuCostPerTuple * tupleCountEstimate;

	double startupCost = baserel->baserestrictcost.startup;
	double totalCost = startupCost + totalCpuCost + totalDiskAccessCost;

	/* create a foreign path node and add it as the only possible path */
#if PG_VERSION_NUM >= 90600
	foreignScanPath = (Path *) create_foreignscan_path(root, baserel,
													   NULL,	/* path target */
													   baserel->rows,
													   startupCost, totalCost,
													   NIL, /* no known ordering */
													   NULL,	/* not parameterized */
													   NULL,	/* no outer path */
													   NIL);	/* no fdw_private */

#elif PG_VERSION_NUM >= 90500
	foreignScanPath = (Path *) create_foreignscan_path(root, baserel, baserel->rows,
													   startupCost, totalCost,
													   NIL, /* no known ordering */
													   NULL,	/* not parameterized */
													   NULL,	/* no outer path */
													   NIL);	/* no fdw_private */
#else
	foreignScanPath = (Path *) create_foreignscan_path(root, baserel, baserel->rows,
													   startupCost, totalCost,
													   NIL, /* no known ordering */
													   NULL,	/* not parameterized */
													   NIL);	/* no fdw_private */
#endif

	add_path(baserel, foreignScanPath);
	heap_close(relation, AccessShareLock);
}


/*
 * CStoreGetForeignPlan creates a ForeignScan plan node for scanning the foreign
 * table. We also add the query column list to scan nodes private list, because
 * we need it later for skipping over unused columns in the query.
 */
#if PG_VERSION_NUM >= 90500
static ForeignScan *
CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId,
					 ForeignPath *bestPath, List *targetList, List *scanClauses,
					 Plan *outerPlan)
#else
static ForeignScan *
CStoreGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreignTableId,
					 ForeignPath * bestPath, List * targetList, List * scanClauses)
#endif
{
	ForeignScan *foreignScan = NULL;
	List *columnList = NIL;
	List *foreignPrivateList = NIL;

	/*
	 * Although we skip row blocks that are refuted by the WHERE clause, but
	 * we have no native ability to evaluate restriction clauses and make sure
	 * that all non-related rows are filtered out. So we just put all of the
	 * scanClauses into the plan node's qual list for the executor to check.
	 */
	scanClauses = extract_actual_clauses(scanClauses,
										 false);	/* extract regular clauses */

	/*
	 * As an optimization, we only read columns that are present in the query.
	 * To find these columns, we need baserel. We don't have access to baserel
	 * in executor's callback functions, so we get the column list here and put
	 * it into foreign scan node's private list.
	 */
	columnList = ColumnList(baserel, foreignTableId);
	foreignPrivateList = list_make1(columnList);

	/* create the foreign scan node */
#if PG_VERSION_NUM >= 90500
	foreignScan = make_foreignscan(targetList, scanClauses, baserel->relid,
								   NIL, /* no expressions to evaluate */
								   foreignPrivateList,
								   NIL,
								   NIL,
								   NULL);	/* no outer path */
#else
	foreignScan = make_foreignscan(targetList, scanClauses, baserel->relid,
								   NIL, /* no expressions to evaluate */
								   foreignPrivateList);
#endif

	return foreignScan;
}


/*
 * TupleCountEstimate estimates the number of base relation tuples in the given
 * file.
 */
static double
TupleCountEstimate(Relation relation, RelOptInfo *baserel)
{
	double tupleCountEstimate = 0.0;

	/* check if the user executed Analyze on this foreign table before */
	if (baserel->pages > 0)
	{
		/*
		 * We have number of pages and number of tuples from pg_class (from a
		 * previous ANALYZE), so compute a tuples-per-page estimate and scale
		 * that by the current file size.
		 */
		double tupleDensity = baserel->tuples / (double) baserel->pages;
		BlockNumber pageCount = PageCount(relation);

		tupleCountEstimate = clamp_row_est(tupleDensity * (double) pageCount);
	}
	else
	{
		/* no stats available: fall back to an exact count from the metadata */
		tupleCountEstimate = (double) CStoreTableRowCount(relation);
	}

	return tupleCountEstimate;
}


/*
 * PageCount calculates and returns the number of pages in a file.
 * Returns at least 1 so that callers dividing by the page count never
 * divide by zero.
 */
static BlockNumber
PageCount(Relation relation)
{
	BlockNumber nblocks;

	RelationOpenSmgr(relation);
	nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);

	return (nblocks > 0) ? nblocks : 1;
}


/*
 * ColumnList takes in the planner's information about this foreign table. The
 * function then finds all columns needed for query execution, including those
 * used in projections, joins, and filter clauses, de-duplicates these columns,
 * and returns them in a new list. This function is taken from mongo_fdw with
 * slight modifications.
 */
static List *
ColumnList(RelOptInfo *baserel, Oid foreignTableId)
{
	List *columnList = NIL;
	List *neededColumnList = NIL;
	AttrNumber columnIndex = 1;
	AttrNumber columnCount = baserel->max_attr;
#if PG_VERSION_NUM >= 90600
	List *targetColumnList = baserel->reltarget->exprs;
#else
	List *targetColumnList = baserel->reltargetlist;
#endif
	ListCell *targetColumnCell = NULL;
	List *restrictInfoList = baserel->baserestrictinfo;
	ListCell *restrictInfoCell = NULL;
	const AttrNumber wholeRow = 0;	/* varattno 0 means "whole-row reference" */
	Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock);
	TupleDesc tupleDescriptor = RelationGetDescr(relation);

	/* first add the columns used in joins and projections */
	foreach(targetColumnCell, targetColumnList)
	{
		List *targetVarList = NIL;
		Node *targetExpr = (Node *) lfirst(targetColumnCell);

#if PG_VERSION_NUM >= 90600
		targetVarList = pull_var_clause(targetExpr,
										PVC_RECURSE_AGGREGATES |
										PVC_RECURSE_PLACEHOLDERS);
#else
		targetVarList = pull_var_clause(targetExpr,
										PVC_RECURSE_AGGREGATES,
										PVC_RECURSE_PLACEHOLDERS);
#endif

		/* list_union also de-duplicates the needed columns */
		neededColumnList = list_union(neededColumnList, targetVarList);
	}

	/* then walk over all restriction clauses, and pull up any used columns */
	foreach(restrictInfoCell, restrictInfoList)
	{
		RestrictInfo *restrictInfo = (RestrictInfo *) lfirst(restrictInfoCell);
		Node *restrictClause = (Node *) restrictInfo->clause;
		List *clauseColumnList = NIL;

		/* recursively pull up any columns used in the restriction clause */
#if PG_VERSION_NUM >= 90600
		clauseColumnList = pull_var_clause(restrictClause,
										   PVC_RECURSE_AGGREGATES |
										   PVC_RECURSE_PLACEHOLDERS);
#else
		clauseColumnList = pull_var_clause(restrictClause,
										   PVC_RECURSE_AGGREGATES,
										   PVC_RECURSE_PLACEHOLDERS);
#endif

		neededColumnList = list_union(neededColumnList, clauseColumnList);
	}

	/* walk over all column definitions, and de-duplicate column list */
	for (columnIndex = 1; columnIndex <= columnCount; columnIndex++)
	{
		ListCell *neededColumnCell = NULL;
		Var *column = NULL;
		Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex - 1);

		if (attributeForm->attisdropped)
		{
			continue;
		}

		/* look for this column in the needed column list */
		foreach(neededColumnCell, neededColumnList)
		{
			Var *neededColumn = (Var *) lfirst(neededColumnCell);
			if (neededColumn->varattno == columnIndex)
			{
				column = neededColumn;
				break;
			}
			else if (neededColumn->varattno == wholeRow)
			{
				/* whole-row reference: synthesize a Var for every column */
				Index tableId = neededColumn->varno;

				column = makeVar(tableId, columnIndex, attributeForm->atttypid,
								 attributeForm->atttypmod, attributeForm->attcollation,
								 0);
				break;
			}
		}

		if (column != NULL)
		{
			columnList = lappend(columnList, column);
		}
	}

	heap_close(relation, AccessShareLock);

	return columnList;
}


/* CStoreExplainForeignScan produces extra output for the Explain command. */
static void
CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState)
{
	Relation relation = scanState->ss.ss_currentRelation;

	cstore_fdw_initrel(relation);

	/* suppress file size if we're not showing cost details */
	if (explainState->costs)
	{
		long nblocks;
		RelationOpenSmgr(relation);
		nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);

		/*
		 * NOTE(review): ExplainPropertyLong was removed upstream in PG 11 —
		 * presumably a compat macro elsewhere in this project maps it; confirm.
		 */
		ExplainPropertyLong("CStore File Size", (long) (nblocks * BLCKSZ),
							explainState);
	}
}


/* CStoreBeginForeignScan starts reading the underlying cstore file.
 */
static void
CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags)
{
	TableReadState *readState = NULL;
	Oid foreignTableId = InvalidOid;
	Relation currentRelation = scanState->ss.ss_currentRelation;
	TupleDesc tupleDescriptor = RelationGetDescr(currentRelation);
	List *columnList = NIL;
	ForeignScan *foreignScan = NULL;
	List *foreignPrivateList = NIL;
	List *whereClauseList = NIL;
	Relation relation = NULL;

	cstore_fdw_initrel(currentRelation);

	/* if Explain with no Analyze, do nothing */
	if (executorFlags & EXEC_FLAG_EXPLAIN_ONLY)
	{
		return;
	}

	foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation);

	foreignScan = (ForeignScan *) scanState->ss.ps.plan;
	foreignPrivateList = (List *) foreignScan->fdw_private;
	whereClauseList = foreignScan->scan.plan.qual;

	/* the column list was stashed into fdw_private by CStoreGetForeignPlan */
	columnList = (List *) linitial(foreignPrivateList);
	relation = cstore_fdw_open(foreignTableId, AccessShareLock);
	readState = CStoreBeginRead(relation, tupleDescriptor, columnList, whereClauseList);

	/* stash the read state for IterateForeignScan / EndForeignScan */
	scanState->fdw_state = (void *) readState;
}


/*
 * CStoreIterateForeignScan reads the next record from the cstore file, converts
 * it to a Postgres tuple, and stores the converted tuple into the ScanTupleSlot
 * as a virtual tuple.
 */
static TupleTableSlot *
CStoreIterateForeignScan(ForeignScanState *scanState)
{
	TableReadState *readState = (TableReadState *) scanState->fdw_state;
	TupleTableSlot *tupleSlot = scanState->ss.ss_ScanTupleSlot;
	bool nextRowFound = false;

	TupleDesc tupleDescriptor = tupleSlot->tts_tupleDescriptor;
	Datum *columnValues = tupleSlot->tts_values;
	bool *columnNulls = tupleSlot->tts_isnull;
	uint32 columnCount = tupleDescriptor->natts;

	/* initialize all values for this row to null */
	memset(columnValues, 0, columnCount * sizeof(Datum));
	memset(columnNulls, true, columnCount * sizeof(bool));

	/* an empty slot signals end-of-scan to the executor */
	ExecClearTuple(tupleSlot);

	nextRowFound = CStoreReadNextRow(readState, columnValues, columnNulls);
	if (nextRowFound)
	{
		ExecStoreVirtualTuple(tupleSlot);
	}

	return tupleSlot;
}


/* CStoreEndForeignScan finishes scanning the foreign table. */
static void
CStoreEndForeignScan(ForeignScanState *scanState)
{
	TableReadState *readState = (TableReadState *) scanState->fdw_state;

	/* fdw_state is NULL for EXPLAIN-only scans (BeginForeignScan bailed out) */
	if (readState != NULL)
	{
		/*
		 * NOTE(review): the relation is closed before CStoreEndRead() runs —
		 * assumes CStoreEndRead does not touch readState->relation; confirm.
		 */
		heap_close(readState->relation, AccessShareLock);
		CStoreEndRead(readState);
	}
}


/* CStoreReScanForeignScan rescans the foreign table. */
static void
CStoreReScanForeignScan(ForeignScanState *scanState)
{
	/* a rescan is implemented as a full end + fresh begin */
	CStoreEndForeignScan(scanState);
	CStoreBeginForeignScan(scanState, 0);
}


/*
 * CStoreAnalyzeForeignTable sets the total page count and the function pointer
 * used to acquire a random sample of rows from the foreign file.
 */
static bool
CStoreAnalyzeForeignTable(Relation relation,
						  AcquireSampleRowsFunc *acquireSampleRowsFunc,
						  BlockNumber *totalPageCount)
{
	cstore_fdw_initrel(relation);
	RelationOpenSmgr(relation);
	(*totalPageCount) = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
	(*acquireSampleRowsFunc) = CStoreAcquireSampleRows;

	return true;
}


/*
 * CStoreAcquireSampleRows acquires a random sample of rows from the foreign
 * table.
Selected rows are returned in the caller allocated sampleRows array, + * which must have at least target row count entries. The actual number of rows + * selected is returned as the function result. We also count the number of rows + * in the collection and return it in total row count. We also always set dead + * row count to zero. + * + * Note that the returned list of rows does not always follow their actual order + * in the cstore file. Therefore, correlation estimates derived later could be + * inaccurate, but that's OK. We currently don't use correlation estimates (the + * planner only pays attention to correlation for index scans). + */ +static int +CStoreAcquireSampleRows(Relation relation, int logLevel, + HeapTuple *sampleRows, int targetRowCount, + double *totalRowCount, double *totalDeadRowCount) +{ + int sampleRowCount = 0; + double rowCount = 0.0; + double rowCountToSkip = -1; /* -1 means not set yet */ + double selectionState = 0; + MemoryContext oldContext = CurrentMemoryContext; + MemoryContext tupleContext = NULL; + Datum *columnValues = NULL; + bool *columnNulls = NULL; + TupleTableSlot *scanTupleSlot = NULL; + List *columnList = NIL; + List *foreignPrivateList = NULL; + ForeignScanState *scanState = NULL; + ForeignScan *foreignScan = NULL; + char *relationName = NULL; + int executorFlags = 0; + uint32 columnIndex = 0; + + TupleDesc tupleDescriptor = RelationGetDescr(relation); + uint32 columnCount = tupleDescriptor->natts; + + cstore_fdw_initrel(relation); + + /* create list of columns of the relation */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + const Index tableId = 1; + + if (!attributeForm->attisdropped) + { + Var *column = makeVar(tableId, columnIndex + 1, attributeForm->atttypid, + attributeForm->atttypmod, attributeForm->attcollation, + 0); + columnList = lappend(columnList, column); + } + } + + /* setup foreign scan plan node */ + 
foreignPrivateList = list_make1(columnList); + foreignScan = makeNode(ForeignScan); + foreignScan->fdw_private = foreignPrivateList; + + /* set up tuple slot */ + columnValues = palloc0(columnCount * sizeof(Datum)); + columnNulls = palloc0(columnCount * sizeof(bool)); +#if PG_VERSION_NUM >= 120000 + scanTupleSlot = MakeTupleTableSlot(NULL, &TTSOpsVirtual); +#elif PG_VERSION_NUM >= 110000 + scanTupleSlot = MakeTupleTableSlot(NULL); +#else + scanTupleSlot = MakeTupleTableSlot(); +#endif + scanTupleSlot->tts_tupleDescriptor = tupleDescriptor; + scanTupleSlot->tts_values = columnValues; + scanTupleSlot->tts_isnull = columnNulls; + + /* setup scan state */ + scanState = makeNode(ForeignScanState); + scanState->ss.ss_currentRelation = relation; + scanState->ss.ps.plan = (Plan *) foreignScan; + scanState->ss.ss_ScanTupleSlot = scanTupleSlot; + + /* + * Use per-tuple memory context to prevent leak of memory used to read and + * parse rows from the file. + */ + tupleContext = AllocSetContextCreate(CurrentMemoryContext, + "cstore_fdw temporary context", + ALLOCSET_DEFAULT_SIZES); + + CStoreBeginForeignScan(scanState, executorFlags); + + /* prepare for sampling rows */ + selectionState = anl_init_selection_state(targetRowCount); + + for (;;) + { + /* check for user-requested abort or sleep */ + vacuum_delay_point(); + + memset(columnValues, 0, columnCount * sizeof(Datum)); + memset(columnNulls, true, columnCount * sizeof(bool)); + + MemoryContextReset(tupleContext); + MemoryContextSwitchTo(tupleContext); + + /* read the next record */ + CStoreIterateForeignScan(scanState); + + MemoryContextSwitchTo(oldContext); + + /* if there are no more records to read, break */ + if (TTS_EMPTY(scanTupleSlot)) + { + break; + } + + /* + * The first targetRowCount sample rows are simply copied into the + * reservoir. Then we start replacing tuples in the sample until we + * reach the end of the relation. This algorithm is from Jeff Vitter's + * paper (see more info in commands/analyze.c). 
+ */ + if (sampleRowCount < targetRowCount) + { + sampleRows[sampleRowCount] = heap_form_tuple(tupleDescriptor, columnValues, + columnNulls); + sampleRowCount++; + } + else + { + /* + * t in Vitter's paper is the number of records already processed. + * If we need to compute a new S value, we must use the "not yet + * incremented" value of rowCount as t. + */ + if (rowCountToSkip < 0) + { + rowCountToSkip = anl_get_next_S(rowCount, targetRowCount, + &selectionState); + } + + if (rowCountToSkip <= 0) + { + /* + * Found a suitable tuple, so save it, replacing one old tuple + * at random. + */ + int rowIndex = (int) (targetRowCount * anl_random_fract()); + Assert(rowIndex >= 0); + Assert(rowIndex < targetRowCount); + + heap_freetuple(sampleRows[rowIndex]); + sampleRows[rowIndex] = heap_form_tuple(tupleDescriptor, + columnValues, columnNulls); + } + + rowCountToSkip--; + } + + rowCount++; + } + + /* clean up */ + MemoryContextDelete(tupleContext); + pfree(columnValues); + pfree(columnNulls); + + CStoreEndForeignScan(scanState); + + /* emit some interesting relation info */ + relationName = RelationGetRelationName(relation); + ereport(logLevel, (errmsg("\"%s\": file contains %.0f rows; %d rows in sample", + relationName, rowCount, sampleRowCount))); + + (*totalRowCount) = rowCount; + (*totalDeadRowCount) = 0; + + return sampleRowCount; +} + + +/* + * CStorePlanForeignModify checks if operation is supported. Only insert + * command with subquery (ie insert into
select ...) is supported. + * Other forms of insert, delete, and update commands are not supported. It + * throws an error when the command is not supported. + */ +static List * +CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan, + Index resultRelation, int subplanIndex) +{ + bool operationSupported = false; + + if (plan->operation == CMD_INSERT) + { + ListCell *tableCell = NULL; + Query *query = NULL; + + /* + * Only insert operation with select subquery is supported. Other forms + * of insert, update, and delete operations are not supported. + */ + query = plannerInfo->parse; + foreach(tableCell, query->rtable) + { + RangeTblEntry *tableEntry = lfirst(tableCell); + + if (tableEntry->rtekind == RTE_SUBQUERY && + tableEntry->subquery != NULL && + tableEntry->subquery->commandType == CMD_SELECT) + { + operationSupported = true; + break; + } + } + } + + if (!operationSupported) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("operation is not supported"))); + } + + return NIL; +} + + +/* + * CStoreBeginForeignModify prepares cstore table for a modification. + * Only insert is currently supported. + */ +static void +CStoreBeginForeignModify(ModifyTableState *modifyTableState, + ResultRelInfo *relationInfo, List *fdwPrivate, + int subplanIndex, int executorFlags) +{ + /* if Explain with no Analyze, do nothing */ + if (executorFlags & EXEC_FLAG_EXPLAIN_ONLY) + { + return; + } + + Assert(modifyTableState->operation == CMD_INSERT); + + CStoreBeginForeignInsert(modifyTableState, relationInfo); +} + + +/* + * CStoreBeginForeignInsert prepares a cstore table for an insert or rows + * coming from a COPY. 
+ */ +static void +CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo) +{ + Oid foreignTableOid = InvalidOid; + CStoreOptions *cstoreOptions = NULL; + TupleDesc tupleDescriptor = NULL; + TableWriteState *writeState = NULL; + Relation relation = NULL; + + foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); + relation = cstore_fdw_open(foreignTableOid, RowExclusiveLock); + cstoreOptions = CStoreGetOptions(foreignTableOid); + tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); + + writeState = CStoreBeginWrite(relation, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, + tupleDescriptor); + + relationInfo->ri_FdwState = (void *) writeState; +} + + +/* + * CStoreExecForeignInsert inserts a single row to cstore table + * and returns inserted row's data values. + */ +static TupleTableSlot * +CStoreExecForeignInsert(EState *executorState, ResultRelInfo *relationInfo, + TupleTableSlot *tupleSlot, TupleTableSlot *planSlot) +{ + TableWriteState *writeState = (TableWriteState *) relationInfo->ri_FdwState; + HeapTuple heapTuple; + + Assert(writeState != NULL); + + heapTuple = GetSlotHeapTuple(tupleSlot); + + if (HeapTupleHasExternal(heapTuple)) + { + /* detoast any toasted attributes */ + HeapTuple newTuple = toast_flatten_tuple(heapTuple, + tupleSlot->tts_tupleDescriptor); + + ExecForceStoreHeapTuple(newTuple, tupleSlot, true); + } + + slot_getallattrs(tupleSlot); + + CStoreWriteRow(writeState, tupleSlot->tts_values, tupleSlot->tts_isnull); + + return tupleSlot; +} + + +/* + * CStoreEndForeignModify ends the current modification. Only insert is currently + * supported. + */ +static void +CStoreEndForeignModify(EState *executorState, ResultRelInfo *relationInfo) +{ + CStoreEndForeignInsert(executorState, relationInfo); +} + + +/* + * CStoreEndForeignInsert ends the current insert or COPY operation. 
+ */ +static void +CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) +{ + TableWriteState *writeState = (TableWriteState *) relationInfo->ri_FdwState; + + /* writeState is NULL during Explain queries */ + if (writeState != NULL) + { + Relation relation = writeState->relation; + + CStoreEndWrite(writeState); + heap_close(relation, RowExclusiveLock); + } +} + + +#if PG_VERSION_NUM >= 90600 + +/* + * CStoreIsForeignScanParallelSafe always returns true to indicate that + * reading from a cstore_fdw table in a parallel worker is safe. This + * does not enable parallelism for queries on individual cstore_fdw + * tables, but does allow parallel scans of cstore_fdw partitions. + * + * cstore_fdw is parallel-safe because all writes are immediately committed + * to disk and then read from disk. There is no uncommitted state that needs + * to be shared across processes. + */ +static bool +CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + return true; +} + + +#endif + +/* + * Versions 12 and later do not initialize rd_node even if the relation has a + * valid relfilenode, so we need to initialize it each time a cstore FDW + * relation is opened. + */ +static void +cstore_fdw_initrel(Relation rel) +{ +#if PG_VERSION_NUM >= 120000 + if (rel->rd_rel->relfilenode == InvalidOid) + { + FdwNewRelFileNode(rel); + } + + /* + * Copied code from RelationInitPhysicalAddr(), which doesn't + * work on foreign tables. 
+ */ + if (OidIsValid(rel->rd_rel->reltablespace)) + { + rel->rd_node.spcNode = rel->rd_rel->reltablespace; + } + else + { + rel->rd_node.spcNode = MyDatabaseTableSpace; + } + + rel->rd_node.dbNode = MyDatabaseId; + rel->rd_node.relNode = rel->rd_rel->relfilenode; +#endif + FdwCreateStorage(rel); +} + + +static Relation +cstore_fdw_open(Oid relationId, LOCKMODE lockmode) +{ + Relation rel = heap_open(relationId, lockmode); + + cstore_fdw_initrel(rel); + + return rel; +} + + +static Relation +cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode) +{ + Relation rel = heap_openrv(relation, lockmode); + + cstore_fdw_initrel(rel); + + return rel; +} + + +/* + * Implements object_access_hook. One of the places this is called is just + * before dropping an object, which allows us to clean-up resources for + * cstore tables. + * + * When cleaning up resources, we need to have access to the pg_class record + * for the table so we can indentify the relfilenode belonging to the relation. + * We don't have access to this information in sql_drop event triggers, since + * the relation has already been dropped there. object_access_hook is called + * __before__ dropping tables, so we still have access to the pg_class + * entry here. + * + * Note that the utility hook is called once per __command__, and not for + * every object dropped, and since a drop can cascade to other objects, it + * is difficult to get full set of dropped objects in the utility hook. + * But object_access_hook is called once per dropped object, so it is + * much easier to clean-up all dropped objects here. + */ +static void +CStoreFdwObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, + int subId, void *arg) +{ + if (prevObjectAccessHook) + { + prevObjectAccessHook(access, classId, objectId, subId, arg); + } + + /* + * Do nothing if this is not a DROP relation command. 
+ */ + if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) + { + return; + } + + /* + * Lock relation to prevent it from being dropped and to avoid + * race conditions in the next if block. + */ + LockRelationOid(objectId, AccessShareLock); + + if (IsCStoreFdwTable(objectId)) + { + /* + * Drop both metadata and storage. We need to drop storage here since + * we manage relfilenode for FDW tables in the extension. + */ + Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); + RelationOpenSmgr(rel); + RelationDropStorage(rel); + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + + /* keep the lock since we did physical changes to the relation */ + relation_close(rel, NoLock); + } +} diff --git a/src/backend/columnar/cstore_fdw.control b/src/backend/columnar/cstore_fdw.control new file mode 100644 index 000000000..57fd0808a --- /dev/null +++ b/src/backend/columnar/cstore_fdw.control @@ -0,0 +1,6 @@ +# cstore_fdw extension +comment = 'foreign-data wrapper for flat cstore access' +default_version = '1.8' +module_pathname = '$libdir/cstore_fdw' +relocatable = false +schema = cstore diff --git a/src/backend/columnar/cstore_fdw.h b/src/backend/columnar/cstore_fdw.h new file mode 100644 index 000000000..1c8170ae8 --- /dev/null +++ b/src/backend/columnar/cstore_fdw.h @@ -0,0 +1,35 @@ +/*------------------------------------------------------------------------- + * + * cstore_fdw.h + * + * Type and function declarations for CStore foreign data wrapper. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_FDW_H +#define CSTORE_FDW_H + +#include "postgres.h" + +#include "fmgr.h" + +void cstore_fdw_init(void); +void cstore_fdw_finish(void); + +/* event trigger function declarations */ +extern Datum cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS); + +/* Function declarations for utility UDFs */ +extern Datum cstore_table_size(PG_FUNCTION_ARGS); +extern Datum cstore_clean_table_resources(PG_FUNCTION_ARGS); + +/* Function declarations for foreign data wrapper */ +extern Datum cstore_fdw_handler(PG_FUNCTION_ARGS); +extern Datum cstore_fdw_validator(PG_FUNCTION_ARGS); + +#endif /* CSTORE_FDW_H */ diff --git a/src/backend/columnar/cstore_metadata_tables.c b/src/backend/columnar/cstore_metadata_tables.c new file mode 100644 index 000000000..a2eab1940 --- /dev/null +++ b/src/backend/columnar/cstore_metadata_tables.c @@ -0,0 +1,1034 @@ +/*------------------------------------------------------------------------- + * + * cstore_metadata_tables.c + * + * Copyright (c), Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "cstore.h" +#include "cstore_version_compat.h" + +#include +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/xact.h" +#include "catalog/indexing.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "catalog/namespace.h" +#include "commands/defrem.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "nodes/execnodes.h" +#include "lib/stringinfo.h" +#include "port.h" +#include "storage/fd.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + +typedef struct +{ + Relation rel; + EState *estate; +} ModifyState; + +static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); +static void GetHighestUsedAddressAndId(Oid relfilenode, + uint64 *highestUsedAddress, + uint64 *highestUsedId); +static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot); +static Oid CStoreStripesRelationId(void); +static Oid CStoreStripesIndexRelationId(void); +static Oid CStoreDataFilesRelationId(void); +static Oid CStoreDataFilesIndexRelationId(void); +static Oid CStoreSkipNodesRelationId(void); +static Oid CStoreSkipNodesIndexRelationId(void); +static Oid CStoreNamespaceId(void); +static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata); +static ModifyState * StartModifyRelation(Relation rel); +static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, + bool *nulls); +static void DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple); +static void FinishModifyRelation(ModifyState *state); +static EState * create_estate_for_relation(Relation rel); 
+static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); +static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); + +/* constants for cstore_table */ +#define Natts_cstore_data_files 6 +#define Anum_cstore_data_files_relfilenode 1 +#define Anum_cstore_data_files_block_row_count 2 +#define Anum_cstore_data_files_stripe_row_count 3 +#define Anum_cstore_data_files_compression 4 +#define Anum_cstore_data_files_version_major 5 +#define Anum_cstore_data_files_version_minor 6 + +/* ---------------- + * cstore.cstore_data_files definition. + * ---------------- + */ +typedef struct FormData_cstore_data_files +{ + Oid relfilenode; + int32 block_row_count; + int32 stripe_row_count; + NameData compression; + int64 version_major; + int64 version_minor; + +#ifdef CATALOG_VARLEN /* variable-length fields start here */ +#endif +} FormData_cstore_data_files; +typedef FormData_cstore_data_files *Form_cstore_data_files; + +/* constants for cstore_stripe */ +#define Natts_cstore_stripes 8 +#define Anum_cstore_stripes_relfilenode 1 +#define Anum_cstore_stripes_stripe 2 +#define Anum_cstore_stripes_file_offset 3 +#define Anum_cstore_stripes_data_length 4 +#define Anum_cstore_stripes_column_count 5 +#define Anum_cstore_stripes_block_count 6 +#define Anum_cstore_stripes_block_row_count 7 +#define Anum_cstore_stripes_row_count 8 + +/* constants for cstore_skipnodes */ +#define Natts_cstore_skipnodes 12 +#define Anum_cstore_skipnodes_relfilenode 1 +#define Anum_cstore_skipnodes_stripe 2 +#define Anum_cstore_skipnodes_attr 3 +#define Anum_cstore_skipnodes_block 4 +#define Anum_cstore_skipnodes_row_count 5 +#define Anum_cstore_skipnodes_minimum_value 6 +#define Anum_cstore_skipnodes_maximum_value 7 +#define Anum_cstore_skipnodes_value_stream_offset 8 +#define Anum_cstore_skipnodes_value_stream_length 9 +#define Anum_cstore_skipnodes_exists_stream_offset 10 +#define Anum_cstore_skipnodes_exists_stream_length 11 +#define Anum_cstore_skipnodes_value_compression_type 12 
+ + +/* + * InitCStoreDataFileMetadata adds a record for the given relfilenode + * in cstore_data_files. + */ +void +InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount, + CompressionType compression) +{ + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; + ModifyState *modifyState = NULL; + NameData compressionName = { 0 }; + + namestrcpy(&compressionName, CompressionTypeStr(compression)); + + bool nulls[Natts_cstore_data_files] = { 0 }; + Datum values[Natts_cstore_data_files] = { + ObjectIdGetDatum(relfilenode), + Int32GetDatum(blockRowCount), + Int32GetDatum(stripeRowCount), + NameGetDatum(&compressionName), + Int32GetDatum(CSTORE_VERSION_MAJOR), + Int32GetDatum(CSTORE_VERSION_MINOR) + }; + + DeleteDataFileMetadataRowIfExists(relfilenode); + + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock); + + modifyState = StartModifyRelation(cstoreDataFiles); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + FinishModifyRelation(modifyState); + + CommandCounterIncrement(); + + heap_close(cstoreDataFiles, NoLock); +} + + +void +UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount, + CompressionType compression) +{ + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + bool indexOK = true; + SysScanDesc scanDescriptor = NULL; + Form_cstore_data_files metadata = NULL; + HeapTuple heapTuple = NULL; + Datum values[Natts_cstore_data_files] = { 0 }; + bool isnull[Natts_cstore_data_files] = { 0 }; + bool replace[Natts_cstore_data_files] = { 0 }; + + Relation cstoreDataFiles = heap_open(CStoreDataFilesRelationId(), RowExclusiveLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles); + + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, + F_INT8EQ, ObjectIdGetDatum(relfilenode)); + + scanDescriptor = systable_beginscan(cstoreDataFiles, + 
CStoreDataFilesIndexRelationId(), + indexOK, + NULL, scanKeyCount, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (heapTuple == NULL) + { + ereport(ERROR, (errmsg("relfilenode %d doesn't belong to a cstore table", + relfilenode))); + } + + metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple); + + bool changed = false; + if (metadata->block_row_count != blockRowCount) + { + values[Anum_cstore_data_files_block_row_count - 1] = Int32GetDatum(blockRowCount); + isnull[Anum_cstore_data_files_block_row_count - 1] = false; + replace[Anum_cstore_data_files_block_row_count - 1] = true; + changed = true; + } + + if (metadata->stripe_row_count != stripeRowCount) + { + values[Anum_cstore_data_files_stripe_row_count - 1] = Int32GetDatum( + stripeRowCount); + isnull[Anum_cstore_data_files_stripe_row_count - 1] = false; + replace[Anum_cstore_data_files_stripe_row_count - 1] = true; + changed = true; + } + + if (ParseCompressionType(NameStr(metadata->compression)) != compression) + { + Name compressionName = palloc0(sizeof(NameData)); + namestrcpy(compressionName, CompressionTypeStr(compression)); + values[Anum_cstore_data_files_compression - 1] = NameGetDatum(compressionName); + isnull[Anum_cstore_data_files_compression - 1] = false; + replace[Anum_cstore_data_files_compression - 1] = true; + changed = true; + } + + if (changed) + { + heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, + replace); + + CatalogTupleUpdate(cstoreDataFiles, &heapTuple->t_self, heapTuple); + + CommandCounterIncrement(); + } + + systable_endscan(scanDescriptor); + + heap_close(cstoreDataFiles, NoLock); +} + + +/* + * SaveStripeSkipList saves StripeSkipList for a given stripe as rows + * of cstore_skipnodes. 
+ */ +void +SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, + TupleDesc tupleDescriptor) +{ + uint32 columnIndex = 0; + uint32 blockIndex = 0; + Oid cstoreSkipNodesOid = InvalidOid; + Relation cstoreSkipNodes = NULL; + ModifyState *modifyState = NULL; + uint32 columnCount = stripeSkipList->columnCount; + + cstoreSkipNodesOid = CStoreSkipNodesRelationId(); + cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock); + modifyState = StartModifyRelation(cstoreSkipNodes); + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockSkipNode *skipNode = + &stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; + + Datum values[Natts_cstore_skipnodes] = { + ObjectIdGetDatum(relfilenode), + Int64GetDatum(stripe), + Int32GetDatum(columnIndex + 1), + Int32GetDatum(blockIndex), + Int64GetDatum(skipNode->rowCount), + 0, /* to be filled below */ + 0, /* to be filled below */ + Int64GetDatum(skipNode->valueBlockOffset), + Int64GetDatum(skipNode->valueLength), + Int64GetDatum(skipNode->existsBlockOffset), + Int64GetDatum(skipNode->existsLength), + Int32GetDatum(skipNode->valueCompressionType) + }; + + bool nulls[Natts_cstore_skipnodes] = { false }; + + if (skipNode->hasMinMax) + { + values[Anum_cstore_skipnodes_minimum_value - 1] = + PointerGetDatum(DatumToBytea(skipNode->minimumValue, + &tupleDescriptor->attrs[columnIndex])); + values[Anum_cstore_skipnodes_maximum_value - 1] = + PointerGetDatum(DatumToBytea(skipNode->maximumValue, + &tupleDescriptor->attrs[columnIndex])); + } + else + { + nulls[Anum_cstore_skipnodes_minimum_value - 1] = true; + nulls[Anum_cstore_skipnodes_maximum_value - 1] = true; + } + + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + } + } + + FinishModifyRelation(modifyState); + heap_close(cstoreSkipNodes, NoLock); + + CommandCounterIncrement(); +} + + +/* + * ReadStripeSkipList fetches 
StripeSkipList for a given stripe. + */ +StripeSkipList * +ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, + uint32 blockCount) +{ + StripeSkipList *skipList = NULL; + int32 columnIndex = 0; + Oid cstoreSkipNodesOid = InvalidOid; + Relation cstoreSkipNodes = NULL; + Relation index = NULL; + HeapTuple heapTuple = NULL; + uint32 columnCount = tupleDescriptor->natts; + ScanKeyData scanKey[2]; + SysScanDesc scanDescriptor = NULL; + + cstoreSkipNodesOid = CStoreSkipNodesRelationId(); + cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); + index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); + + ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); + ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); + + scanDescriptor = systable_beginscan_ordered(cstoreSkipNodes, index, NULL, 2, scanKey); + + skipList = palloc0(sizeof(StripeSkipList)); + skipList->blockCount = blockCount; + skipList->columnCount = columnCount; + skipList->blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + skipList->blockSkipNodeArray[columnIndex] = + palloc0(blockCount * sizeof(ColumnBlockSkipNode)); + } + + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + int32 attr = 0; + int32 blockIndex = 0; + ColumnBlockSkipNode *skipNode = NULL; + + Datum datumArray[Natts_cstore_skipnodes]; + bool isNullArray[Natts_cstore_skipnodes]; + + heap_deform_tuple(heapTuple, RelationGetDescr(cstoreSkipNodes), datumArray, + isNullArray); + + attr = DatumGetInt32(datumArray[Anum_cstore_skipnodes_attr - 1]); + blockIndex = DatumGetInt32(datumArray[Anum_cstore_skipnodes_block - 1]); + + if (attr <= 0 || attr > columnCount) + { + ereport(ERROR, (errmsg("invalid stripe skipnode entry"), + errdetail("Attribute number 
out of range: %d", attr))); + } + + if (blockIndex < 0 || blockIndex >= blockCount) + { + ereport(ERROR, (errmsg("invalid stripe skipnode entry"), + errdetail("Block number out of range: %d", blockIndex))); + } + + columnIndex = attr - 1; + + skipNode = &skipList->blockSkipNodeArray[columnIndex][blockIndex]; + skipNode->rowCount = DatumGetInt64(datumArray[Anum_cstore_skipnodes_row_count - + 1]); + skipNode->valueBlockOffset = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_value_stream_offset - 1]); + skipNode->valueLength = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_value_stream_length - 1]); + skipNode->existsBlockOffset = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_exists_stream_offset - 1]); + skipNode->existsLength = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_exists_stream_length - 1]); + skipNode->valueCompressionType = + DatumGetInt32(datumArray[Anum_cstore_skipnodes_value_compression_type - 1]); + + if (isNullArray[Anum_cstore_skipnodes_minimum_value - 1] || + isNullArray[Anum_cstore_skipnodes_maximum_value - 1]) + { + skipNode->hasMinMax = false; + } + else + { + bytea *minValue = DatumGetByteaP( + datumArray[Anum_cstore_skipnodes_minimum_value - 1]); + bytea *maxValue = DatumGetByteaP( + datumArray[Anum_cstore_skipnodes_maximum_value - 1]); + + skipNode->minimumValue = + ByteaToDatum(minValue, &tupleDescriptor->attrs[columnIndex]); + skipNode->maximumValue = + ByteaToDatum(maxValue, &tupleDescriptor->attrs[columnIndex]); + + skipNode->hasMinMax = true; + } + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreSkipNodes, NoLock); + + return skipList; +} + + +/* + * InsertStripeMetadataRow adds a row to cstore_stripes. 
+ */ +static void +InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) +{ + bool nulls[Natts_cstore_stripes] = { 0 }; + Datum values[Natts_cstore_stripes] = { + ObjectIdGetDatum(relfilenode), + Int64GetDatum(stripe->id), + Int64GetDatum(stripe->fileOffset), + Int64GetDatum(stripe->dataLength), + Int32GetDatum(stripe->columnCount), + Int32GetDatum(stripe->blockCount), + Int32GetDatum(stripe->blockRowCount), + Int64GetDatum(stripe->rowCount) + }; + + Oid cstoreStripesOid = CStoreStripesRelationId(); + Relation cstoreStripes = heap_open(cstoreStripesOid, RowExclusiveLock); + + ModifyState *modifyState = StartModifyRelation(cstoreStripes); + + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + + FinishModifyRelation(modifyState); + + CommandCounterIncrement(); + + heap_close(cstoreStripes, NoLock); +} + + +/* + * ReadDataFileMetadata constructs DataFileMetadata for a given relfilenode by reading + * from cstore_data_files and cstore_stripes. + */ +DataFileMetadata * +ReadDataFileMetadata(Oid relfilenode, bool missingOk) +{ + DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); + bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata); + if (!found) + { + if (!missingOk) + { + ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", + relfilenode))); + } + else + { + return NULL; + } + } + + datafileMetadata->stripeMetadataList = + ReadDataFileStripeList(relfilenode, GetTransactionSnapshot()); + + return datafileMetadata; +} + + +/* + * GetHighestUsedAddress returns the highest used address for the given + * relfilenode across all active and inactive transactions. 
+ */ +uint64 +GetHighestUsedAddress(Oid relfilenode) +{ + uint64 highestUsedAddress = 0; + uint64 highestUsedId = 0; + + GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId); + + return highestUsedAddress; +} + + +/* + * GetHighestUsedAddressAndId returns the highest used address and id for + * the given relfilenode across all active and inactive transactions. + */ +static void +GetHighestUsedAddressAndId(Oid relfilenode, + uint64 *highestUsedAddress, + uint64 *highestUsedId) +{ + ListCell *stripeMetadataCell = NULL; + List *stripeMetadataList = NIL; + + SnapshotData SnapshotDirty; + InitDirtySnapshot(SnapshotDirty); + + stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); + + *highestUsedId = 0; + *highestUsedAddress = 0; + + foreach(stripeMetadataCell, stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; + *highestUsedAddress = Max(*highestUsedAddress, lastByte); + *highestUsedId = Max(*highestUsedId, stripe->id); + } +} + + +/* + * ReserveStripe reserves and stripe of given size for the given relation, + * and inserts it into cstore_stripes. It is guaranteed that concurrent + * writes won't overwrite the returned stripe. + */ +StripeMetadata +ReserveStripe(Relation rel, uint64 sizeBytes, + uint64 rowCount, uint64 columnCount, + uint64 blockCount, uint64 blockRowCount) +{ + StripeMetadata stripe = { 0 }; + Oid relfilenode = InvalidOid; + uint64 currLogicalHigh = 0; + SmgrAddr currSmgrHigh; + uint64 nblocks = 0; + uint64 resLogicalStart = 0; + SmgrAddr resSmgrStart; + uint64 resLogicalEnd = 0; + SmgrAddr resSmgrEnd; + uint64 highestId = 0; + + /* + * We take ShareUpdateExclusiveLock here, so two space + * reservations conflict, space reservation <-> vacuum + * conflict, but space reservation doesn't conflict with + * reads & writes. 
+ */ + LockRelation(rel, ShareUpdateExclusiveLock); + + relfilenode = rel->rd_node.relNode; + GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId); + currSmgrHigh = logical_to_smgr(currLogicalHigh); + + resSmgrStart = next_block_start(currSmgrHigh); + resLogicalStart = smgr_to_logical(resSmgrStart); + + resLogicalEnd = resLogicalStart + sizeBytes - 1; + resSmgrEnd = logical_to_smgr(resLogicalEnd); + + RelationOpenSmgr(rel); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + + while (resSmgrEnd.blockno >= nblocks) + { + Buffer newBuffer = ReadBuffer(rel, P_NEW); + ReleaseBuffer(newBuffer); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + } + + RelationCloseSmgr(rel); + + stripe.fileOffset = resLogicalStart; + stripe.dataLength = sizeBytes; + stripe.blockCount = blockCount; + stripe.blockRowCount = blockRowCount; + stripe.columnCount = columnCount; + stripe.rowCount = rowCount; + stripe.id = highestId + 1; + + InsertStripeMetadataRow(relfilenode, &stripe); + + UnlockRelation(rel, ShareUpdateExclusiveLock); + + return stripe; +} + + +/* + * ReadDataFileStripeList reads the stripe list for a given relfilenode + * in the given snapshot. 
+ */ +static List * +ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) +{ + List *stripeMetadataList = NIL; + Oid cstoreStripesOid = InvalidOid; + Relation cstoreStripes = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple; + + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); + + cstoreStripesOid = CStoreStripesRelationId(); + cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); + index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); + tupleDescriptor = RelationGetDescr(cstoreStripes); + + scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, snapshot, 1, + scanKey); + + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + StripeMetadata *stripeMetadata = NULL; + Datum datumArray[Natts_cstore_stripes]; + bool isNullArray[Natts_cstore_stripes]; + + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + + stripeMetadata = palloc0(sizeof(StripeMetadata)); + stripeMetadata->id = DatumGetInt64(datumArray[Anum_cstore_stripes_stripe - 1]); + stripeMetadata->fileOffset = DatumGetInt64( + datumArray[Anum_cstore_stripes_file_offset - 1]); + stripeMetadata->dataLength = DatumGetInt64( + datumArray[Anum_cstore_stripes_data_length - 1]); + stripeMetadata->columnCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_column_count - 1]); + stripeMetadata->blockCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_block_count - 1]); + stripeMetadata->blockRowCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_block_row_count - 1]); + stripeMetadata->rowCount = DatumGetInt64( + datumArray[Anum_cstore_stripes_row_count - 1]); + + stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreStripes, NoLock); + + return 
stripeMetadataList; +} + + +/* + * ReadCStoreDataFiles reads corresponding record from cstore_data_files. Returns + * false if table was not found in cstore_data_files. + */ +static bool +ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) +{ + bool found = false; + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple = NULL; + + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); + + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); + if (cstoreDataFiles == NULL) + { + /* + * Extension has been dropped. This can be called while + * dropping extension or database via ObjectAccess(). + */ + return false; + } + + index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock); + if (index == NULL) + { + heap_close(cstoreDataFiles, NoLock); + + /* extension has been dropped */ + return false; + } + + tupleDescriptor = RelationGetDescr(cstoreDataFiles); + + scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, 1, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + Datum datumArray[Natts_cstore_data_files]; + bool isNullArray[Natts_cstore_data_files]; + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + + if (metadata) + { + Name compressionName = NULL; + + metadata->blockRowCount = DatumGetInt32( + datumArray[Anum_cstore_data_files_block_row_count - 1]); + metadata->stripeRowCount = DatumGetInt32( + datumArray[Anum_cstore_data_files_stripe_row_count - 1]); + compressionName = DatumGetName( + datumArray[Anum_cstore_data_files_compression - 1]); + metadata->compression = ParseCompressionType(NameStr(*compressionName)); + } + found = true; + } + + 
systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreDataFiles, NoLock); + + return found; +} + + +/* + * DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes. + */ +void +DeleteDataFileMetadataRowIfExists(Oid relfilenode) +{ + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; + Relation index = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple = NULL; + + /* + * During a restore for binary upgrade, metadata tables and indexes may or + * may not exist. + */ + if (IsBinaryUpgrade) + { + return; + } + + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); + + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); + if (cstoreDataFiles == NULL) + { + /* extension has been dropped */ + return; + } + + index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock); + + scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, 1, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + ModifyState *modifyState = StartModifyRelation(cstoreDataFiles); + DeleteTupleAndEnforceConstraints(modifyState, heapTuple); + FinishModifyRelation(modifyState); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreDataFiles, NoLock); +} + + +/* + * StartModifyRelation allocates resources for modifications. + */ +static ModifyState * +StartModifyRelation(Relation rel) +{ + ModifyState *modifyState = NULL; + EState *estate = create_estate_for_relation(rel); + + /* ExecSimpleRelationInsert, ... 
require caller to open indexes */ + ExecOpenIndices(estate->es_result_relation_info, false); + + modifyState = palloc(sizeof(ModifyState)); + modifyState->rel = rel; + modifyState->estate = estate; + + return modifyState; +} + + +/* + * InsertTupleAndEnforceConstraints inserts a tuple into a relation and makes + * sure constraints are enforced and indexes are updated. + */ +static void +InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls) +{ + TupleDesc tupleDescriptor = RelationGetDescr(state->rel); + HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + +#if PG_VERSION_NUM >= 120000 + TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor, + &TTSOpsHeapTuple); + + ExecStoreHeapTuple(tuple, slot, false); +#else + TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor); + ExecStoreTuple(tuple, slot, InvalidBuffer, false); +#endif + + /* use ExecSimpleRelationInsert to enforce constraints */ + ExecSimpleRelationInsert(state->estate, slot); +} + + +/* + * DeleteTupleAndEnforceConstraints deletes a tuple from a relation and + * makes sure constraints (e.g. FK constraints) are enforced. + */ +static void +DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple) +{ + EState *estate = state->estate; + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + + ItemPointer tid = &(heapTuple->t_self); + simple_heap_delete(state->rel, tid); + + /* execute AFTER ROW DELETE Triggers to enforce constraints */ + ExecARDeleteTriggers(estate, resultRelInfo, tid, NULL, NULL); +} + + +/* + * FinishModifyRelation cleans up resources after modifications are done. 
+ */ +static void +FinishModifyRelation(ModifyState *state) +{ + ExecCloseIndices(state->estate->es_result_relation_info); + + AfterTriggerEndQuery(state->estate); + ExecCleanUpTriggerState(state->estate); + ExecResetTupleTable(state->estate->es_tupleTable, false); + FreeExecutorState(state->estate); +} + + +/* + * Based on a similar function from + * postgres/src/backend/replication/logical/worker.c. + * + * Executor state preparation for evaluation of constraint expressions, + * indexes and triggers. + * + * This is based on similar code in copy.c + */ +static EState * +create_estate_for_relation(Relation rel) +{ + EState *estate; + ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + + estate = CreateExecutorState(); + + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = RelationGetRelid(rel); + rte->relkind = rel->rd_rel->relkind; +#if PG_VERSION_NUM >= 120000 + rte->rellockmode = AccessShareLock; + ExecInitRangeTable(estate, list_make1(rte)); +#endif + + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); + + estate->es_result_relations = resultRelInfo; + estate->es_num_result_relations = 1; + estate->es_result_relation_info = resultRelInfo; + + estate->es_output_cid = GetCurrentCommandId(true); + +#if PG_VERSION_NUM < 120000 + + /* Triggers might need a slot */ + if (resultRelInfo->ri_TrigDesc) + { + estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL); + } +#endif + + /* Prepare to catch AFTER triggers. */ + AfterTriggerBeginQuery(); + + return estate; +} + + +/* + * DatumToBytea serializes a datum into a bytea value. 
+ */ +static bytea * +DatumToBytea(Datum value, Form_pg_attribute attrForm) +{ + int datumLength = att_addlength_datum(0, attrForm->attlen, value); + bytea *result = palloc0(datumLength + VARHDRSZ); + + SET_VARSIZE(result, datumLength + VARHDRSZ); + + if (attrForm->attlen > 0) + { + if (attrForm->attbyval) + { + store_att_byval(VARDATA(result), value, attrForm->attlen); + } + else + { + memcpy(VARDATA(result), DatumGetPointer(value), attrForm->attlen); + } + } + else + { + memcpy(VARDATA(result), DatumGetPointer(value), datumLength); + } + + return result; +} + + +/* + * ByteaToDatum deserializes a value which was previously serialized using + * DatumToBytea. + */ +static Datum +ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm) +{ + /* + * We copy the data so the result of this function lives even + * after the byteaDatum is freed. + */ + char *binaryDataCopy = palloc0(VARSIZE_ANY_EXHDR(bytes)); + memcpy(binaryDataCopy, VARDATA_ANY(bytes), VARSIZE_ANY_EXHDR(bytes)); + + return fetch_att(binaryDataCopy, attrForm->attbyval, attrForm->attlen); +} + + +/* + * CStoreStripesRelationId returns relation id of cstore_stripes. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreStripesRelationId(void) +{ + return get_relname_relid("cstore_stripes", CStoreNamespaceId()); +} + + +/* + * CStoreStripesIndexRelationId returns relation id of cstore_stripes_idx. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreStripesIndexRelationId(void) +{ + return get_relname_relid("cstore_stripes_pkey", CStoreNamespaceId()); +} + + +/* + * CStoreDataFilesRelationId returns relation id of cstore_data_files. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreDataFilesRelationId(void) +{ + return get_relname_relid("cstore_data_files", CStoreNamespaceId()); +} + + +/* + * CStoreDataFilesIndexRelationId returns relation id of cstore_data_files_pkey. + * TODO: should we cache this similar to citus? 
+ */ +static Oid +CStoreDataFilesIndexRelationId(void) +{ + return get_relname_relid("cstore_data_files_pkey", CStoreNamespaceId()); +} + + +/* + * CStoreSkipNodesRelationId returns relation id of cstore_skipnodes. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreSkipNodesRelationId(void) +{ + return get_relname_relid("cstore_skipnodes", CStoreNamespaceId()); +} + + +/* + * CStoreSkipNodesIndexRelationId returns relation id of cstore_skipnodes_pkey. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreSkipNodesIndexRelationId(void) +{ + return get_relname_relid("cstore_skipnodes_pkey", CStoreNamespaceId()); +} + + +/* + * CStoreNamespaceId returns namespace id of the schema we store cstore + * related tables. + */ +static Oid +CStoreNamespaceId(void) +{ + return get_namespace_oid("cstore", false); +} diff --git a/src/backend/columnar/cstore_reader.c b/src/backend/columnar/cstore_reader.c new file mode 100644 index 000000000..c86021f7e --- /dev/null +++ b/src/backend/columnar/cstore_reader.c @@ -0,0 +1,1065 @@ +/*------------------------------------------------------------------------- + * + * cstore_reader.c + * + * This file contains function definitions for reading cstore files. This + * includes the logic for reading file level metadata, reading row stripes, + * and skipping unrelated row blocks and columns. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" + +#include "access/nbtree.h" +#include "catalog/pg_am.h" +#include "commands/defrem.h" +#include "nodes/makefuncs.h" +#if PG_VERSION_NUM >= 120000 +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#else +#include "optimizer/clauses.h" +#include "optimizer/predtest.h" +#endif +#include "optimizer/restrictinfo.h" +#include "storage/fd.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + +#include "cstore.h" +#include "cstore_version_compat.h" + +/* static function declarations */ +static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, + StripeMetadata *stripeMetadata, + TupleDesc tupleDescriptor, + List *projectedColumnList, + List *whereClauseList); +static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, + uint64 blockIndex, uint64 blockRowIndex, + BlockData *blockData, Datum *columnValues, + bool *columnNulls); +static ColumnBuffers * LoadColumnBuffers(Relation relation, + ColumnBlockSkipNode *blockSkipNodeArray, + uint32 blockCount, uint64 stripeOffset, + Form_pg_attribute attributeForm); +static bool * SelectedBlockMask(StripeSkipList *stripeSkipList, + List *projectedColumnList, List *whereClauseList); +static List * BuildRestrictInfoList(List *whereClauseList); +static Node * BuildBaseConstraint(Var *variable); +static OpExpr * MakeOpExpression(Var *variable, int16 strategyNumber); +static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber); +static void UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue); +static StripeSkipList * SelectedBlockSkipList(StripeSkipList *stripeSkipList, + bool *projectedColumnMask, + bool *selectedBlockMask); +static uint32 StripeSkipListRowCount(StripeSkipList *stripeSkipList); +static bool * ProjectedColumnMask(uint32 columnCount, List 
*projectedColumnList); +static void DeserializeBoolArray(StringInfo boolArrayBuffer, bool *boolArray, + uint32 boolArrayLength); +static void DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, + uint32 datumCount, bool datumTypeByValue, + int datumTypeLength, char datumTypeAlign, + Datum *datumArray); +static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, + uint32 rowCount, TupleDesc tupleDescriptor, + List *projectedColumnList); +static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, + Form_pg_attribute attributeForm); +static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); + +/* + * CStoreBeginRead initializes a cstore read operation. This function returns a + * read handle that's used during reading rows and finishing the read operation. + */ +TableReadState * +CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, + List *projectedColumnList, List *whereClauseList) +{ + TableReadState *readState = NULL; + DataFileMetadata *datafileMetadata = NULL; + MemoryContext stripeReadContext = NULL; + Oid relNode = relation->rd_node.relNode; + + datafileMetadata = ReadDataFileMetadata(relNode, false); + + /* + * We allocate all stripe specific data in the stripeReadContext, and reset + * this memory context before loading a new stripe. This is to avoid memory + * leaks. 
+ */ + stripeReadContext = AllocSetContextCreate(CurrentMemoryContext, + "Stripe Read Memory Context", + ALLOCSET_DEFAULT_SIZES); + + readState = palloc0(sizeof(TableReadState)); + readState->relation = relation; + readState->datafileMetadata = datafileMetadata; + readState->projectedColumnList = projectedColumnList; + readState->whereClauseList = whereClauseList; + readState->stripeBuffers = NULL; + readState->readStripeCount = 0; + readState->stripeReadRowCount = 0; + readState->tupleDescriptor = tupleDescriptor; + readState->stripeReadContext = stripeReadContext; + readState->blockData = NULL; + readState->deserializedBlockIndex = -1; + + return readState; +} + + +/* + * CStoreReadNextRow tries to read a row from the cstore file. On success, it sets + * column values and nulls, and returns true. If there are no more rows to read, + * the function returns false. + */ +bool +CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNulls) +{ + uint32 blockIndex = 0; + uint32 blockRowIndex = 0; + StripeMetadata *stripeMetadata = readState->currentStripeMetadata; + MemoryContext oldContext = NULL; + + /* + * If no stripes are loaded, load the next non-empty stripe. Note that when + * loading stripes, we skip over blocks whose contents can be filtered with + * the query's restriction qualifiers. So, even when a stripe is physically + * not empty, we may end up loading it as an empty stripe. 
+ */ + while (readState->stripeBuffers == NULL) + { + StripeBuffers *stripeBuffers = NULL; + List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList; + uint32 stripeCount = list_length(stripeMetadataList); + + /* if we have read all stripes, return false */ + if (readState->readStripeCount == stripeCount) + { + return false; + } + + oldContext = MemoryContextSwitchTo(readState->stripeReadContext); + MemoryContextReset(readState->stripeReadContext); + readState->blockData = NULL; + + stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount); + stripeBuffers = LoadFilteredStripeBuffers(readState->relation, + stripeMetadata, + readState->tupleDescriptor, + readState->projectedColumnList, + readState->whereClauseList); + readState->readStripeCount++; + readState->currentStripeMetadata = stripeMetadata; + + MemoryContextSwitchTo(oldContext); + + if (stripeBuffers->rowCount != 0) + { + readState->stripeBuffers = stripeBuffers; + readState->stripeReadRowCount = 0; + readState->deserializedBlockIndex = -1; + break; + } + } + + blockIndex = readState->stripeReadRowCount / stripeMetadata->blockRowCount; + blockRowIndex = readState->stripeReadRowCount % stripeMetadata->blockRowCount; + + if (blockIndex != readState->deserializedBlockIndex) + { + uint32 lastBlockIndex = 0; + uint32 blockRowCount = 0; + uint32 stripeRowCount = 0; + + stripeRowCount = stripeMetadata->rowCount; + lastBlockIndex = stripeRowCount / stripeMetadata->blockRowCount; + if (blockIndex == lastBlockIndex) + { + blockRowCount = stripeRowCount % stripeMetadata->blockRowCount; + } + else + { + blockRowCount = stripeMetadata->blockRowCount; + } + + oldContext = MemoryContextSwitchTo(readState->stripeReadContext); + + FreeBlockData(readState->blockData); + readState->blockData = + DeserializeBlockData(readState->stripeBuffers, blockIndex, + blockRowCount, readState->tupleDescriptor, + readState->projectedColumnList); + + MemoryContextSwitchTo(oldContext); + + 
readState->deserializedBlockIndex = blockIndex; + } + + ReadStripeNextRow(readState->stripeBuffers, readState->projectedColumnList, + blockIndex, blockRowIndex, readState->blockData, + columnValues, columnNulls); + + /* + * If we finished reading the current stripe, set stripe data to NULL. That + * way, we will load a new stripe the next time this function gets called. + */ + readState->stripeReadRowCount++; + if (readState->stripeReadRowCount == readState->stripeBuffers->rowCount) + { + readState->stripeBuffers = NULL; + } + + return true; +} + + +/* + * CStoreRescan clears the position where we were scanning so that the next read starts at + * the beginning again + */ +void +CStoreRescan(TableReadState *readState) +{ + readState->stripeBuffers = NULL; + readState->readStripeCount = 0; + readState->stripeReadRowCount = 0; +} + + +/* Finishes a cstore read operation. */ +void +CStoreEndRead(TableReadState *readState) +{ + MemoryContextDelete(readState->stripeReadContext); + list_free_deep(readState->datafileMetadata->stripeMetadataList); + pfree(readState->datafileMetadata); + pfree(readState); +} + + +/* + * CreateEmptyBlockDataArray creates data buffers to keep deserialized exist and + * value arrays for requested columns in columnMask. 
+ */ +BlockData * +CreateEmptyBlockData(uint32 columnCount, bool *columnMask, uint32 blockRowCount) +{ + uint32 columnIndex = 0; + + BlockData *blockData = palloc0(sizeof(BlockData)); + blockData->existsArray = palloc0(columnCount * sizeof(bool *)); + blockData->valueArray = palloc0(columnCount * sizeof(Datum *)); + blockData->valueBufferArray = palloc0(columnCount * sizeof(StringInfo)); + blockData->columnCount = columnCount; + blockData->rowCount = blockRowCount; + + /* allocate block memory for deserialized data */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + if (columnMask[columnIndex]) + { + blockData->existsArray[columnIndex] = palloc0(blockRowCount * sizeof(bool)); + blockData->valueArray[columnIndex] = palloc0(blockRowCount * sizeof(Datum)); + blockData->valueBufferArray[columnIndex] = NULL; + } + } + + return blockData; +} + + +/* + * FreeBlockData deallocates data buffers to keep deserialized exist and + * value arrays for requested columns in columnMask. + * ColumnBlockData->serializedValueBuffer lives in memory read/write context + * so it is deallocated automatically when the context is deleted. 
+ */ +void +FreeBlockData(BlockData *blockData) +{ + uint32 columnIndex = 0; + + if (blockData == NULL) + { + return; + } + + for (columnIndex = 0; columnIndex < blockData->columnCount; columnIndex++) + { + if (blockData->existsArray[columnIndex] != NULL) + { + pfree(blockData->existsArray[columnIndex]); + } + + if (blockData->valueArray[columnIndex] != NULL) + { + pfree(blockData->valueArray[columnIndex]); + } + } + + pfree(blockData->existsArray); + pfree(blockData->valueArray); + pfree(blockData); +} + + +/* CStoreTableRowCount returns the exact row count of a table using skiplists */ +uint64 +CStoreTableRowCount(Relation relation) +{ + DataFileMetadata *datafileMetadata = NULL; + ListCell *stripeMetadataCell = NULL; + uint64 totalRowCount = 0; + + datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode, false); + + foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) + { + StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); + totalRowCount += stripeMetadata->rowCount; + } + + return totalRowCount; +} + + +/* + * LoadFilteredStripeBuffers reads serialized stripe data from the given file. + * The function skips over blocks whose rows are refuted by restriction qualifiers, + * and only loads columns that are projected in the query. 
+ */ +static StripeBuffers * +LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, + TupleDesc tupleDescriptor, List *projectedColumnList, + List *whereClauseList) +{ + StripeBuffers *stripeBuffers = NULL; + ColumnBuffers **columnBuffersArray = NULL; + uint32 columnIndex = 0; + uint32 columnCount = tupleDescriptor->natts; + + bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); + + StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node.relNode, + stripeMetadata->id, + tupleDescriptor, + stripeMetadata->blockCount); + + bool *selectedBlockMask = SelectedBlockMask(stripeSkipList, projectedColumnList, + whereClauseList); + + StripeSkipList *selectedBlockSkipList = + SelectedBlockSkipList(stripeSkipList, projectedColumnMask, + selectedBlockMask); + + /* load column data for projected columns */ + columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); + + for (columnIndex = 0; columnIndex < stripeMetadata->columnCount; columnIndex++) + { + if (projectedColumnMask[columnIndex]) + { + ColumnBlockSkipNode *blockSkipNode = + selectedBlockSkipList->blockSkipNodeArray[columnIndex]; + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + uint32 blockCount = selectedBlockSkipList->blockCount; + + ColumnBuffers *columnBuffers = LoadColumnBuffers(relation, blockSkipNode, + blockCount, + stripeMetadata->fileOffset, + attributeForm); + + columnBuffersArray[columnIndex] = columnBuffers; + } + } + + stripeBuffers = palloc0(sizeof(StripeBuffers)); + stripeBuffers->columnCount = columnCount; + stripeBuffers->rowCount = StripeSkipListRowCount(selectedBlockSkipList); + stripeBuffers->columnBuffersArray = columnBuffersArray; + + return stripeBuffers; +} + + +/* + * ReadStripeNextRow reads the next row from the given stripe, finds the projected + * column values within this row, and accordingly sets the column values and nulls. 
+ * Note that this function sets the values for all non-projected columns to null. + */ +static void +ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, + uint64 blockIndex, uint64 blockRowIndex, + BlockData *blockData, Datum *columnValues, + bool *columnNulls) +{ + ListCell *projectedColumnCell = NULL; + + /* set all columns to null by default */ + memset(columnNulls, 1, stripeBuffers->columnCount * sizeof(bool)); + + foreach(projectedColumnCell, projectedColumnList) + { + Var *projectedColumn = lfirst(projectedColumnCell); + uint32 columnIndex = projectedColumn->varattno - 1; + + if (blockData->existsArray[columnIndex][blockRowIndex]) + { + columnValues[columnIndex] = blockData->valueArray[columnIndex][blockRowIndex]; + columnNulls[columnIndex] = false; + } + } +} + + +/* + * LoadColumnBuffers reads serialized column data from the given file. These + * column data are laid out as sequential blocks in the file; and block positions + * and lengths are retrieved from the column block skip node array. + */ +static ColumnBuffers * +LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, + uint32 blockCount, uint64 stripeOffset, + Form_pg_attribute attributeForm) +{ + ColumnBuffers *columnBuffers = NULL; + uint32 blockIndex = 0; + ColumnBlockBuffers **blockBuffersArray = + palloc0(blockCount * sizeof(ColumnBlockBuffers *)); + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + blockBuffersArray[blockIndex] = palloc0(sizeof(ColumnBlockBuffers)); + } + + /* + * We first read the "exists" blocks. We don't read "values" array here, + * because "exists" blocks are stored sequentially on disk, and we want to + * minimize disk seeks. 
+ */ + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + uint64 existsOffset = stripeOffset + blockSkipNode->existsBlockOffset; + StringInfo rawExistsBuffer = ReadFromSmgr(relation, existsOffset, + blockSkipNode->existsLength); + + blockBuffersArray[blockIndex]->existsBuffer = rawExistsBuffer; + } + + /* then read "values" blocks, which are also stored sequentially on disk */ + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + CompressionType compressionType = blockSkipNode->valueCompressionType; + uint64 valueOffset = stripeOffset + blockSkipNode->valueBlockOffset; + StringInfo rawValueBuffer = ReadFromSmgr(relation, valueOffset, + blockSkipNode->valueLength); + + blockBuffersArray[blockIndex]->valueBuffer = rawValueBuffer; + blockBuffersArray[blockIndex]->valueCompressionType = compressionType; + } + + columnBuffers = palloc0(sizeof(ColumnBuffers)); + columnBuffers->blockBuffersArray = blockBuffersArray; + + return columnBuffers; +} + + +/* + * SelectedBlockMask walks over each column's blocks and checks if a block can + * be filtered without reading its data. The filtering happens when all rows in + * the block can be refuted by the given qualifier conditions. 
+ */ +static bool * +SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, + List *whereClauseList) +{ + bool *selectedBlockMask = NULL; + ListCell *columnCell = NULL; + uint32 blockIndex = 0; + List *restrictInfoList = BuildRestrictInfoList(whereClauseList); + + selectedBlockMask = palloc0(stripeSkipList->blockCount * sizeof(bool)); + memset(selectedBlockMask, true, stripeSkipList->blockCount * sizeof(bool)); + + foreach(columnCell, projectedColumnList) + { + Var *column = lfirst(columnCell); + uint32 columnIndex = column->varattno - 1; + FmgrInfo *comparisonFunction = NULL; + Node *baseConstraint = NULL; + + /* if this column's data type doesn't have a comparator, skip it */ + comparisonFunction = GetFunctionInfoOrNull(column->vartype, BTREE_AM_OID, + BTORDER_PROC); + if (comparisonFunction == NULL) + { + continue; + } + + baseConstraint = BuildBaseConstraint(column); + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + bool predicateRefuted = false; + List *constraintList = NIL; + ColumnBlockSkipNode *blockSkipNodeArray = + stripeSkipList->blockSkipNodeArray[columnIndex]; + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + + /* + * A column block with comparable data type can miss min/max values + * if all values in the block are NULL. + */ + if (!blockSkipNode->hasMinMax) + { + continue; + } + + UpdateConstraint(baseConstraint, blockSkipNode->minimumValue, + blockSkipNode->maximumValue); + + constraintList = list_make1(baseConstraint); +#if (PG_VERSION_NUM >= 100000) + predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList, + false); +#else + predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList); +#endif + if (predicateRefuted) + { + selectedBlockMask[blockIndex] = false; + } + } + } + + return selectedBlockMask; +} + + +/* + * GetFunctionInfoOrNull first resolves the operator for the given data type, + * access method, and support procedure. 
The function then uses the resolved + * operator's identifier to fill in a function manager object, and returns + * this object. This function is based on a similar function from CitusDB's code. + */ +FmgrInfo * +GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, int16 procedureId) +{ + FmgrInfo *functionInfo = NULL; + Oid operatorClassId = InvalidOid; + Oid operatorFamilyId = InvalidOid; + Oid operatorId = InvalidOid; + + /* get default operator class from pg_opclass for datum type */ + operatorClassId = GetDefaultOpClass(typeId, accessMethodId); + if (operatorClassId == InvalidOid) + { + return NULL; + } + + operatorFamilyId = get_opclass_family(operatorClassId); + if (operatorFamilyId == InvalidOid) + { + return NULL; + } + + operatorId = get_opfamily_proc(operatorFamilyId, typeId, typeId, procedureId); + if (operatorId != InvalidOid) + { + functionInfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo)); + + /* fill in the FmgrInfo struct using the operatorId */ + fmgr_info(operatorId, functionInfo); + } + + return functionInfo; +} + + +/* + * BuildRestrictInfoList builds restrict info list using the selection criteria, + * and then return this list. The function is copied from CitusDB's shard pruning + * logic. + */ +static List * +BuildRestrictInfoList(List *whereClauseList) +{ + List *restrictInfoList = NIL; + + ListCell *qualCell = NULL; + foreach(qualCell, whereClauseList) + { + RestrictInfo *restrictInfo = NULL; + Node *qualNode = (Node *) lfirst(qualCell); + + restrictInfo = make_simple_restrictinfo((Expr *) qualNode); + restrictInfoList = lappend(restrictInfoList, restrictInfo); + } + + return restrictInfoList; +} + + +/* + * BuildBaseConstraint builds and returns a base constraint. This constraint + * implements an expression in the form of (var <= max && var >= min), where + * min and max values represent a block's min and max values. These block + * values are filled in after the constraint is built. 
This function is based + * on a similar function from CitusDB's shard pruning logic. + */ +static Node * +BuildBaseConstraint(Var *variable) +{ + Node *baseConstraint = NULL; + OpExpr *lessThanExpr = NULL; + OpExpr *greaterThanExpr = NULL; + + lessThanExpr = MakeOpExpression(variable, BTLessEqualStrategyNumber); + greaterThanExpr = MakeOpExpression(variable, BTGreaterEqualStrategyNumber); + + baseConstraint = make_and_qual((Node *) lessThanExpr, (Node *) greaterThanExpr); + + return baseConstraint; +} + + +/* + * MakeOpExpression builds an operator expression node. This operator expression + * implements the operator clause as defined by the variable and the strategy + * number. The function is copied from CitusDB's shard pruning logic. + */ +static OpExpr * +MakeOpExpression(Var *variable, int16 strategyNumber) +{ + Oid typeId = variable->vartype; + Oid typeModId = variable->vartypmod; + Oid collationId = variable->varcollid; + + Oid accessMethodId = BTREE_AM_OID; + Oid operatorId = InvalidOid; + Const *constantValue = NULL; + OpExpr *expression = NULL; + + /* Load the operator from system catalogs */ + operatorId = GetOperatorByType(typeId, accessMethodId, strategyNumber); + + constantValue = makeNullConst(typeId, typeModId, collationId); + + /* Now make the expression with the given variable and a null constant */ + expression = (OpExpr *) make_opclause(operatorId, + InvalidOid, /* no result type yet */ + false, /* no return set */ + (Expr *) variable, + (Expr *) constantValue, + InvalidOid, collationId); + + /* Set implementing function id and result type */ + expression->opfuncid = get_opcode(operatorId); + expression->opresulttype = get_func_rettype(expression->opfuncid); + + return expression; +} + + +/* + * GetOperatorByType returns operator Oid for the given type, access method, + * and strategy number. 
+ * Note that this function incorrectly errors out when
+ * the given type doesn't have its own operator but can use another compatible
+ * type's default operator. The function is copied from CitusDB's shard pruning
+ * logic.
+ */
+static Oid
+GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber)
+{
+	/* Get default operator class from pg_opclass */
+	Oid operatorClassId = GetDefaultOpClass(typeId, accessMethodId);
+
+	/*
+	 * NOTE(review): GetDefaultOpClass can return InvalidOid for types without
+	 * a default opclass; presumably get_opclass_family then errors out, which
+	 * is the "incorrectly errors out" behavior described above -- confirm.
+	 */
+	Oid operatorFamily = get_opclass_family(operatorClassId);
+
+	Oid operatorId = get_opfamily_member(operatorFamily, typeId, typeId, strategyNumber);
+
+	return operatorId;
+}
+
+
+/*
+ * UpdateConstraint updates the base constraint with the given min/max values.
+ * The function is copied from CitusDB's shard pruning logic.
+ */
+static void
+UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue)
+{
+	BoolExpr *andExpr = (BoolExpr *) baseConstraint;
+
+	/* BuildBaseConstraint places the <= expression first and the >= second */
+	Node *lessThanExpr = (Node *) linitial(andExpr->args);
+	Node *greaterThanExpr = (Node *) lsecond(andExpr->args);
+
+	/* min/max are stored in the right-hand Const of each comparison */
+	Node *minNode = get_rightop((Expr *) greaterThanExpr);
+	Node *maxNode = get_rightop((Expr *) lessThanExpr);
+	Const *minConstant = NULL;
+	Const *maxConstant = NULL;
+
+	Assert(IsA(minNode, Const));
+	Assert(IsA(maxNode, Const));
+
+	minConstant = (Const *) minNode;
+	maxConstant = (Const *) maxNode;
+
+	minConstant->constvalue = minValue;
+	maxConstant->constvalue = maxValue;
+
+	minConstant->constisnull = false;
+	maxConstant->constisnull = false;
+
+	/*
+	 * NOTE(review): constbyval is forced to true even when the column type is
+	 * by-reference (varlena); presumably acceptable because these Consts are
+	 * only consulted transiently by predicate_refuted_by during block
+	 * elimination -- confirm for by-reference column types.
+	 */
+	minConstant->constbyval = true;
+	maxConstant->constbyval = true;
+}
+
+
+/*
+ * SelectedBlockSkipList constructs a new StripeSkipList in which the
+ * non-selected blocks are removed from the given stripeSkipList.
+ */
+static StripeSkipList *
+SelectedBlockSkipList(StripeSkipList *stripeSkipList, bool *projectedColumnMask,
+					  bool *selectedBlockMask)
+{
+	/*
+	 * Renamed from "SelectedBlockSkipList" -- the original local variable
+	 * shadowed the enclosing function's own name.
+	 */
+	StripeSkipList *selectedSkipList = NULL;
+	ColumnBlockSkipNode **selectedBlockSkipNodeArray = NULL;
+	uint32 selectedBlockCount = 0;
+	uint32 blockIndex = 0;
+	uint32 columnIndex = 0;
+	uint32 columnCount = stripeSkipList->columnCount;
+
+	/* count how many blocks survived predicate-based elimination */
+	for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
+	{
+		if (selectedBlockMask[blockIndex])
+		{
+			selectedBlockCount++;
+		}
+	}
+
+	selectedBlockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *));
+	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
+	{
+		uint32 selectedBlockIndex = 0;
+		bool firstColumn = columnIndex == 0;
+
+		/* first column's block skip node is always read */
+		if (!projectedColumnMask[columnIndex] && !firstColumn)
+		{
+			selectedBlockSkipNodeArray[columnIndex] = NULL;
+			continue;
+		}
+
+		Assert(stripeSkipList->blockSkipNodeArray[columnIndex] != NULL);
+
+		selectedBlockSkipNodeArray[columnIndex] = palloc0(selectedBlockCount *
+														  sizeof(ColumnBlockSkipNode));
+
+		/* compact the surviving blocks' skip nodes into a dense array */
+		for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
+		{
+			if (selectedBlockMask[blockIndex])
+			{
+				selectedBlockSkipNodeArray[columnIndex][selectedBlockIndex] =
+					stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex];
+				selectedBlockIndex++;
+			}
+		}
+	}
+
+	selectedSkipList = palloc0(sizeof(StripeSkipList));
+	selectedSkipList->blockSkipNodeArray = selectedBlockSkipNodeArray;
+	selectedSkipList->blockCount = selectedBlockCount;
+	selectedSkipList->columnCount = stripeSkipList->columnCount;
+
+	return selectedSkipList;
+}
+
+
+/*
+ * StripeSkipListRowCount counts the number of rows in the given stripeSkipList.
+ * To do this, the function finds the first column, and sums up row counts across
+ * all blocks for that column.
+ */
+static uint32
+StripeSkipListRowCount(StripeSkipList *stripeSkipList)
+{
+	uint32 stripeSkipListRowCount = 0;
+	uint32 blockIndex = 0;
+	/* the first column's skip nodes cover every block of the stripe */
+	ColumnBlockSkipNode *firstColumnSkipNodeArray =
+		stripeSkipList->blockSkipNodeArray[0];
+
+	for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
+	{
+		uint32 blockRowCount = firstColumnSkipNodeArray[blockIndex].rowCount;
+		stripeSkipListRowCount += blockRowCount;
+	}
+
+	return stripeSkipListRowCount;
+}
+
+
+/*
+ * ProjectedColumnMask returns a boolean array in which the projected columns
+ * from the projected column list are marked as true.
+ */
+static bool *
+ProjectedColumnMask(uint32 columnCount, List *projectedColumnList)
+{
+	bool *projectedColumnMask = palloc0(columnCount * sizeof(bool));
+	ListCell *columnCell = NULL;
+
+	foreach(columnCell, projectedColumnList)
+	{
+		Var *column = (Var *) lfirst(columnCell);
+		/* attribute numbers are 1-based; the mask is 0-based */
+		uint32 columnIndex = column->varattno - 1;
+		projectedColumnMask[columnIndex] = true;
+	}
+
+	return projectedColumnMask;
+}
+
+
+/*
+ * DeserializeBoolArray reads an array of bits from the given buffer and stores
+ * it in provided bool array.
+ */
+static void
+DeserializeBoolArray(StringInfo boolArrayBuffer, bool *boolArray,
+					 uint32 boolArrayLength)
+{
+	uint32 boolArrayIndex = 0;
+
+	/* reject a truncated buffer before reading any bits */
+	uint32 maximumBoolCount = boolArrayBuffer->len * 8;
+	if (boolArrayLength > maximumBoolCount)
+	{
+		ereport(ERROR, (errmsg("insufficient data for reading boolean array")));
+	}
+
+	for (boolArrayIndex = 0; boolArrayIndex < boolArrayLength; boolArrayIndex++)
+	{
+		/* bits are packed least-significant-bit first, eight per byte */
+		uint32 byteIndex = boolArrayIndex / 8;
+		uint32 bitIndex = boolArrayIndex % 8;
+		uint8 bitmask = (1 << bitIndex);
+
+		uint8 shiftedBit = (boolArrayBuffer->data[byteIndex] & bitmask);
+		if (shiftedBit == 0)
+		{
+			boolArray[boolArrayIndex] = false;
+		}
+		else
+		{
+			boolArray[boolArrayIndex] = true;
+		}
+	}
+}
+
+
+/*
+ * DeserializeDatumArray reads an array of datums from the given buffer and stores
+ * them in provided datumArray.
If a value is marked as false in the exists array, + * the function assumes that the datum isn't in the buffer, and simply skips it. + */ +static void +DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCount, + bool datumTypeByValue, int datumTypeLength, + char datumTypeAlign, Datum *datumArray) +{ + uint32 datumIndex = 0; + uint32 currentDatumDataOffset = 0; + + for (datumIndex = 0; datumIndex < datumCount; datumIndex++) + { + char *currentDatumDataPointer = NULL; + + if (!existsArray[datumIndex]) + { + continue; + } + + currentDatumDataPointer = datumBuffer->data + currentDatumDataOffset; + + datumArray[datumIndex] = fetch_att(currentDatumDataPointer, datumTypeByValue, + datumTypeLength); + currentDatumDataOffset = att_addlength_datum(currentDatumDataOffset, + datumTypeLength, + currentDatumDataPointer); + currentDatumDataOffset = att_align_nominal(currentDatumDataOffset, + datumTypeAlign); + + if (currentDatumDataOffset > datumBuffer->len) + { + ereport(ERROR, (errmsg("insufficient data left in datum buffer"))); + } + } +} + + +/* + * DeserializeBlockData deserializes requested data block for all columns and + * stores in blockDataArray. It uncompresses serialized data if necessary. The + * function also deallocates data buffers used for previous block, and compressed + * data buffers for the current block which will not be needed again. If a column + * data is not present serialized buffer, then default value (or null) is used + * to fill value array. 
+ */ +static BlockData * +DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, + uint32 rowCount, TupleDesc tupleDescriptor, + List *projectedColumnList) +{ + int columnIndex = 0; + bool *columnMask = ProjectedColumnMask(tupleDescriptor->natts, projectedColumnList); + BlockData *blockData = CreateEmptyBlockData(tupleDescriptor->natts, columnMask, + rowCount); + + for (columnIndex = 0; columnIndex < stripeBuffers->columnCount; columnIndex++) + { + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + bool columnAdded = false; + + if (columnBuffers == NULL && columnMask[columnIndex]) + { + columnAdded = true; + } + + if (columnBuffers != NULL) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + StringInfo valueBuffer = NULL; + + /* decompress and deserialize current block's data */ + valueBuffer = DecompressBuffer(blockBuffers->valueBuffer, + blockBuffers->valueCompressionType); + + if (blockBuffers->valueCompressionType != COMPRESSION_NONE) + { + /* compressed data is not needed anymore */ + pfree(blockBuffers->valueBuffer->data); + pfree(blockBuffers->valueBuffer); + } + + DeserializeBoolArray(blockBuffers->existsBuffer, + blockData->existsArray[columnIndex], + rowCount); + DeserializeDatumArray(valueBuffer, blockData->existsArray[columnIndex], + rowCount, attributeForm->attbyval, + attributeForm->attlen, attributeForm->attalign, + blockData->valueArray[columnIndex]); + + /* store current block's data buffer to be freed at next block read */ + blockData->valueBufferArray[columnIndex] = valueBuffer; + } + else if (columnAdded) + { + /* + * This is a column that was added after creation of this stripe. + * So we use either the default value or NULL. 
+ */ + if (attributeForm->atthasdef) + { + int rowIndex = 0; + + Datum defaultValue = ColumnDefaultValue(tupleDescriptor->constr, + attributeForm); + + for (rowIndex = 0; rowIndex < rowCount; rowIndex++) + { + blockData->existsArray[columnIndex][rowIndex] = true; + blockData->valueArray[columnIndex][rowIndex] = defaultValue; + } + } + else + { + memset(blockData->existsArray[columnIndex], false, + rowCount * sizeof(bool)); + } + } + } + + return blockData; +} + + +/* + * ColumnDefaultValue returns default value for given column. Only const values + * are supported. The function errors on any other default value expressions. + */ +static Datum +ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm) +{ + Node *defaultValueNode = NULL; + int defValIndex = 0; + + for (defValIndex = 0; defValIndex < tupleConstraints->num_defval; defValIndex++) + { + AttrDefault attrDefault = tupleConstraints->defval[defValIndex]; + if (attrDefault.adnum == attributeForm->attnum) + { + defaultValueNode = stringToNode(attrDefault.adbin); + break; + } + } + + Assert(defaultValueNode != NULL); + + /* try reducing the default value node to a const node */ + defaultValueNode = eval_const_expressions(NULL, defaultValueNode); + if (IsA(defaultValueNode, Const)) + { + Const *constNode = (Const *) defaultValueNode; + return constNode->constvalue; + } + else + { + const char *columnName = NameStr(attributeForm->attname); + ereport(ERROR, (errmsg("unsupported default value for column \"%s\"", columnName), + errhint("Expression is either mutable or " + "does not evaluate to constant value"))); + } +} + + +static StringInfo +ReadFromSmgr(Relation rel, uint64 offset, uint32 size) +{ + StringInfo resultBuffer = makeStringInfo(); + uint64 read = 0; + + enlargeStringInfo(resultBuffer, size); + resultBuffer->len = size; + + while (read < size) + { + Buffer buffer; + Page page; + PageHeader phdr; + uint32 to_read; + SmgrAddr addr = logical_to_smgr(offset + read); + + buffer = 
ReadBuffer(rel, addr.blockno); + page = BufferGetPage(buffer); + phdr = (PageHeader) page; + + to_read = Min(size - read, phdr->pd_upper - addr.offset); + memcpy(resultBuffer->data + read, page + addr.offset, to_read); + ReleaseBuffer(buffer); + read += to_read; + } + + return resultBuffer; +} diff --git a/src/backend/columnar/cstore_tableam.c b/src/backend/columnar/cstore_tableam.c new file mode 100644 index 000000000..ce7d7de97 --- /dev/null +++ b/src/backend/columnar/cstore_tableam.c @@ -0,0 +1,1419 @@ +#include "postgres.h" + +#include + +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "access/rewriteheap.h" +#include "access/tableam.h" +#include "access/tsmapi.h" +#if PG_VERSION_NUM >= 130000 +#include "access/heaptoast.h" +#else +#include "access/tuptoaster.h" +#endif +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" +#include "catalog/pg_trigger.h" +#include "catalog/storage.h" +#include "catalog/storage_xlog.h" +#include "commands/progress.h" +#include "commands/vacuum.h" +#include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "optimizer/plancat.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/bufpage.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +#include "cstore.h" +#include "cstore_customscan.h" +#include "cstore_tableam.h" +#include "cstore_version_compat.h" + +#define CSTORE_TABLEAM_NAME "cstore_tableam" + +/* + * Timing parameters for truncate locking heuristics. 
+ *
+ * These are the same values from src/backend/access/heap/vacuumlazy.c
+ */
+#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
+#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
+
+/*
+ * Scan descriptor for the cstore table AM: extends the generic
+ * TableScanDescData with the columnar read state.
+ */
+typedef struct CStoreScanDescData
+{
+	TableScanDescData cs_base;
+	TableReadState *cs_readState;
+} CStoreScanDescData;
+
+typedef struct CStoreScanDescData *CStoreScanDesc;
+
+/* per-backend write state; flushed by cstore_free_write_state() */
+static TableWriteState *CStoreWriteState = NULL;
+/* previously-installed hooks, saved so they can be chained/restored */
+static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL;
+/* lazily-created long-lived context for read/write state allocations */
+static MemoryContext CStoreContext = NULL;
+static object_access_hook_type prevObjectAccessHook = NULL;
+static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;
+
+/* forward declaration for static functions */
+static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid
+										  objectId, int subId,
+										  void *arg);
+#if PG_VERSION_NUM >= 130000
+static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement,
+										const char *queryString,
+										ProcessUtilityContext context,
+										ParamListInfo paramListInfo,
+										QueryEnvironment *queryEnvironment,
+										DestReceiver *destReceiver,
+										QueryCompletion *qc);
+#else
+static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement,
+										const char *queryString,
+										ProcessUtilityContext context,
+										ParamListInfo paramListInfo,
+										QueryEnvironment *queryEnvironment,
+										DestReceiver *destReceiver,
+										char *completionTag);
+#endif
+
+static bool IsCStoreTableAmTable(Oid relationId);
+static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode,
+											   int timeout, int retryInterval);
+static void LogRelationStats(Relation rel, int elevel);
+static void TruncateCStore(Relation rel, int elevel);
+
+
+/*
+ * CStoreTableAMDefaultOptions returns the default options for a cstore table am table.
+ * These options are based on the GUC's controlling the defaults.
+ */
+static CStoreOptions *
+CStoreTableAMDefaultOptions(void)
+{
+	CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions));
+
+	/* defaults come from the cstore GUC variables */
+	cstoreOptions->compressionType = cstore_compression;
+	cstoreOptions->stripeRowCount = cstore_stripe_row_count;
+	cstoreOptions->blockRowCount = cstore_block_row_count;
+	return cstoreOptions;
+}
+
+
+/*
+ * CStoreTableAMGetOptions returns the options based on a relation. It is advised the
+ * relation is a cstore table am table, if not it will raise an error
+ */
+static CStoreOptions *
+CStoreTableAMGetOptions(Relation rel)
+{
+	CStoreOptions *cstoreOptions = NULL;
+	DataFileMetadata *metadata = NULL;
+
+	Assert(rel != NULL);
+
+	cstoreOptions = palloc0(sizeof(CStoreOptions));
+
+	/*
+	 * NOTE(review): second argument presumably means "missing ok" -- with
+	 * false a non-cstore relfilenode errors out; confirm against
+	 * ReadDataFileMetadata's definition.
+	 */
+	metadata = ReadDataFileMetadata(rel->rd_node.relNode, false);
+	cstoreOptions->compressionType = metadata->compression;
+	cstoreOptions->stripeRowCount = metadata->stripeRowCount;
+	cstoreOptions->blockRowCount = metadata->blockRowCount;
+	return cstoreOptions;
+}
+
+
+/*
+ * GetCStoreMemoryContext returns the long-lived memory context used for
+ * cstore state, creating it under TopMemoryContext on first use.
+ */
+static MemoryContext
+GetCStoreMemoryContext(void)
+{
+	if (CStoreContext == NULL)
+	{
+		CStoreContext = AllocSetContextCreate(TopMemoryContext, "cstore context",
+											  ALLOCSET_DEFAULT_SIZES);
+	}
+	return CStoreContext;
+}
+
+
+/*
+ * ResetCStoreMemoryContext releases all allocations made in the cstore
+ * context; the context itself is kept for reuse.
+ */
+static void
+ResetCStoreMemoryContext(void)
+{
+	if (CStoreContext != NULL)
+	{
+		MemoryContextReset(CStoreContext);
+	}
+}
+
+
+/*
+ * cstore_init_write_state lazily creates the per-backend write state for
+ * the given relation, using the relation's stored cstore options.
+ */
+static void
+cstore_init_write_state(Relation relation)
+{
+	if (CStoreWriteState != NULL)
+	{
+		/* TODO: consider whether it's possible for a new write to start */
+		/* before an old one is flushed */
+		Assert(CStoreWriteState->relation->rd_id == relation->rd_id);
+	}
+
+	if (CStoreWriteState == NULL)
+	{
+		CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relation);
+		TupleDesc tupdesc = RelationGetDescr(relation);
+
+		elog(LOG, "initializing write state for relation %d", relation->rd_id);
+		CStoreWriteState = CStoreBeginWrite(relation,
+											cstoreOptions->compressionType,
+											cstoreOptions->stripeRowCount,
+											cstoreOptions->blockRowCount,
+											tupdesc);
+	}
+} + + +static void +cstore_free_write_state() +{ + if (CStoreWriteState != NULL) + { + elog(LOG, "flushing write state for relation %d", + CStoreWriteState->relation->rd_id); + CStoreEndWrite(CStoreWriteState); + CStoreWriteState = NULL; + } +} + + +static List * +RelationColumnList(Relation rel) +{ + List *columnList = NIL; + TupleDesc tupdesc = RelationGetDescr(rel); + + for (int i = 0; i < tupdesc->natts; i++) + { + Index varno = 1; + AttrNumber varattno = i + 1; + Oid vartype = tupdesc->attrs[i].atttypid; + int32 vartypmod = tupdesc->attrs[i].atttypmod; + Oid varcollid = tupdesc->attrs[i].attcollation; + Index varlevelsup = 0; + Var *var; + + if (tupdesc->attrs[i].attisdropped) + { + continue; + } + + var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); + columnList = lappend(columnList, var); + } + + return columnList; +} + + +static const TupleTableSlotOps * +cstore_slot_callbacks(Relation relation) +{ + return &TTSOpsVirtual; +} + + +static TableScanDesc +cstore_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags) +{ + TableScanDesc scandesc; + int natts = relation->rd_att->natts; + Bitmapset *attr_needed = NULL; + + attr_needed = bms_add_range(attr_needed, 0, natts - 1); + + /* the cstore access method does not use the flags, they are specific to heap */ + flags = 0; + + scandesc = cstore_beginscan_extended(relation, snapshot, nkeys, key, parallel_scan, + flags, attr_needed, NULL); + + pfree(attr_needed); + + return scandesc; +} + + +TableScanDesc +cstore_beginscan_extended(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags, Bitmapset *attr_needed, List *scanQual) +{ + TupleDesc tupdesc = relation->rd_att; + TableReadState *readState = NULL; + CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); + List *columnList = NIL; + List *neededColumnList = NIL; + MemoryContext oldContext = 
MemoryContextSwitchTo(GetCStoreMemoryContext()); + ListCell *columnCell = NULL; + + scan->cs_base.rs_rd = relation; + scan->cs_base.rs_snapshot = snapshot; + scan->cs_base.rs_nkeys = nkeys; + scan->cs_base.rs_key = key; + scan->cs_base.rs_flags = flags; + scan->cs_base.rs_parallel = parallel_scan; + + columnList = RelationColumnList(relation); + + /* only collect columns that we need for the scan */ + foreach(columnCell, columnList) + { + Var *var = castNode(Var, lfirst(columnCell)); + if (bms_is_member(var->varattno - 1, attr_needed)) + { + neededColumnList = lappend(neededColumnList, var); + } + } + + readState = CStoreBeginRead(relation, tupdesc, neededColumnList, scanQual); + + scan->cs_readState = readState; + + MemoryContextSwitchTo(oldContext); + return ((TableScanDesc) scan); +} + + +static void +cstore_endscan(TableScanDesc sscan) +{ + CStoreScanDesc scan = (CStoreScanDesc) sscan; + CStoreEndRead(scan->cs_readState); + scan->cs_readState = NULL; +} + + +static void +cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + CStoreScanDesc scan = (CStoreScanDesc) sscan; + CStoreRescan(scan->cs_readState); +} + + +static bool +cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + CStoreScanDesc scan = (CStoreScanDesc) sscan; + bool nextRowFound; + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); + + ExecClearTuple(slot); + + nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, + slot->tts_isnull); + + MemoryContextSwitchTo(oldContext); + + if (!nextRowFound) + { + return false; + } + + ExecStoreVirtualTuple(slot); + return true; +} + + +static Size +cstore_parallelscan_estimate(Relation rel) +{ + elog(ERROR, "cstore_parallelscan_estimate not implemented"); +} + + +static Size +cstore_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) +{ + elog(ERROR, "cstore_parallelscan_initialize not 
implemented"); +} + + +static void +cstore_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) +{ + elog(ERROR, "cstore_parallelscan_reinitialize not implemented"); +} + + +static IndexFetchTableData * +cstore_index_fetch_begin(Relation rel) +{ + elog(ERROR, "cstore_index_fetch_begin not implemented"); +} + + +static void +cstore_index_fetch_reset(IndexFetchTableData *scan) +{ + elog(ERROR, "cstore_index_fetch_reset not implemented"); +} + + +static void +cstore_index_fetch_end(IndexFetchTableData *scan) +{ + elog(ERROR, "cstore_index_fetch_end not implemented"); +} + + +static bool +cstore_index_fetch_tuple(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead) +{ + elog(ERROR, "cstore_index_fetch_tuple not implemented"); +} + + +static bool +cstore_fetch_row_version(Relation relation, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_fetch_row_version not implemented"); +} + + +static void +cstore_get_latest_tid(TableScanDesc sscan, + ItemPointer tid) +{ + elog(ERROR, "cstore_get_latest_tid not implemented"); +} + + +static bool +cstore_tuple_tid_valid(TableScanDesc scan, ItemPointer tid) +{ + elog(ERROR, "cstore_tuple_tid_valid not implemented"); +} + + +static bool +cstore_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, + Snapshot snapshot) +{ + return true; +} + + +static TransactionId +cstore_compute_xid_horizon_for_tuples(Relation rel, + ItemPointerData *tids, + int nitems) +{ + elog(ERROR, "cstore_compute_xid_horizon_for_tuples not implemented"); +} + + +static void +cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate) +{ + HeapTuple heapTuple; + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); + + cstore_init_write_state(relation); + + heapTuple = ExecCopySlotHeapTuple(slot); + if (HeapTupleHasExternal(heapTuple)) + { + /* 
detoast any toasted attributes */ + HeapTuple newTuple = toast_flatten_tuple(heapTuple, + slot->tts_tupleDescriptor); + + ExecForceStoreHeapTuple(newTuple, slot, true); + } + + slot_getallattrs(slot); + + CStoreWriteRow(CStoreWriteState, slot->tts_values, slot->tts_isnull); + MemoryContextSwitchTo(oldContext); +} + + +static void +cstore_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, + CommandId cid, int options, + BulkInsertState bistate, uint32 specToken) +{ + elog(ERROR, "cstore_tuple_insert_speculative not implemented"); +} + + +static void +cstore_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, + uint32 specToken, bool succeeded) +{ + elog(ERROR, "cstore_tuple_complete_speculative not implemented"); +} + + +static void +cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, + CommandId cid, int options, BulkInsertState bistate) +{ + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); + + cstore_init_write_state(relation); + + for (int i = 0; i < ntuples; i++) + { + TupleTableSlot *tupleSlot = slots[i]; + HeapTuple heapTuple = ExecCopySlotHeapTuple(tupleSlot); + + if (HeapTupleHasExternal(heapTuple)) + { + /* detoast any toasted attributes */ + HeapTuple newTuple = toast_flatten_tuple(heapTuple, + tupleSlot->tts_tupleDescriptor); + + ExecForceStoreHeapTuple(newTuple, tupleSlot, true); + } + + slot_getallattrs(tupleSlot); + + CStoreWriteRow(CStoreWriteState, tupleSlot->tts_values, tupleSlot->tts_isnull); + } + MemoryContextSwitchTo(oldContext); +} + + +static TM_Result +cstore_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, + Snapshot snapshot, Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, bool changingPart) +{ + elog(ERROR, "cstore_tuple_delete not implemented"); +} + + +static TM_Result +cstore_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, + CommandId cid, Snapshot snapshot, Snapshot crosscheck, + bool wait, TM_FailureData 
*tmfd, + LockTupleMode *lockmode, bool *update_indexes) +{ + elog(ERROR, "cstore_tuple_update not implemented"); +} + + +static TM_Result +cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, + TupleTableSlot *slot, CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + TM_FailureData *tmfd) +{ + elog(ERROR, "cstore_tuple_lock not implemented"); +} + + +static void +cstore_finish_bulk_insert(Relation relation, int options) +{ + /*TODO: flush relation like for heap? */ + /* free write state or only in ExecutorEnd_hook? */ + + /* for COPY */ + cstore_free_write_state(); +} + + +static void +cstore_relation_set_new_filenode(Relation rel, + const RelFileNode *newrnode, + char persistence, + TransactionId *freezeXid, + MultiXactId *minmulti) +{ + SMgrRelation srel; + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + uint64 blockRowCount = 0; + uint64 stripeRowCount = 0; + CompressionType compression = 0; + + if (metadata != NULL) + { + /* existing table (e.g. 
TRUNCATE), use existing blockRowCount */ + blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; + } + else + { + /* new table, use options */ + CStoreOptions *options = CStoreTableAMDefaultOptions(); + blockRowCount = options->blockRowCount; + stripeRowCount = options->stripeRowCount; + compression = options->compressionType; + } + + /* delete old relfilenode metadata */ + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + + Assert(persistence == RELPERSISTENCE_PERMANENT); + *freezeXid = RecentXmin; + *minmulti = GetOldestMultiXactId(); + srel = RelationCreateStorage(*newrnode, persistence); + InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount, stripeRowCount, + compression); + smgrclose(srel); +} + + +static void +cstore_relation_nontransactional_truncate(Relation rel) +{ + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + + /* + * No need to set new relfilenode, since the table was created in this + * transaction and no other transaction can see this relation yet. We + * can just truncate the relation. + * + * This is similar to what is done in heapam_relation_nontransactional_truncate. + */ + RelationTruncate(rel, 0); + + /* Delete old relfilenode metadata and recreate it */ + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount, + metadata->stripeRowCount, metadata->compression); +} + + +static void +cstore_relation_copy_data(Relation rel, const RelFileNode *newrnode) +{ + elog(ERROR, "cstore_relation_copy_data not implemented"); +} + + +/* + * cstore_relation_copy_for_cluster is called on VACUUM FULL, at which + * we should copy data from OldHeap to NewHeap. + * + * In general TableAM case this can also be called for the CLUSTER command + * which is not applicable for cstore since it doesn't support indexes. 
+ */ +static void +cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, + Relation OldIndex, bool use_sort, + TransactionId OldestXmin, + TransactionId *xid_cutoff, + MultiXactId *multi_cutoff, + double *num_tuples, + double *tups_vacuumed, + double *tups_recently_dead) +{ + TableWriteState *writeState = NULL; + TableReadState *readState = NULL; + CStoreOptions *cstoreOptions = NULL; + Datum *values = NULL; + bool *nulls = NULL; + TupleDesc sourceDesc = RelationGetDescr(OldHeap); + TupleDesc targetDesc = RelationGetDescr(NewHeap); + + if (OldIndex != NULL || use_sort) + { + ereport(ERROR, (errmsg(CSTORE_TABLEAM_NAME " doesn't support indexes"))); + } + + /* + * copy_table_data in cluster.c assumes tuple descriptors are exactly + * the same. Even dropped columns exist and are marked as attisdropped + * in the target relation. + */ + Assert(sourceDesc->natts == targetDesc->natts); + + /* + * Since we are copying into a new relation we need to copy the settings from the old + * relation first. + */ + + cstoreOptions = CStoreTableAMGetOptions(OldHeap); + + UpdateCStoreDataFileMetadata(NewHeap->rd_node.relNode, + cstoreOptions->blockRowCount, + cstoreOptions->stripeRowCount, + cstoreOptions->compressionType); + + cstoreOptions = CStoreTableAMGetOptions(NewHeap); + + writeState = CStoreBeginWrite(NewHeap, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, + targetDesc); + + readState = CStoreBeginRead(OldHeap, sourceDesc, RelationColumnList(OldHeap), NULL); + + values = palloc0(sourceDesc->natts * sizeof(Datum)); + nulls = palloc0(sourceDesc->natts * sizeof(bool)); + + *num_tuples = 0; + + while (CStoreReadNextRow(readState, values, nulls)) + { + CStoreWriteRow(writeState, values, nulls); + (*num_tuples)++; + } + + *tups_vacuumed = 0; + + CStoreEndWrite(writeState); + CStoreEndRead(readState); +} + + +/* + * cstore_vacuum_rel implements VACUUM without FULL option. 
+ */ +static void +cstore_vacuum_rel(Relation rel, VacuumParams *params, + BufferAccessStrategy bstrategy) +{ + int elevel = (params->options & VACOPT_VERBOSE) ? INFO : DEBUG2; + + /* this should have been resolved by vacuum.c until now */ + Assert(params->truncate != VACOPT_TERNARY_DEFAULT); + + LogRelationStats(rel, elevel); + + /* + * We don't have updates, deletes, or concurrent updates, so all we + * care for now is truncating the unused space at the end of storage. + */ + if (params->truncate == VACOPT_TERNARY_ENABLED) + { + TruncateCStore(rel, elevel); + } +} + + +static void +LogRelationStats(Relation rel, int elevel) +{ + DataFileMetadata *datafileMetadata = NULL; + ListCell *stripeMetadataCell = NULL; + Oid relfilenode = rel->rd_node.relNode; + StringInfo infoBuf = makeStringInfo(); + + int compressionStats[COMPRESSION_COUNT] = { 0 }; + uint64 totalStripeLength = 0; + uint64 tupleCount = 0; + uint64 blockCount = 0; + uint64 relPages = 0; + int stripeCount = 0; + TupleDesc tupdesc = RelationGetDescr(rel); + uint64 droppedBlocksWithData = 0; + + datafileMetadata = ReadDataFileMetadata(relfilenode, false); + stripeCount = list_length(datafileMetadata->stripeMetadataList); + + foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id, + RelationGetDescr(rel), + stripe->blockCount); + for (uint32 column = 0; column < skiplist->columnCount; column++) + { + bool attrDropped = tupdesc->attrs[column].attisdropped; + for (uint32 block = 0; block < skiplist->blockCount; block++) + { + ColumnBlockSkipNode *skipnode = + &skiplist->blockSkipNodeArray[column][block]; + + /* ignore zero length blocks for dropped attributes */ + if (skipnode->valueLength > 0) + { + compressionStats[skipnode->valueCompressionType]++; + blockCount++; + + if (attrDropped) + { + droppedBlocksWithData++; + } + } + } + } + + tupleCount += stripe->rowCount; + 
totalStripeLength += stripe->dataLength; + } + + RelationOpenSmgr(rel); + relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n", + relPages * BLCKSZ, totalStripeLength); + appendStringInfo(infoBuf, + "total row count: %ld, stripe count: %d, " + "average rows per stripe: %ld\n", + tupleCount, stripeCount, tupleCount / stripeCount); + appendStringInfo(infoBuf, + "block count: %ld" + ", containing data for dropped columns: %ld", + blockCount, droppedBlocksWithData); + for (int compressionType = 0; compressionType < COMPRESSION_COUNT; compressionType++) + { + appendStringInfo(infoBuf, + ", %s compressed: %d", + CompressionTypeStr(compressionType), + compressionStats[compressionType]); + } + appendStringInfoString(infoBuf, "\n"); + + ereport(elevel, (errmsg("statistics for \"%s\":\n%s", RelationGetRelationName(rel), + infoBuf->data))); +} + + +/* + * TruncateCStore truncates the unused space at the end of main fork for + * a cstore table. This unused space can be created by aborted transactions. + * + * This implementation is based on heap_vacuum_rel in vacuumlazy.c with some + * changes so it suits columnar store relations. + */ +static void +TruncateCStore(Relation rel, int elevel) +{ + PGRUsage ru0; + BlockNumber old_rel_pages = 0; + BlockNumber new_rel_pages = 0; + SmgrAddr highestPhysicalAddress; + + pg_rusage_init(&ru0); + + /* Report that we are now truncating */ + pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, + PROGRESS_VACUUM_PHASE_TRUNCATE); + + + /* + * We need access exclusive lock on the relation in order to do + * truncation. If we can't get it, give up rather than waiting --- we + * don't want to block other backends, and we don't want to deadlock + * (which is quite possible considering we already hold a lower-grade + * lock). 
+ * + * The decisions for AccessExclusiveLock and conditional lock with + * a timeout is based on lazy_truncate_heap in vacuumlazy.c. + */ + if (!ConditionalLockRelationWithTimeout(rel, AccessExclusiveLock, + VACUUM_TRUNCATE_LOCK_TIMEOUT, + VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) + { + /* + * We failed to establish the lock in the specified number of + * retries. This means we give up truncating. + */ + ereport(elevel, + (errmsg("\"%s\": stopping truncate due to conflicting lock request", + RelationGetRelationName(rel)))); + return; + } + + RelationOpenSmgr(rel); + old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + /* + * Due to the AccessExclusive lock there's no danger that + * new stripes be added beyond highestPhysicalAddress while + * we're truncating. + */ + highestPhysicalAddress = + logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode)); + + new_rel_pages = highestPhysicalAddress.blockno + 1; + if (new_rel_pages == old_rel_pages) + { + UnlockRelation(rel, AccessExclusiveLock); + return; + } + + /* + * Truncate the storage. Note that RelationTruncate() takes care of + * Write Ahead Logging. + */ + RelationTruncate(rel, new_rel_pages); + + /* + * We can release the exclusive lock as soon as we have truncated. + * Other backends can't safely access the relation until they have + * processed the smgr invalidation that smgrtruncate sent out ... but + * that should happen as part of standard invalidation processing once + * they acquire lock on the relation. + */ + UnlockRelation(rel, AccessExclusiveLock); + + ereport(elevel, + (errmsg("\"%s\": truncated %u to %u pages", + RelationGetRelationName(rel), + old_rel_pages, new_rel_pages), + errdetail_internal("%s", pg_rusage_show(&ru0)))); +} + + +/* + * ConditionalLockRelationWithTimeout tries to acquire a relation lock until + * it either succeeds or timesout. It doesn't enter wait queue and instead it + * sleeps between lock tries. 
+ * + * This is based on the lock loop in lazy_truncate_heap(). + */ +static bool +ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, + int retryInterval) +{ + int lock_retry = 0; + + while (true) + { + if (ConditionalLockRelation(rel, lockMode)) + { + break; + } + + /* + * Check for interrupts while trying to (re-)acquire the lock + */ + CHECK_FOR_INTERRUPTS(); + + if (++lock_retry > (timeout / retryInterval)) + { + return false; + } + + pg_usleep(retryInterval * 1000L); + } + + return true; +} + + +static bool +cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy bstrategy) +{ + /* + * Our access method is not pages based, i.e. tuples are not confined + * to pages boundaries. So not much to do here. We return true anyway + * so acquire_sample_rows() in analyze.c would call our + * cstore_scan_analyze_next_tuple() callback. + */ + return true; +} + + +static bool +cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, + double *liverows, double *deadrows, + TupleTableSlot *slot) +{ + /* + * Currently we don't do anything smart to reduce number of rows returned + * for ANALYZE. The TableAM API's ANALYZE functions are designed for page + * based access methods where it chooses random pages, and then reads + * tuples from those pages. + * + * We could do something like that here by choosing sample stripes or blocks, + * but getting that correct might need quite some work. Since cstore_fdw's + * ANALYZE scanned all rows, as a starter we do the same here and scan all + * rows. 
+ */ + if (cstore_getnextslot(scan, ForwardScanDirection, slot)) + { + (*liverows)++; + return true; + } + + return false; +} + + +static double +cstore_index_build_range_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + bool allow_sync, + bool anyvisible, + bool progress, + BlockNumber start_blockno, + BlockNumber numblocks, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan) +{ + elog(ERROR, "cstore_index_build_range_scan not implemented"); +} + + +static void +cstore_index_validate_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + ValidateIndexState *state) +{ + elog(ERROR, "cstore_index_validate_scan not implemented"); +} + + +static uint64 +cstore_relation_size(Relation rel, ForkNumber forkNumber) +{ + uint64 nblocks = 0; + + /* Open it at the smgr level if not already done */ + RelationOpenSmgr(rel); + + /* InvalidForkNumber indicates returning the size for all forks */ + if (forkNumber == InvalidForkNumber) + { + for (int i = 0; i < MAX_FORKNUM; i++) + { + nblocks += smgrnblocks(rel->rd_smgr, i); + } + } + else + { + nblocks = smgrnblocks(rel->rd_smgr, forkNumber); + } + + return nblocks * BLCKSZ; +} + + +static bool +cstore_relation_needs_toast_table(Relation rel) +{ + return false; +} + + +static void +cstore_estimate_rel_size(Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, + double *allvisfrac) +{ + RelationOpenSmgr(rel); + *pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + *tuples = CStoreTableRowCount(rel); + + /* + * Append-only, so everything is visible except in-progress or rolled-back + * transactions. 
+ */ + *allvisfrac = 1.0; + + get_rel_data_width(rel, attr_widths); +} + + +static bool +cstore_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate) +{ + elog(ERROR, "cstore_scan_sample_next_block not implemented"); +} + + +static bool +cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_scan_sample_next_tuple not implemented"); +} + + +static void +CStoreExecutorEnd(QueryDesc *queryDesc) +{ + cstore_free_write_state(); + if (PreviousExecutorEndHook) + { + PreviousExecutorEndHook(queryDesc); + } + else + { + standard_ExecutorEnd(queryDesc); + } + ResetCStoreMemoryContext(); +} + + +#if PG_VERSION_NUM >= 130000 +static void +CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + QueryCompletion *queryCompletion) +#else +static void +CStoreTableAMProcessUtility(PlannedStmt * plannedStatement, + const char * queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment * queryEnvironment, + DestReceiver * destReceiver, + char * completionTag) +#endif +{ + Node *parseTree = plannedStatement->utilityStmt; + + if (nodeTag(parseTree) == T_CreateTrigStmt) + { + CreateTrigStmt *createTrigStmt = (CreateTrigStmt *) parseTree; + Relation rel; + bool isCStore; + + rel = relation_openrv(createTrigStmt->relation, AccessShareLock); + isCStore = rel->rd_tableam == GetCstoreTableAmRoutine(); + relation_close(rel, AccessShareLock); + + if (isCStore && + createTrigStmt->row && + createTrigStmt->timing == TRIGGER_TYPE_AFTER) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "AFTER ROW triggers are not supported for columnstore access method"), + errhint("Consider an AFTER STATEMENT trigger instead."))); + } + } + + CALL_PREVIOUS_UTILITY(); +} + + +void +cstore_tableam_init() 
+{ + PreviousExecutorEndHook = ExecutorEnd_hook; + ExecutorEnd_hook = CStoreExecutorEnd; + PreviousProcessUtilityHook = (ProcessUtility_hook != NULL) ? + ProcessUtility_hook : standard_ProcessUtility; + ProcessUtility_hook = CStoreTableAMProcessUtility; + prevObjectAccessHook = object_access_hook; + object_access_hook = CStoreTableAMObjectAccessHook; + + cstore_customscan_init(); +} + + +void +cstore_tableam_finish() +{ + ExecutorEnd_hook = PreviousExecutorEndHook; +} + + +/* + * Implements object_access_hook. One of the places this is called is just + * before dropping an object, which allows us to clean-up resources for + * cstore tables. + * + * See the comments for CStoreFdwObjectAccessHook for more details. + */ +static void +CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, int + subId, + void *arg) +{ + if (prevObjectAccessHook) + { + prevObjectAccessHook(access, classId, objectId, subId, arg); + } + + /* + * Do nothing if this is not a DROP relation command. + */ + if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) + { + return; + } + + /* + * Lock relation to prevent it from being dropped and to avoid + * race conditions in the next if block. + */ + LockRelationOid(objectId, AccessShareLock); + + if (IsCStoreTableAmTable(objectId)) + { + /* + * Drop metadata. No need to drop storage here since for + * tableam tables storage is managed by postgres. + */ + Relation rel = table_open(objectId, AccessExclusiveLock); + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + + /* keep the lock since we did physical changes to the relation */ + table_close(rel, NoLock); + } +} + + +/* + * IsCStoreTableAmTable returns true if relation has cstore_tableam + * access method. This can be called before extension creation. 
+ */ +static bool +IsCStoreTableAmTable(Oid relationId) +{ + bool result; + Relation rel; + + if (!OidIsValid(relationId)) + { + return false; + } + + /* + * Lock relation to prevent it from being dropped & + * avoid race conditions. + */ + rel = relation_open(relationId, AccessShareLock); + result = rel->rd_tableam == GetCstoreTableAmRoutine(); + relation_close(rel, NoLock); + + return result; +} + + +static const TableAmRoutine cstore_am_methods = { + .type = T_TableAmRoutine, + + .slot_callbacks = cstore_slot_callbacks, + + .scan_begin = cstore_beginscan, + .scan_end = cstore_endscan, + .scan_rescan = cstore_rescan, + .scan_getnextslot = cstore_getnextslot, + + .parallelscan_estimate = cstore_parallelscan_estimate, + .parallelscan_initialize = cstore_parallelscan_initialize, + .parallelscan_reinitialize = cstore_parallelscan_reinitialize, + + .index_fetch_begin = cstore_index_fetch_begin, + .index_fetch_reset = cstore_index_fetch_reset, + .index_fetch_end = cstore_index_fetch_end, + .index_fetch_tuple = cstore_index_fetch_tuple, + + .tuple_fetch_row_version = cstore_fetch_row_version, + .tuple_get_latest_tid = cstore_get_latest_tid, + .tuple_tid_valid = cstore_tuple_tid_valid, + .tuple_satisfies_snapshot = cstore_tuple_satisfies_snapshot, + .compute_xid_horizon_for_tuples = cstore_compute_xid_horizon_for_tuples, + + .tuple_insert = cstore_tuple_insert, + .tuple_insert_speculative = cstore_tuple_insert_speculative, + .tuple_complete_speculative = cstore_tuple_complete_speculative, + .multi_insert = cstore_multi_insert, + .tuple_delete = cstore_tuple_delete, + .tuple_update = cstore_tuple_update, + .tuple_lock = cstore_tuple_lock, + .finish_bulk_insert = cstore_finish_bulk_insert, + + .relation_set_new_filenode = cstore_relation_set_new_filenode, + .relation_nontransactional_truncate = cstore_relation_nontransactional_truncate, + .relation_copy_data = cstore_relation_copy_data, + .relation_copy_for_cluster = cstore_relation_copy_for_cluster, + .relation_vacuum = 
cstore_vacuum_rel, + .scan_analyze_next_block = cstore_scan_analyze_next_block, + .scan_analyze_next_tuple = cstore_scan_analyze_next_tuple, + .index_build_range_scan = cstore_index_build_range_scan, + .index_validate_scan = cstore_index_validate_scan, + + .relation_size = cstore_relation_size, + .relation_needs_toast_table = cstore_relation_needs_toast_table, + + .relation_estimate_size = cstore_estimate_rel_size, + + .scan_bitmap_next_block = NULL, + .scan_bitmap_next_tuple = NULL, + .scan_sample_next_block = cstore_scan_sample_next_block, + .scan_sample_next_tuple = cstore_scan_sample_next_tuple +}; + + +const TableAmRoutine * +GetCstoreTableAmRoutine(void) +{ + return &cstore_am_methods; +} + + +PG_FUNCTION_INFO_V1(cstore_tableam_handler); +Datum +cstore_tableam_handler(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(&cstore_am_methods); +} + + +/* + * alter_cstore_table_set is a UDF exposed in postgres to change settings on a columnar + * table. Calling this function on a non-columnar table gives an error. + * + * sql syntax: + * pg_catalog.alter_cstore_table_set( + * table_name regclass, + * block_row_count int DEFAULT NULL, + * stripe_row_count int DEFAULT NULL, + * compression name DEFAULT null) + * + * All arguments except the table name are optional. The UDF is supposed to be called + * like: + * SELECT alter_cstore_table_set('table', compression => 'pglz'); + * + * This will only update the compression of the table, keeping all other settings the + * same. Multiple settings can be changed at the same time by providing multiple + * arguments. Calling the argument with the NULL value will be interperted as not having + * provided the argument. 
+ */ +PG_FUNCTION_INFO_V1(alter_cstore_table_set); +Datum +alter_cstore_table_set(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + int blockRowCount = 0; + int stripeRowCount = 0; + CompressionType compression = COMPRESSION_TYPE_INVALID; + + Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */ + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + if (!metadata) + { + ereport(ERROR, (errmsg("table %s is not a cstore table", + quote_identifier(RelationGetRelationName(rel))))); + } + + blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; + + /* block_row_count => not null */ + if (!PG_ARGISNULL(1)) + { + blockRowCount = PG_GETARG_INT32(1); + ereport(DEBUG1, (errmsg("updating block row count to %d", blockRowCount))); + } + + /* stripe_row_count => not null */ + if (!PG_ARGISNULL(2)) + { + stripeRowCount = PG_GETARG_INT32(2); + ereport(DEBUG1, (errmsg("updating stripe row count to %d", stripeRowCount))); + } + + /* compression => not null */ + if (!PG_ARGISNULL(3)) + { + Name compressionName = PG_GETARG_NAME(3); + compression = ParseCompressionType(NameStr(*compressionName)); + if (compression == COMPRESSION_TYPE_INVALID) + { + ereport(ERROR, (errmsg("unknown compression type for cstore table: %s", + quote_identifier(NameStr(*compressionName))))); + } + ereport(DEBUG1, (errmsg("updating compression to %s", + CompressionTypeStr(compression)))); + } + + UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount, + compression); + + table_close(rel, NoLock); + + PG_RETURN_VOID(); +} + + +PG_FUNCTION_INFO_V1(alter_cstore_table_reset); +Datum +alter_cstore_table_reset(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + int blockRowCount = 0; + int stripeRowCount = 0; + CompressionType compression = COMPRESSION_TYPE_INVALID; + + Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK 
*/ + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + if (!metadata) + { + ereport(ERROR, (errmsg("table %s is not a cstore table", + quote_identifier(RelationGetRelationName(rel))))); + } + + blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; + + /* block_row_count => true */ + if (!PG_ARGISNULL(1) && PG_GETARG_BOOL(1)) + { + blockRowCount = cstore_block_row_count; + ereport(DEBUG1, (errmsg("resetting block row count to %d", blockRowCount))); + } + + /* stripe_row_count => true */ + if (!PG_ARGISNULL(2) && PG_GETARG_BOOL(2)) + { + stripeRowCount = cstore_stripe_row_count; + ereport(DEBUG1, (errmsg("resetting stripe row count to %d", stripeRowCount))); + } + + /* compression => true */ + if (!PG_ARGISNULL(3) && PG_GETARG_BOOL(3)) + { + compression = cstore_compression; + ereport(DEBUG1, (errmsg("resetting compression to %s", + CompressionTypeStr(compression)))); + } + + UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount, + compression); + + table_close(rel, NoLock); + + PG_RETURN_VOID(); +} diff --git a/src/backend/columnar/cstore_tableam.h b/src/backend/columnar/cstore_tableam.h new file mode 100644 index 000000000..557506b9f --- /dev/null +++ b/src/backend/columnar/cstore_tableam.h @@ -0,0 +1,15 @@ +#include "postgres.h" +#include "fmgr.h" +#include "access/tableam.h" +#include "access/skey.h" +#include "nodes/bitmapset.h" + +const TableAmRoutine * GetCstoreTableAmRoutine(void); +extern void cstore_tableam_init(void); +extern void cstore_tableam_finish(void); + +extern TableScanDesc cstore_beginscan_extended(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags, Bitmapset *attr_needed, + List *scanQual); diff --git a/src/backend/columnar/cstore_version_compat.h b/src/backend/columnar/cstore_version_compat.h new file mode 100644 index 000000000..69eb9c9f3 --- 
/dev/null +++ b/src/backend/columnar/cstore_version_compat.h @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------- + * + * cstore_version_compat.h + * + * Compatibility macros for writing code agnostic to PostgreSQL versions + * + * Copyright (c) 2018, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_COMPAT_H +#define CSTORE_COMPAT_H + +#if PG_VERSION_NUM < 100000 + +/* Accessor for the i'th attribute of tupdesc. */ +#define TupleDescAttr(tupdesc, i) ((tupdesc)->attrs[(i)]) + +#endif + +#if PG_VERSION_NUM < 110000 +#define ALLOCSET_DEFAULT_SIZES ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, \ + ALLOCSET_DEFAULT_MAXSIZE +#define ACLCHECK_OBJECT_TABLE ACL_KIND_CLASS +#else +#define ACLCHECK_OBJECT_TABLE OBJECT_TABLE + +#define ExplainPropertyLong(qlabel, value, es) \ + ExplainPropertyInteger(qlabel, NULL, value, es) +#endif + +#if PG_VERSION_NUM >= 130000 +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(plannedStatement, queryString, context, paramListInfo, \ + queryEnvironment, destReceiver, queryCompletion) +#elif PG_VERSION_NUM >= 100000 +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(plannedStatement, queryString, context, paramListInfo, \ + queryEnvironment, destReceiver, completionTag) +#else +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(parseTree, queryString, context, paramListInfo, \ + destReceiver, completionTag) +#endif + +#if PG_VERSION_NUM < 120000 +#define TTS_EMPTY(slot) ((slot)->tts_isempty) +#define ExecForceStoreHeapTuple(tuple, slot, shouldFree) \ + ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); +#define TableScanDesc HeapScanDesc +#define table_beginscan heap_beginscan +#define table_endscan heap_endscan + +#endif + +#if PG_VERSION_NUM >= 130000 +#define heap_open table_open +#define heap_openrv table_openrv +#define heap_close table_close +#endif + 
+#endif /* CSTORE_COMPAT_H */ diff --git a/src/backend/columnar/cstore_writer.c b/src/backend/columnar/cstore_writer.c new file mode 100644 index 000000000..9ca8c806e --- /dev/null +++ b/src/backend/columnar/cstore_writer.c @@ -0,0 +1,764 @@ +/*------------------------------------------------------------------------- + * + * cstore_writer.c + * + * This file contains function definitions for writing cstore files. This + * includes the logic for writing file level metadata, writing row stripes, + * and calculating block skip nodes. + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" + +#include "access/nbtree.h" +#include "catalog/pg_am.h" +#include "miscadmin.h" +#include "storage/fd.h" +#include "storage/smgr.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +#include "cstore.h" +#include "cstore_version_compat.h" + +static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, + uint32 blockRowCount, + uint32 columnCount); +static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, + uint32 blockRowCount, + uint32 columnCount); +static void FlushStripe(TableWriteState *writeState); +static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); +static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, + bool datumTypeByValue, int datumTypeLength, + char datumTypeAlign); +static void SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, + uint32 rowCount); +static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, + Datum columnValue, bool columnTypeByValue, + int columnTypeLength, Oid columnCollation, + FmgrInfo *comparisonFunction); +static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); +static StringInfo CopyStringInfo(StringInfo sourceString); + + +/* + * CStoreBeginWrite initializes a cstore data load operation and returns 
a table + * handle. This handle should be used for adding the row values and finishing the + * data load operation. If the cstore footer file already exists, we read the + * footer and then seek to right after the last stripe where the new stripes + * will be added. + */ +TableWriteState * +CStoreBeginWrite(Relation relation, + CompressionType compressionType, + uint64 stripeMaxRowCount, uint32 blockRowCount, + TupleDesc tupleDescriptor) +{ + TableWriteState *writeState = NULL; + FmgrInfo **comparisonFunctionArray = NULL; + MemoryContext stripeWriteContext = NULL; + uint32 columnCount = 0; + uint32 columnIndex = 0; + bool *columnMaskArray = NULL; + BlockData *blockData = NULL; + + /* get comparison function pointers for each of the columns */ + columnCount = tupleDescriptor->natts; + comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + FmgrInfo *comparisonFunction = NULL; + FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, + columnIndex); + + if (!attributeForm->attisdropped) + { + Oid typeId = attributeForm->atttypid; + + comparisonFunction = GetFunctionInfoOrNull(typeId, BTREE_AM_OID, + BTORDER_PROC); + } + + comparisonFunctionArray[columnIndex] = comparisonFunction; + } + + /* + * We allocate all stripe specific data in the stripeWriteContext, and + * reset this memory context once we have flushed the stripe to the file. + * This is to avoid memory leaks. 
+ */ + stripeWriteContext = AllocSetContextCreate(CurrentMemoryContext, + "Stripe Write Memory Context", + ALLOCSET_DEFAULT_SIZES); + + columnMaskArray = palloc(columnCount * sizeof(bool)); + memset(columnMaskArray, true, columnCount); + + blockData = CreateEmptyBlockData(columnCount, columnMaskArray, blockRowCount); + + writeState = palloc0(sizeof(TableWriteState)); + writeState->relation = relation; + writeState->compressionType = compressionType; + writeState->stripeMaxRowCount = stripeMaxRowCount; + writeState->blockRowCount = blockRowCount; + writeState->tupleDescriptor = tupleDescriptor; + writeState->comparisonFunctionArray = comparisonFunctionArray; + writeState->stripeBuffers = NULL; + writeState->stripeSkipList = NULL; + writeState->stripeWriteContext = stripeWriteContext; + writeState->blockData = blockData; + writeState->compressionBuffer = NULL; + + return writeState; +} + + +/* + * CStoreWriteRow adds a row to the cstore file. If the stripe is not initialized, + * we create structures to hold stripe data and skip list. Then, we serialize and + * append data to serialized value buffer for each of the columns and update + * corresponding skip nodes. Then, whole block data is compressed at every + * rowBlockCount insertion. Then, if row count exceeds stripeMaxRowCount, we flush + * the stripe, and add its metadata to the table footer. 
+ */ +void +CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNulls) +{ + uint32 columnIndex = 0; + uint32 blockIndex = 0; + uint32 blockRowIndex = 0; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + StripeSkipList *stripeSkipList = writeState->stripeSkipList; + uint32 columnCount = writeState->tupleDescriptor->natts; + const uint32 blockRowCount = writeState->blockRowCount; + BlockData *blockData = writeState->blockData; + MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); + + if (stripeBuffers == NULL) + { + stripeBuffers = CreateEmptyStripeBuffers(writeState->stripeMaxRowCount, + blockRowCount, columnCount); + stripeSkipList = CreateEmptyStripeSkipList(writeState->stripeMaxRowCount, + blockRowCount, columnCount); + writeState->stripeBuffers = stripeBuffers; + writeState->stripeSkipList = stripeSkipList; + writeState->compressionBuffer = makeStringInfo(); + + /* + * serializedValueBuffer lives in stripe write memory context so it needs to be + * initialized when the stripe is created. 
+ */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + blockData->valueBufferArray[columnIndex] = makeStringInfo(); + } + } + + blockIndex = stripeBuffers->rowCount / blockRowCount; + blockRowIndex = stripeBuffers->rowCount % blockRowCount; + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode **blockSkipNodeArray = stripeSkipList->blockSkipNodeArray; + ColumnBlockSkipNode *blockSkipNode = + &blockSkipNodeArray[columnIndex][blockIndex]; + + if (columnNulls[columnIndex]) + { + blockData->existsArray[columnIndex][blockRowIndex] = false; + } + else + { + FmgrInfo *comparisonFunction = + writeState->comparisonFunctionArray[columnIndex]; + Form_pg_attribute attributeForm = + TupleDescAttr(writeState->tupleDescriptor, columnIndex); + bool columnTypeByValue = attributeForm->attbyval; + int columnTypeLength = attributeForm->attlen; + Oid columnCollation = attributeForm->attcollation; + char columnTypeAlign = attributeForm->attalign; + + blockData->existsArray[columnIndex][blockRowIndex] = true; + + SerializeSingleDatum(blockData->valueBufferArray[columnIndex], + columnValues[columnIndex], columnTypeByValue, + columnTypeLength, columnTypeAlign); + + UpdateBlockSkipNodeMinMax(blockSkipNode, columnValues[columnIndex], + columnTypeByValue, columnTypeLength, + columnCollation, comparisonFunction); + } + + blockSkipNode->rowCount++; + } + + stripeSkipList->blockCount = blockIndex + 1; + + /* last row of the block is inserted serialize the block */ + if (blockRowIndex == blockRowCount - 1) + { + SerializeBlockData(writeState, blockIndex, blockRowCount); + } + + stripeBuffers->rowCount++; + if (stripeBuffers->rowCount >= writeState->stripeMaxRowCount) + { + FlushStripe(writeState); + + /* set stripe data and skip list to NULL so they are recreated next time */ + writeState->stripeBuffers = NULL; + writeState->stripeSkipList = NULL; + } + + MemoryContextSwitchTo(oldContext); +} + + +/* + * CStoreEndWrite finishes a 
cstore data load operation. If we have an unflushed + * stripe, we flush it. Then, we sync and close the cstore data file. Last, we + * flush the footer to a temporary file, and atomically rename this temporary + * file to the original footer file. + */ +void +CStoreEndWrite(TableWriteState *writeState) +{ + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + + if (stripeBuffers != NULL) + { + MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); + + FlushStripe(writeState); + MemoryContextReset(writeState->stripeWriteContext); + + MemoryContextSwitchTo(oldContext); + } + + MemoryContextDelete(writeState->stripeWriteContext); + pfree(writeState->comparisonFunctionArray); + FreeBlockData(writeState->blockData); + pfree(writeState); +} + + +/* + * CreateEmptyStripeBuffers allocates an empty StripeBuffers structure with the given + * column count. + */ +static StripeBuffers * +CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, uint32 blockRowCount, + uint32 columnCount) +{ + StripeBuffers *stripeBuffers = NULL; + uint32 columnIndex = 0; + uint32 maxBlockCount = (stripeMaxRowCount / blockRowCount) + 1; + ColumnBuffers **columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + uint32 blockIndex = 0; + ColumnBlockBuffers **blockBuffersArray = + palloc0(maxBlockCount * sizeof(ColumnBlockBuffers *)); + + for (blockIndex = 0; blockIndex < maxBlockCount; blockIndex++) + { + blockBuffersArray[blockIndex] = palloc0(sizeof(ColumnBlockBuffers)); + blockBuffersArray[blockIndex]->existsBuffer = NULL; + blockBuffersArray[blockIndex]->valueBuffer = NULL; + blockBuffersArray[blockIndex]->valueCompressionType = COMPRESSION_NONE; + } + + columnBuffersArray[columnIndex] = palloc0(sizeof(ColumnBuffers)); + columnBuffersArray[columnIndex]->blockBuffersArray = blockBuffersArray; + } + + stripeBuffers = palloc0(sizeof(StripeBuffers)); + stripeBuffers->columnBuffersArray 
= columnBuffersArray; + stripeBuffers->columnCount = columnCount; + stripeBuffers->rowCount = 0; + + return stripeBuffers; +} + + +/* + * CreateEmptyStripeSkipList allocates an empty StripeSkipList structure with + * the given column count. This structure has enough blocks to hold statistics + * for stripeMaxRowCount rows. + */ +static StripeSkipList * +CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, + uint32 columnCount) +{ + StripeSkipList *stripeSkipList = NULL; + uint32 columnIndex = 0; + uint32 maxBlockCount = (stripeMaxRowCount / blockRowCount) + 1; + + ColumnBlockSkipNode **blockSkipNodeArray = + palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + blockSkipNodeArray[columnIndex] = + palloc0(maxBlockCount * sizeof(ColumnBlockSkipNode)); + } + + stripeSkipList = palloc0(sizeof(StripeSkipList)); + stripeSkipList->columnCount = columnCount; + stripeSkipList->blockCount = 0; + stripeSkipList->blockSkipNodeArray = blockSkipNodeArray; + + return stripeSkipList; +} + + +static void +WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) +{ + uint64 remaining = dataLength; + Buffer buffer; + + while (remaining > 0) + { + SmgrAddr addr = logical_to_smgr(logicalOffset); + BlockNumber nblocks; + Page page; + PageHeader phdr; + uint64 to_write; + + RelationOpenSmgr(rel); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + Assert(addr.blockno < nblocks); + (void) nblocks; /* keep compiler quiet */ + RelationCloseSmgr(rel); + + buffer = ReadBuffer(rel, addr.blockno); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(buffer); + phdr = (PageHeader) page; + if (PageIsNew(page)) + { + PageInit(page, BLCKSZ, 0); + } + + /* + * After a transaction has been rolled-back, we might be + * over-writing the rolledback write, so phdr->pd_lower can be + * different from addr.offset. + * + * We reset pd_lower to reset the rolledback write. 
+ */ + if (phdr->pd_lower > addr.offset) + { + ereport(DEBUG1, (errmsg("over-writing page %u", addr.blockno), + errdetail("This can happen after a roll-back."))); + phdr->pd_lower = addr.offset; + } + Assert(phdr->pd_lower == addr.offset); + + START_CRIT_SECTION(); + + to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); + memcpy(page + phdr->pd_lower, data, to_write); + phdr->pd_lower += to_write; + + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr = 0; + + XLogBeginInsert(); + + /* + * Since cstore will mostly write whole pages we force the transmission of the + * whole image in the buffer + */ + XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE); + + recptr = XLogInsert(RM_GENERIC_ID, 0); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buffer); + + data += to_write; + remaining -= to_write; + logicalOffset += to_write; + } +} + + +/* + * FlushStripe flushes current stripe data into the file. The function first ensures + * the last data block for each column is properly serialized and compressed. Then, + * the function creates the skip list and footer buffers. Finally, the function + * flushes the skip list, data, and footer buffers to the file. 
+ */ +static void +FlushStripe(TableWriteState *writeState) +{ + StripeMetadata stripeMetadata = { 0 }; + uint32 columnIndex = 0; + uint32 blockIndex = 0; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + StripeSkipList *stripeSkipList = writeState->stripeSkipList; + ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray; + TupleDesc tupleDescriptor = writeState->tupleDescriptor; + uint32 columnCount = tupleDescriptor->natts; + uint32 blockCount = stripeSkipList->blockCount; + uint32 blockRowCount = writeState->blockRowCount; + uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; + uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; + uint64 currentFileOffset = 0; + uint64 stripeSize = 0; + uint64 stripeRowCount = 0; + + /* + * check if the last block needs serialization , the last block was not serialized + * if it was not full yet, e.g. (rowCount > 0) + */ + if (lastBlockRowCount > 0) + { + SerializeBlockData(writeState, lastBlockIndex, lastBlockRowCount); + } + + /* update buffer sizes in stripe skip list */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + uint64 existsBufferSize = blockBuffers->existsBuffer->len; + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + + blockSkipNode->existsBlockOffset = stripeSize; + blockSkipNode->existsLength = existsBufferSize; + stripeSize += existsBufferSize; + } + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + uint64 valueBufferSize = blockBuffers->valueBuffer->len; + CompressionType valueCompressionType = 
blockBuffers->valueCompressionType; + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + + blockSkipNode->valueBlockOffset = stripeSize; + blockSkipNode->valueLength = valueBufferSize; + blockSkipNode->valueCompressionType = valueCompressionType; + + stripeSize += valueBufferSize; + } + } + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + stripeRowCount += + stripeSkipList->blockSkipNodeArray[0][blockIndex].rowCount; + } + + stripeMetadata = ReserveStripe(writeState->relation, stripeSize, + stripeRowCount, columnCount, blockCount, + blockRowCount); + + currentFileOffset = stripeMetadata.fileOffset; + + /* + * Each stripe has only one section: + * Data section, in which we store data for each column continuously. + * We store data for each column in blocks. For each block, we + * store two buffers: "exists" buffer, and "value" buffer. "exists" buffer + * tells which values are not NULL. "value" buffer contains values for + * present values. For each column, we first store all "exists" buffers, + * and then all "value" buffers. 
+ */ + + /* flush the data buffers */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + StringInfo existsBuffer = blockBuffers->existsBuffer; + + WriteToSmgr(writeState->relation, currentFileOffset, + existsBuffer->data, existsBuffer->len); + currentFileOffset += existsBuffer->len; + } + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + StringInfo valueBuffer = blockBuffers->valueBuffer; + + WriteToSmgr(writeState->relation, currentFileOffset, + valueBuffer->data, valueBuffer->len); + currentFileOffset += valueBuffer->len; + } + } + + /* create skip list and footer buffers */ + SaveStripeSkipList(writeState->relation->rd_node.relNode, + stripeMetadata.id, + stripeSkipList, tupleDescriptor); +} + + +/* + * SerializeBoolArray serializes the given boolean array and returns the result + * as a StringInfo. This function packs every 8 boolean values into one byte. 
+ */ +static StringInfo +SerializeBoolArray(bool *boolArray, uint32 boolArrayLength) +{ + StringInfo boolArrayBuffer = NULL; + uint32 boolArrayIndex = 0; + uint32 byteCount = (boolArrayLength + 7) / 8; + + boolArrayBuffer = makeStringInfo(); + enlargeStringInfo(boolArrayBuffer, byteCount); + boolArrayBuffer->len = byteCount; + memset(boolArrayBuffer->data, 0, byteCount); + + for (boolArrayIndex = 0; boolArrayIndex < boolArrayLength; boolArrayIndex++) + { + if (boolArray[boolArrayIndex]) + { + uint32 byteIndex = boolArrayIndex / 8; + uint32 bitIndex = boolArrayIndex % 8; + boolArrayBuffer->data[byteIndex] |= (1 << bitIndex); + } + } + + return boolArrayBuffer; +} + + +/* + * SerializeSingleDatum serializes the given datum value and appends it to the + * provided string info buffer. + */ +static void +SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, + int datumTypeLength, char datumTypeAlign) +{ + uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum); + uint32 datumLengthAligned = att_align_nominal(datumLength, datumTypeAlign); + char *currentDatumDataPointer = NULL; + + enlargeStringInfo(datumBuffer, datumLengthAligned); + + currentDatumDataPointer = datumBuffer->data + datumBuffer->len; + memset(currentDatumDataPointer, 0, datumLengthAligned); + + if (datumTypeLength > 0) + { + if (datumTypeByValue) + { + store_att_byval(currentDatumDataPointer, datum, datumTypeLength); + } + else + { + memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumTypeLength); + } + } + else + { + Assert(!datumTypeByValue); + memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumLength); + } + + datumBuffer->len += datumLengthAligned; +} + + +/* + * SerializeBlockData serializes and compresses block data at given block index with given + * compression type for every column. 
+ */ +static void +SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCount) +{ + uint32 columnIndex = 0; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + BlockData *blockData = writeState->blockData; + CompressionType requestedCompressionType = writeState->compressionType; + const uint32 columnCount = stripeBuffers->columnCount; + StringInfo compressionBuffer = writeState->compressionBuffer; + + /* serialize exist values, data values are already serialized */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; + + blockBuffers->existsBuffer = + SerializeBoolArray(blockData->existsArray[columnIndex], rowCount); + } + + /* + * check and compress value buffers, if a value buffer is not compressable + * then keep it as uncompressed, store compression information. + */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; + StringInfo serializedValueBuffer = NULL; + CompressionType actualCompressionType = COMPRESSION_NONE; + bool compressed = false; + + serializedValueBuffer = blockData->valueBufferArray[columnIndex]; + + /* the only other supported compression type is pg_lz for now */ + Assert(requestedCompressionType == COMPRESSION_NONE || + requestedCompressionType == COMPRESSION_PG_LZ); + + /* + * if serializedValueBuffer is be compressed, update serializedValueBuffer + * with compressed data and store compression type. 
+ */ + compressed = CompressBuffer(serializedValueBuffer, compressionBuffer, + requestedCompressionType); + if (compressed) + { + serializedValueBuffer = compressionBuffer; + actualCompressionType = COMPRESSION_PG_LZ; + } + + /* store (compressed) value buffer */ + blockBuffers->valueCompressionType = actualCompressionType; + blockBuffers->valueBuffer = CopyStringInfo(serializedValueBuffer); + + /* valueBuffer needs to be reset for next block's data */ + resetStringInfo(blockData->valueBufferArray[columnIndex]); + } +} + + +/* + * UpdateBlockSkipNodeMinMax takes the given column value, and checks if this + * value falls outside the range of minimum/maximum values of the given column + * block skip node. If it does, the function updates the column block skip node + * accordingly. + */ +static void +UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, Datum columnValue, + bool columnTypeByValue, int columnTypeLength, + Oid columnCollation, FmgrInfo *comparisonFunction) +{ + bool hasMinMax = blockSkipNode->hasMinMax; + Datum previousMinimum = blockSkipNode->minimumValue; + Datum previousMaximum = blockSkipNode->maximumValue; + Datum currentMinimum = 0; + Datum currentMaximum = 0; + + /* if type doesn't have a comparison function, skip min/max values */ + if (comparisonFunction == NULL) + { + return; + } + + if (!hasMinMax) + { + currentMinimum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + currentMaximum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + } + else + { + Datum minimumComparisonDatum = FunctionCall2Coll(comparisonFunction, + columnCollation, columnValue, + previousMinimum); + Datum maximumComparisonDatum = FunctionCall2Coll(comparisonFunction, + columnCollation, columnValue, + previousMaximum); + int minimumComparison = DatumGetInt32(minimumComparisonDatum); + int maximumComparison = DatumGetInt32(maximumComparisonDatum); + + if (minimumComparison < 0) + { + currentMinimum = DatumCopy(columnValue, 
columnTypeByValue, columnTypeLength); + } + else + { + currentMinimum = previousMinimum; + } + + if (maximumComparison > 0) + { + currentMaximum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + } + else + { + currentMaximum = previousMaximum; + } + } + + blockSkipNode->hasMinMax = true; + blockSkipNode->minimumValue = currentMinimum; + blockSkipNode->maximumValue = currentMaximum; +} + + +/* Creates a copy of the given datum. */ +static Datum +DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) +{ + Datum datumCopy = 0; + + if (datumTypeByValue) + { + datumCopy = datum; + } + else + { + uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum); + char *datumData = palloc0(datumLength); + memcpy(datumData, DatumGetPointer(datum), datumLength); + + datumCopy = PointerGetDatum(datumData); + } + + return datumCopy; +} + + +/* + * CopyStringInfo creates a deep copy of given source string allocating only needed + * amount of memory. + */ +static StringInfo +CopyStringInfo(StringInfo sourceString) +{ + StringInfo targetString = palloc0(sizeof(StringInfoData)); + + if (sourceString->len > 0) + { + targetString->data = palloc0(sourceString->len); + targetString->len = sourceString->len; + targetString->maxlen = sourceString->len; + memcpy(targetString->data, sourceString->data, sourceString->len); + } + + return targetString; +} diff --git a/src/backend/columnar/data/array_types.csv b/src/backend/columnar/data/array_types.csv new file mode 100644 index 000000000..f20e2d2d0 --- /dev/null +++ b/src/backend/columnar/data/array_types.csv @@ -0,0 +1,3 @@ +"{1,2,3}","{1,2,3}","{a,b,c}" +{},{},{} +"{-2147483648,2147483647}","{-9223372036854775808,9223372036854775807}","{""""}" diff --git a/src/backend/columnar/data/block_filtering.csv b/src/backend/columnar/data/block_filtering.csv new file mode 100644 index 000000000..9812045fd --- /dev/null +++ b/src/backend/columnar/data/block_filtering.csv @@ -0,0 +1,10000 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 
+10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 
+428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 +702 +703 +704 +705 +706 +707 +708 +709 +710 +711 +712 +713 +714 +715 +716 +717 +718 +719 +720 +721 +722 +723 +724 +725 +726 +727 +728 +729 +730 +731 +732 +733 +734 +735 +736 +737 +738 +739 +740 +741 +742 +743 +744 +745 +746 +747 +748 +749 +750 +751 +752 +753 +754 +755 +756 +757 +758 +759 +760 +761 +762 +763 +764 +765 +766 +767 +768 +769 +770 +771 +772 +773 +774 +775 +776 +777 +778 +779 +780 +781 +782 +783 +784 +785 +786 +787 +788 +789 +790 +791 +792 +793 +794 +795 +796 +797 +798 +799 +800 +801 +802 +803 +804 +805 +806 +807 +808 +809 +810 +811 +812 +813 +814 +815 +816 +817 +818 +819 +820 +821 +822 +823 +824 +825 +826 +827 
+828 +829 +830 +831 +832 +833 +834 +835 +836 +837 +838 +839 +840 +841 +842 +843 +844 +845 +846 +847 +848 +849 +850 +851 +852 +853 +854 +855 +856 +857 +858 +859 +860 +861 +862 +863 +864 +865 +866 +867 +868 +869 +870 +871 +872 +873 +874 +875 +876 +877 +878 +879 +880 +881 +882 +883 +884 +885 +886 +887 +888 +889 +890 +891 +892 +893 +894 +895 +896 +897 +898 +899 +900 +901 +902 +903 +904 +905 +906 +907 +908 +909 +910 +911 +912 +913 +914 +915 +916 +917 +918 +919 +920 +921 +922 +923 +924 +925 +926 +927 +928 +929 +930 +931 +932 +933 +934 +935 +936 +937 +938 +939 +940 +941 +942 +943 +944 +945 +946 +947 +948 +949 +950 +951 +952 +953 +954 +955 +956 +957 +958 +959 +960 +961 +962 +963 +964 +965 +966 +967 +968 +969 +970 +971 +972 +973 +974 +975 +976 +977 +978 +979 +980 +981 +982 +983 +984 +985 +986 +987 +988 +989 +990 +991 +992 +993 +994 +995 +996 +997 +998 +999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 +1007 +1008 +1009 +1010 +1011 +1012 +1013 +1014 +1015 +1016 +1017 +1018 +1019 +1020 +1021 +1022 +1023 +1024 +1025 +1026 +1027 +1028 +1029 +1030 +1031 +1032 +1033 +1034 +1035 +1036 +1037 +1038 +1039 +1040 +1041 +1042 +1043 +1044 +1045 +1046 +1047 +1048 +1049 +1050 +1051 +1052 +1053 +1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 +1075 +1076 +1077 +1078 +1079 +1080 +1081 +1082 +1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 +1185 +1186 +1187 +1188 +1189 
+1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 +1214 +1215 +1216 +1217 +1218 +1219 +1220 +1221 +1222 +1223 +1224 +1225 +1226 +1227 +1228 +1229 +1230 +1231 +1232 +1233 +1234 +1235 +1236 +1237 +1238 +1239 +1240 +1241 +1242 +1243 +1244 +1245 +1246 +1247 +1248 +1249 +1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 +1271 +1272 +1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 +1282 +1283 +1284 +1285 +1286 +1287 +1288 +1289 +1290 +1291 +1292 +1293 +1294 +1295 +1296 +1297 +1298 +1299 +1300 +1301 +1302 +1303 +1304 +1305 +1306 +1307 +1308 +1309 +1310 +1311 +1312 +1313 +1314 +1315 +1316 +1317 +1318 +1319 +1320 +1321 +1322 +1323 +1324 +1325 +1326 +1327 +1328 +1329 +1330 +1331 +1332 +1333 +1334 +1335 +1336 +1337 +1338 +1339 +1340 +1341 +1342 +1343 +1344 +1345 +1346 +1347 +1348 +1349 +1350 +1351 +1352 +1353 +1354 +1355 +1356 +1357 +1358 +1359 +1360 +1361 +1362 +1363 +1364 +1365 +1366 +1367 +1368 +1369 +1370 +1371 +1372 +1373 +1374 +1375 +1376 +1377 +1378 +1379 +1380 +1381 +1382 +1383 +1384 +1385 +1386 +1387 +1388 +1389 +1390 +1391 +1392 +1393 +1394 +1395 +1396 +1397 +1398 +1399 +1400 +1401 +1402 +1403 +1404 +1405 +1406 +1407 +1408 +1409 +1410 +1411 +1412 +1413 +1414 +1415 +1416 +1417 +1418 +1419 +1420 +1421 +1422 +1423 +1424 +1425 +1426 +1427 +1428 +1429 +1430 +1431 +1432 +1433 +1434 +1435 +1436 +1437 +1438 +1439 +1440 +1441 +1442 +1443 +1444 +1445 +1446 +1447 +1448 +1449 +1450 +1451 +1452 +1453 +1454 +1455 +1456 +1457 +1458 +1459 +1460 +1461 +1462 +1463 +1464 +1465 +1466 +1467 +1468 +1469 +1470 +1471 +1472 +1473 +1474 +1475 +1476 +1477 +1478 +1479 +1480 +1481 +1482 +1483 +1484 +1485 +1486 +1487 +1488 +1489 +1490 +1491 +1492 +1493 +1494 +1495 +1496 +1497 +1498 +1499 +1500 +1501 +1502 +1503 +1504 +1505 +1506 +1507 +1508 +1509 +1510 +1511 +1512 +1513 +1514 +1515 +1516 +1517 +1518 +1519 +1520 +1521 +1522 
+1523 +1524 +1525 +1526 +1527 +1528 +1529 +1530 +1531 +1532 +1533 +1534 +1535 +1536 +1537 +1538 +1539 +1540 +1541 +1542 +1543 +1544 +1545 +1546 +1547 +1548 +1549 +1550 +1551 +1552 +1553 +1554 +1555 +1556 +1557 +1558 +1559 +1560 +1561 +1562 +1563 +1564 +1565 +1566 +1567 +1568 +1569 +1570 +1571 +1572 +1573 +1574 +1575 +1576 +1577 +1578 +1579 +1580 +1581 +1582 +1583 +1584 +1585 +1586 +1587 +1588 +1589 +1590 +1591 +1592 +1593 +1594 +1595 +1596 +1597 +1598 +1599 +1600 +1601 +1602 +1603 +1604 +1605 +1606 +1607 +1608 +1609 +1610 +1611 +1612 +1613 +1614 +1615 +1616 +1617 +1618 +1619 +1620 +1621 +1622 +1623 +1624 +1625 +1626 +1627 +1628 +1629 +1630 +1631 +1632 +1633 +1634 +1635 +1636 +1637 +1638 +1639 +1640 +1641 +1642 +1643 +1644 +1645 +1646 +1647 +1648 +1649 +1650 +1651 +1652 +1653 +1654 +1655 +1656 +1657 +1658 +1659 +1660 +1661 +1662 +1663 +1664 +1665 +1666 +1667 +1668 +1669 +1670 +1671 +1672 +1673 +1674 +1675 +1676 +1677 +1678 +1679 +1680 +1681 +1682 +1683 +1684 +1685 +1686 +1687 +1688 +1689 +1690 +1691 +1692 +1693 +1694 +1695 +1696 +1697 +1698 +1699 +1700 +1701 +1702 +1703 +1704 +1705 +1706 +1707 +1708 +1709 +1710 +1711 +1712 +1713 +1714 +1715 +1716 +1717 +1718 +1719 +1720 +1721 +1722 +1723 +1724 +1725 +1726 +1727 +1728 +1729 +1730 +1731 +1732 +1733 +1734 +1735 +1736 +1737 +1738 +1739 +1740 +1741 +1742 +1743 +1744 +1745 +1746 +1747 +1748 +1749 +1750 +1751 +1752 +1753 +1754 +1755 +1756 +1757 +1758 +1759 +1760 +1761 +1762 +1763 +1764 +1765 +1766 +1767 +1768 +1769 +1770 +1771 +1772 +1773 +1774 +1775 +1776 +1777 +1778 +1779 +1780 +1781 +1782 +1783 +1784 +1785 +1786 +1787 +1788 +1789 +1790 +1791 +1792 +1793 +1794 +1795 +1796 +1797 +1798 +1799 +1800 +1801 +1802 +1803 +1804 +1805 +1806 +1807 +1808 +1809 +1810 +1811 +1812 +1813 +1814 +1815 +1816 +1817 +1818 +1819 +1820 +1821 +1822 +1823 +1824 +1825 +1826 +1827 +1828 +1829 +1830 +1831 +1832 +1833 +1834 +1835 +1836 +1837 +1838 +1839 +1840 +1841 +1842 +1843 +1844 +1845 +1846 +1847 +1848 +1849 +1850 +1851 +1852 +1853 +1854 +1855 
+1856 +1857 +1858 +1859 +1860 +1861 +1862 +1863 +1864 +1865 +1866 +1867 +1868 +1869 +1870 +1871 +1872 +1873 +1874 +1875 +1876 +1877 +1878 +1879 +1880 +1881 +1882 +1883 +1884 +1885 +1886 +1887 +1888 +1889 +1890 +1891 +1892 +1893 +1894 +1895 +1896 +1897 +1898 +1899 +1900 +1901 +1902 +1903 +1904 +1905 +1906 +1907 +1908 +1909 +1910 +1911 +1912 +1913 +1914 +1915 +1916 +1917 +1918 +1919 +1920 +1921 +1922 +1923 +1924 +1925 +1926 +1927 +1928 +1929 +1930 +1931 +1932 +1933 +1934 +1935 +1936 +1937 +1938 +1939 +1940 +1941 +1942 +1943 +1944 +1945 +1946 +1947 +1948 +1949 +1950 +1951 +1952 +1953 +1954 +1955 +1956 +1957 +1958 +1959 +1960 +1961 +1962 +1963 +1964 +1965 +1966 +1967 +1968 +1969 +1970 +1971 +1972 +1973 +1974 +1975 +1976 +1977 +1978 +1979 +1980 +1981 +1982 +1983 +1984 +1985 +1986 +1987 +1988 +1989 +1990 +1991 +1992 +1993 +1994 +1995 +1996 +1997 +1998 +1999 +2000 +2001 +2002 +2003 +2004 +2005 +2006 +2007 +2008 +2009 +2010 +2011 +2012 +2013 +2014 +2015 +2016 +2017 +2018 +2019 +2020 +2021 +2022 +2023 +2024 +2025 +2026 +2027 +2028 +2029 +2030 +2031 +2032 +2033 +2034 +2035 +2036 +2037 +2038 +2039 +2040 +2041 +2042 +2043 +2044 +2045 +2046 +2047 +2048 +2049 +2050 +2051 +2052 +2053 +2054 +2055 +2056 +2057 +2058 +2059 +2060 +2061 +2062 +2063 +2064 +2065 +2066 +2067 +2068 +2069 +2070 +2071 +2072 +2073 +2074 +2075 +2076 +2077 +2078 +2079 +2080 +2081 +2082 +2083 +2084 +2085 +2086 +2087 +2088 +2089 +2090 +2091 +2092 +2093 +2094 +2095 +2096 +2097 +2098 +2099 +2100 +2101 +2102 +2103 +2104 +2105 +2106 +2107 +2108 +2109 +2110 +2111 +2112 +2113 +2114 +2115 +2116 +2117 +2118 +2119 +2120 +2121 +2122 +2123 +2124 +2125 +2126 +2127 +2128 +2129 +2130 +2131 +2132 +2133 +2134 +2135 +2136 +2137 +2138 +2139 +2140 +2141 +2142 +2143 +2144 +2145 +2146 +2147 +2148 +2149 +2150 +2151 +2152 +2153 +2154 +2155 +2156 +2157 +2158 +2159 +2160 +2161 +2162 +2163 +2164 +2165 +2166 +2167 +2168 +2169 +2170 +2171 +2172 +2173 +2174 +2175 +2176 +2177 +2178 +2179 +2180 +2181 +2182 +2183 +2184 +2185 +2186 +2187 +2188 
+2189 +2190 +2191 +2192 +2193 +2194 +2195 +2196 +2197 +2198 +2199 +2200 +2201 +2202 +2203 +2204 +2205 +2206 +2207 +2208 +2209 +2210 +2211 +2212 +2213 +2214 +2215 +2216 +2217 +2218 +2219 +2220 +2221 +2222 +2223 +2224 +2225 +2226 +2227 +2228 +2229 +2230 +2231 +2232 +2233 +2234 +2235 +2236 +2237 +2238 +2239 +2240 +2241 +2242 +2243 +2244 +2245 +2246 +2247 +2248 +2249 +2250 +2251 +2252 +2253 +2254 +2255 +2256 +2257 +2258 +2259 +2260 +2261 +2262 +2263 +2264 +2265 +2266 +2267 +2268 +2269 +2270 +2271 +2272 +2273 +2274 +2275 +2276 +2277 +2278 +2279 +2280 +2281 +2282 +2283 +2284 +2285 +2286 +2287 +2288 +2289 +2290 +2291 +2292 +2293 +2294 +2295 +2296 +2297 +2298 +2299 +2300 +2301 +2302 +2303 +2304 +2305 +2306 +2307 +2308 +2309 +2310 +2311 +2312 +2313 +2314 +2315 +2316 +2317 +2318 +2319 +2320 +2321 +2322 +2323 +2324 +2325 +2326 +2327 +2328 +2329 +2330 +2331 +2332 +2333 +2334 +2335 +2336 +2337 +2338 +2339 +2340 +2341 +2342 +2343 +2344 +2345 +2346 +2347 +2348 +2349 +2350 +2351 +2352 +2353 +2354 +2355 +2356 +2357 +2358 +2359 +2360 +2361 +2362 +2363 +2364 +2365 +2366 +2367 +2368 +2369 +2370 +2371 +2372 +2373 +2374 +2375 +2376 +2377 +2378 +2379 +2380 +2381 +2382 +2383 +2384 +2385 +2386 +2387 +2388 +2389 +2390 +2391 +2392 +2393 +2394 +2395 +2396 +2397 +2398 +2399 +2400 +2401 +2402 +2403 +2404 +2405 +2406 +2407 +2408 +2409 +2410 +2411 +2412 +2413 +2414 +2415 +2416 +2417 +2418 +2419 +2420 +2421 +2422 +2423 +2424 +2425 +2426 +2427 +2428 +2429 +2430 +2431 +2432 +2433 +2434 +2435 +2436 +2437 +2438 +2439 +2440 +2441 +2442 +2443 +2444 +2445 +2446 +2447 +2448 +2449 +2450 +2451 +2452 +2453 +2454 +2455 +2456 +2457 +2458 +2459 +2460 +2461 +2462 +2463 +2464 +2465 +2466 +2467 +2468 +2469 +2470 +2471 +2472 +2473 +2474 +2475 +2476 +2477 +2478 +2479 +2480 +2481 +2482 +2483 +2484 +2485 +2486 +2487 +2488 +2489 +2490 +2491 +2492 +2493 +2494 +2495 +2496 +2497 +2498 +2499 +2500 +2501 +2502 +2503 +2504 +2505 +2506 +2507 +2508 +2509 +2510 +2511 +2512 +2513 +2514 +2515 +2516 +2517 +2518 +2519 +2520 +2521 
+2522 +2523 +2524 +2525 +2526 +2527 +2528 +2529 +2530 +2531 +2532 +2533 +2534 +2535 +2536 +2537 +2538 +2539 +2540 +2541 +2542 +2543 +2544 +2545 +2546 +2547 +2548 +2549 +2550 +2551 +2552 +2553 +2554 +2555 +2556 +2557 +2558 +2559 +2560 +2561 +2562 +2563 +2564 +2565 +2566 +2567 +2568 +2569 +2570 +2571 +2572 +2573 +2574 +2575 +2576 +2577 +2578 +2579 +2580 +2581 +2582 +2583 +2584 +2585 +2586 +2587 +2588 +2589 +2590 +2591 +2592 +2593 +2594 +2595 +2596 +2597 +2598 +2599 +2600 +2601 +2602 +2603 +2604 +2605 +2606 +2607 +2608 +2609 +2610 +2611 +2612 +2613 +2614 +2615 +2616 +2617 +2618 +2619 +2620 +2621 +2622 +2623 +2624 +2625 +2626 +2627 +2628 +2629 +2630 +2631 +2632 +2633 +2634 +2635 +2636 +2637 +2638 +2639 +2640 +2641 +2642 +2643 +2644 +2645 +2646 +2647 +2648 +2649 +2650 +2651 +2652 +2653 +2654 +2655 +2656 +2657 +2658 +2659 +2660 +2661 +2662 +2663 +2664 +2665 +2666 +2667 +2668 +2669 +2670 +2671 +2672 +2673 +2674 +2675 +2676 +2677 +2678 +2679 +2680 +2681 +2682 +2683 +2684 +2685 +2686 +2687 +2688 +2689 +2690 +2691 +2692 +2693 +2694 +2695 +2696 +2697 +2698 +2699 +2700 +2701 +2702 +2703 +2704 +2705 +2706 +2707 +2708 +2709 +2710 +2711 +2712 +2713 +2714 +2715 +2716 +2717 +2718 +2719 +2720 +2721 +2722 +2723 +2724 +2725 +2726 +2727 +2728 +2729 +2730 +2731 +2732 +2733 +2734 +2735 +2736 +2737 +2738 +2739 +2740 +2741 +2742 +2743 +2744 +2745 +2746 +2747 +2748 +2749 +2750 +2751 +2752 +2753 +2754 +2755 +2756 +2757 +2758 +2759 +2760 +2761 +2762 +2763 +2764 +2765 +2766 +2767 +2768 +2769 +2770 +2771 +2772 +2773 +2774 +2775 +2776 +2777 +2778 +2779 +2780 +2781 +2782 +2783 +2784 +2785 +2786 +2787 +2788 +2789 +2790 +2791 +2792 +2793 +2794 +2795 +2796 +2797 +2798 +2799 +2800 +2801 +2802 +2803 +2804 +2805 +2806 +2807 +2808 +2809 +2810 +2811 +2812 +2813 +2814 +2815 +2816 +2817 +2818 +2819 +2820 +2821 +2822 +2823 +2824 +2825 +2826 +2827 +2828 +2829 +2830 +2831 +2832 +2833 +2834 +2835 +2836 +2837 +2838 +2839 +2840 +2841 +2842 +2843 +2844 +2845 +2846 +2847 +2848 +2849 +2850 +2851 +2852 +2853 +2854 
+2855 +2856 +2857 +2858 +2859 +2860 +2861 +2862 +2863 +2864 +2865 +2866 +2867 +2868 +2869 +2870 +2871 +2872 +2873 +2874 +2875 +2876 +2877 +2878 +2879 +2880 +2881 +2882 +2883 +2884 +2885 +2886 +2887 +2888 +2889 +2890 +2891 +2892 +2893 +2894 +2895 +2896 +2897 +2898 +2899 +2900 +2901 +2902 +2903 +2904 +2905 +2906 +2907 +2908 +2909 +2910 +2911 +2912 +2913 +2914 +2915 +2916 +2917 +2918 +2919 +2920 +2921 +2922 +2923 +2924 +2925 +2926 +2927 +2928 +2929 +2930 +2931 +2932 +2933 +2934 +2935 +2936 +2937 +2938 +2939 +2940 +2941 +2942 +2943 +2944 +2945 +2946 +2947 +2948 +2949 +2950 +2951 +2952 +2953 +2954 +2955 +2956 +2957 +2958 +2959 +2960 +2961 +2962 +2963 +2964 +2965 +2966 +2967 +2968 +2969 +2970 +2971 +2972 +2973 +2974 +2975 +2976 +2977 +2978 +2979 +2980 +2981 +2982 +2983 +2984 +2985 +2986 +2987 +2988 +2989 +2990 +2991 +2992 +2993 +2994 +2995 +2996 +2997 +2998 +2999 +3000 +3001 +3002 +3003 +3004 +3005 +3006 +3007 +3008 +3009 +3010 +3011 +3012 +3013 +3014 +3015 +3016 +3017 +3018 +3019 +3020 +3021 +3022 +3023 +3024 +3025 +3026 +3027 +3028 +3029 +3030 +3031 +3032 +3033 +3034 +3035 +3036 +3037 +3038 +3039 +3040 +3041 +3042 +3043 +3044 +3045 +3046 +3047 +3048 +3049 +3050 +3051 +3052 +3053 +3054 +3055 +3056 +3057 +3058 +3059 +3060 +3061 +3062 +3063 +3064 +3065 +3066 +3067 +3068 +3069 +3070 +3071 +3072 +3073 +3074 +3075 +3076 +3077 +3078 +3079 +3080 +3081 +3082 +3083 +3084 +3085 +3086 +3087 +3088 +3089 +3090 +3091 +3092 +3093 +3094 +3095 +3096 +3097 +3098 +3099 +3100 +3101 +3102 +3103 +3104 +3105 +3106 +3107 +3108 +3109 +3110 +3111 +3112 +3113 +3114 +3115 +3116 +3117 +3118 +3119 +3120 +3121 +3122 +3123 +3124 +3125 +3126 +3127 +3128 +3129 +3130 +3131 +3132 +3133 +3134 +3135 +3136 +3137 +3138 +3139 +3140 +3141 +3142 +3143 +3144 +3145 +3146 +3147 +3148 +3149 +3150 +3151 +3152 +3153 +3154 +3155 +3156 +3157 +3158 +3159 +3160 +3161 +3162 +3163 +3164 +3165 +3166 +3167 +3168 +3169 +3170 +3171 +3172 +3173 +3174 +3175 +3176 +3177 +3178 +3179 +3180 +3181 +3182 +3183 +3184 +3185 +3186 +3187 
+3188 +3189 +3190 +3191 +3192 +3193 +3194 +3195 +3196 +3197 +3198 +3199 +3200 +3201 +3202 +3203 +3204 +3205 +3206 +3207 +3208 +3209 +3210 +3211 +3212 +3213 +3214 +3215 +3216 +3217 +3218 +3219 +3220 +3221 +3222 +3223 +3224 +3225 +3226 +3227 +3228 +3229 +3230 +3231 +3232 +3233 +3234 +3235 +3236 +3237 +3238 +3239 +3240 +3241 +3242 +3243 +3244 +3245 +3246 +3247 +3248 +3249 +3250 +3251 +3252 +3253 +3254 +3255 +3256 +3257 +3258 +3259 +3260 +3261 +3262 +3263 +3264 +3265 +3266 +3267 +3268 +3269 +3270 +3271 +3272 +3273 +3274 +3275 +3276 +3277 +3278 +3279 +3280 +3281 +3282 +3283 +3284 +3285 +3286 +3287 +3288 +3289 +3290 +3291 +3292 +3293 +3294 +3295 +3296 +3297 +3298 +3299 +3300 +3301 +3302 +3303 +3304 +3305 +3306 +3307 +3308 +3309 +3310 +3311 +3312 +3313 +3314 +3315 +3316 +3317 +3318 +3319 +3320 +3321 +3322 +3323 +3324 +3325 +3326 +3327 +3328 +3329 +3330 +3331 +3332 +3333 +3334 +3335 +3336 +3337 +3338 +3339 +3340 +3341 +3342 +3343 +3344 +3345 +3346 +3347 +3348 +3349 +3350 +3351 +3352 +3353 +3354 +3355 +3356 +3357 +3358 +3359 +3360 +3361 +3362 +3363 +3364 +3365 +3366 +3367 +3368 +3369 +3370 +3371 +3372 +3373 +3374 +3375 +3376 +3377 +3378 +3379 +3380 +3381 +3382 +3383 +3384 +3385 +3386 +3387 +3388 +3389 +3390 +3391 +3392 +3393 +3394 +3395 +3396 +3397 +3398 +3399 +3400 +3401 +3402 +3403 +3404 +3405 +3406 +3407 +3408 +3409 +3410 +3411 +3412 +3413 +3414 +3415 +3416 +3417 +3418 +3419 +3420 +3421 +3422 +3423 +3424 +3425 +3426 +3427 +3428 +3429 +3430 +3431 +3432 +3433 +3434 +3435 +3436 +3437 +3438 +3439 +3440 +3441 +3442 +3443 +3444 +3445 +3446 +3447 +3448 +3449 +3450 +3451 +3452 +3453 +3454 +3455 +3456 +3457 +3458 +3459 +3460 +3461 +3462 +3463 +3464 +3465 +3466 +3467 +3468 +3469 +3470 +3471 +3472 +3473 +3474 +3475 +3476 +3477 +3478 +3479 +3480 +3481 +3482 +3483 +3484 +3485 +3486 +3487 +3488 +3489 +3490 +3491 +3492 +3493 +3494 +3495 +3496 +3497 +3498 +3499 +3500 +3501 +3502 +3503 +3504 +3505 +3506 +3507 +3508 +3509 +3510 +3511 +3512 +3513 +3514 +3515 +3516 +3517 +3518 +3519 +3520 
+3521 +3522 +3523 +3524 +3525 +3526 +3527 +3528 +3529 +3530 +3531 +3532 +3533 +3534 +3535 +3536 +3537 +3538 +3539 +3540 +3541 +3542 +3543 +3544 +3545 +3546 +3547 +3548 +3549 +3550 +3551 +3552 +3553 +3554 +3555 +3556 +3557 +3558 +3559 +3560 +3561 +3562 +3563 +3564 +3565 +3566 +3567 +3568 +3569 +3570 +3571 +3572 +3573 +3574 +3575 +3576 +3577 +3578 +3579 +3580 +3581 +3582 +3583 +3584 +3585 +3586 +3587 +3588 +3589 +3590 +3591 +3592 +3593 +3594 +3595 +3596 +3597 +3598 +3599 +3600 +3601 +3602 +3603 +3604 +3605 +3606 +3607 +3608 +3609 +3610 +3611 +3612 +3613 +3614 +3615 +3616 +3617 +3618 +3619 +3620 +3621 +3622 +3623 +3624 +3625 +3626 +3627 +3628 +3629 +3630 +3631 +3632 +3633 +3634 +3635 +3636 +3637 +3638 +3639 +3640 +3641 +3642 +3643 +3644 +3645 +3646 +3647 +3648 +3649 +3650 +3651 +3652 +3653 +3654 +3655 +3656 +3657 +3658 +3659 +3660 +3661 +3662 +3663 +3664 +3665 +3666 +3667 +3668 +3669 +3670 +3671 +3672 +3673 +3674 +3675 +3676 +3677 +3678 +3679 +3680 +3681 +3682 +3683 +3684 +3685 +3686 +3687 +3688 +3689 +3690 +3691 +3692 +3693 +3694 +3695 +3696 +3697 +3698 +3699 +3700 +3701 +3702 +3703 +3704 +3705 +3706 +3707 +3708 +3709 +3710 +3711 +3712 +3713 +3714 +3715 +3716 +3717 +3718 +3719 +3720 +3721 +3722 +3723 +3724 +3725 +3726 +3727 +3728 +3729 +3730 +3731 +3732 +3733 +3734 +3735 +3736 +3737 +3738 +3739 +3740 +3741 +3742 +3743 +3744 +3745 +3746 +3747 +3748 +3749 +3750 +3751 +3752 +3753 +3754 +3755 +3756 +3757 +3758 +3759 +3760 +3761 +3762 +3763 +3764 +3765 +3766 +3767 +3768 +3769 +3770 +3771 +3772 +3773 +3774 +3775 +3776 +3777 +3778 +3779 +3780 +3781 +3782 +3783 +3784 +3785 +3786 +3787 +3788 +3789 +3790 +3791 +3792 +3793 +3794 +3795 +3796 +3797 +3798 +3799 +3800 +3801 +3802 +3803 +3804 +3805 +3806 +3807 +3808 +3809 +3810 +3811 +3812 +3813 +3814 +3815 +3816 +3817 +3818 +3819 +3820 +3821 +3822 +3823 +3824 +3825 +3826 +3827 +3828 +3829 +3830 +3831 +3832 +3833 +3834 +3835 +3836 +3837 +3838 +3839 +3840 +3841 +3842 +3843 +3844 +3845 +3846 +3847 +3848 +3849 +3850 +3851 +3852 +3853 
+3854 +3855 +3856 +3857 +3858 +3859 +3860 +3861 +3862 +3863 +3864 +3865 +3866 +3867 +3868 +3869 +3870 +3871 +3872 +3873 +3874 +3875 +3876 +3877 +3878 +3879 +3880 +3881 +3882 +3883 +3884 +3885 +3886 +3887 +3888 +3889 +3890 +3891 +3892 +3893 +3894 +3895 +3896 +3897 +3898 +3899 +3900 +3901 +3902 +3903 +3904 +3905 +3906 +3907 +3908 +3909 +3910 +3911 +3912 +3913 +3914 +3915 +3916 +3917 +3918 +3919 +3920 +3921 +3922 +3923 +3924 +3925 +3926 +3927 +3928 +3929 +3930 +3931 +3932 +3933 +3934 +3935 +3936 +3937 +3938 +3939 +3940 +3941 +3942 +3943 +3944 +3945 +3946 +3947 +3948 +3949 +3950 +3951 +3952 +3953 +3954 +3955 +3956 +3957 +3958 +3959 +3960 +3961 +3962 +3963 +3964 +3965 +3966 +3967 +3968 +3969 +3970 +3971 +3972 +3973 +3974 +3975 +3976 +3977 +3978 +3979 +3980 +3981 +3982 +3983 +3984 +3985 +3986 +3987 +3988 +3989 +3990 +3991 +3992 +3993 +3994 +3995 +3996 +3997 +3998 +3999 +4000 +4001 +4002 +4003 +4004 +4005 +4006 +4007 +4008 +4009 +4010 +4011 +4012 +4013 +4014 +4015 +4016 +4017 +4018 +4019 +4020 +4021 +4022 +4023 +4024 +4025 +4026 +4027 +4028 +4029 +4030 +4031 +4032 +4033 +4034 +4035 +4036 +4037 +4038 +4039 +4040 +4041 +4042 +4043 +4044 +4045 +4046 +4047 +4048 +4049 +4050 +4051 +4052 +4053 +4054 +4055 +4056 +4057 +4058 +4059 +4060 +4061 +4062 +4063 +4064 +4065 +4066 +4067 +4068 +4069 +4070 +4071 +4072 +4073 +4074 +4075 +4076 +4077 +4078 +4079 +4080 +4081 +4082 +4083 +4084 +4085 +4086 +4087 +4088 +4089 +4090 +4091 +4092 +4093 +4094 +4095 +4096 +4097 +4098 +4099 +4100 +4101 +4102 +4103 +4104 +4105 +4106 +4107 +4108 +4109 +4110 +4111 +4112 +4113 +4114 +4115 +4116 +4117 +4118 +4119 +4120 +4121 +4122 +4123 +4124 +4125 +4126 +4127 +4128 +4129 +4130 +4131 +4132 +4133 +4134 +4135 +4136 +4137 +4138 +4139 +4140 +4141 +4142 +4143 +4144 +4145 +4146 +4147 +4148 +4149 +4150 +4151 +4152 +4153 +4154 +4155 +4156 +4157 +4158 +4159 +4160 +4161 +4162 +4163 +4164 +4165 +4166 +4167 +4168 +4169 +4170 +4171 +4172 +4173 +4174 +4175 +4176 +4177 +4178 +4179 +4180 +4181 +4182 +4183 +4184 +4185 +4186 
+4187 +4188 +4189 +4190 +4191 +4192 +4193 +4194 +4195 +4196 +4197 +4198 +4199 +4200 +4201 +4202 +4203 +4204 +4205 +4206 +4207 +4208 +4209 +4210 +4211 +4212 +4213 +4214 +4215 +4216 +4217 +4218 +4219 +4220 +4221 +4222 +4223 +4224 +4225 +4226 +4227 +4228 +4229 +4230 +4231 +4232 +4233 +4234 +4235 +4236 +4237 +4238 +4239 +4240 +4241 +4242 +4243 +4244 +4245 +4246 +4247 +4248 +4249 +4250 +4251 +4252 +4253 +4254 +4255 +4256 +4257 +4258 +4259 +4260 +4261 +4262 +4263 +4264 +4265 +4266 +4267 +4268 +4269 +4270 +4271 +4272 +4273 +4274 +4275 +4276 +4277 +4278 +4279 +4280 +4281 +4282 +4283 +4284 +4285 +4286 +4287 +4288 +4289 +4290 +4291 +4292 +4293 +4294 +4295 +4296 +4297 +4298 +4299 +4300 +4301 +4302 +4303 +4304 +4305 +4306 +4307 +4308 +4309 +4310 +4311 +4312 +4313 +4314 +4315 +4316 +4317 +4318 +4319 +4320 +4321 +4322 +4323 +4324 +4325 +4326 +4327 +4328 +4329 +4330 +4331 +4332 +4333 +4334 +4335 +4336 +4337 +4338 +4339 +4340 +4341 +4342 +4343 +4344 +4345 +4346 +4347 +4348 +4349 +4350 +4351 +4352 +4353 +4354 +4355 +4356 +4357 +4358 +4359 +4360 +4361 +4362 +4363 +4364 +4365 +4366 +4367 +4368 +4369 +4370 +4371 +4372 +4373 +4374 +4375 +4376 +4377 +4378 +4379 +4380 +4381 +4382 +4383 +4384 +4385 +4386 +4387 +4388 +4389 +4390 +4391 +4392 +4393 +4394 +4395 +4396 +4397 +4398 +4399 +4400 +4401 +4402 +4403 +4404 +4405 +4406 +4407 +4408 +4409 +4410 +4411 +4412 +4413 +4414 +4415 +4416 +4417 +4418 +4419 +4420 +4421 +4422 +4423 +4424 +4425 +4426 +4427 +4428 +4429 +4430 +4431 +4432 +4433 +4434 +4435 +4436 +4437 +4438 +4439 +4440 +4441 +4442 +4443 +4444 +4445 +4446 +4447 +4448 +4449 +4450 +4451 +4452 +4453 +4454 +4455 +4456 +4457 +4458 +4459 +4460 +4461 +4462 +4463 +4464 +4465 +4466 +4467 +4468 +4469 +4470 +4471 +4472 +4473 +4474 +4475 +4476 +4477 +4478 +4479 +4480 +4481 +4482 +4483 +4484 +4485 +4486 +4487 +4488 +4489 +4490 +4491 +4492 +4493 +4494 +4495 +4496 +4497 +4498 +4499 +4500 +4501 +4502 +4503 +4504 +4505 +4506 +4507 +4508 +4509 +4510 +4511 +4512 +4513 +4514 +4515 +4516 +4517 +4518 +4519 
+4520 +4521 +4522 +4523 +4524 +4525 +4526 +4527 +4528 +4529 +4530 +4531 +4532 +4533 +4534 +4535 +4536 +4537 +4538 +4539 +4540 +4541 +4542 +4543 +4544 +4545 +4546 +4547 +4548 +4549 +4550 +4551 +4552 +4553 +4554 +4555 +4556 +4557 +4558 +4559 +4560 +4561 +4562 +4563 +4564 +4565 +4566 +4567 +4568 +4569 +4570 +4571 +4572 +4573 +4574 +4575 +4576 +4577 +4578 +4579 +4580 +4581 +4582 +4583 +4584 +4585 +4586 +4587 +4588 +4589 +4590 +4591 +4592 +4593 +4594 +4595 +4596 +4597 +4598 +4599 +4600 +4601 +4602 +4603 +4604 +4605 +4606 +4607 +4608 +4609 +4610 +4611 +4612 +4613 +4614 +4615 +4616 +4617 +4618 +4619 +4620 +4621 +4622 +4623 +4624 +4625 +4626 +4627 +4628 +4629 +4630 +4631 +4632 +4633 +4634 +4635 +4636 +4637 +4638 +4639 +4640 +4641 +4642 +4643 +4644 +4645 +4646 +4647 +4648 +4649 +4650 +4651 +4652 +4653 +4654 +4655 +4656 +4657 +4658 +4659 +4660 +4661 +4662 +4663 +4664 +4665 +4666 +4667 +4668 +4669 +4670 +4671 +4672 +4673 +4674 +4675 +4676 +4677 +4678 +4679 +4680 +4681 +4682 +4683 +4684 +4685 +4686 +4687 +4688 +4689 +4690 +4691 +4692 +4693 +4694 +4695 +4696 +4697 +4698 +4699 +4700 +4701 +4702 +4703 +4704 +4705 +4706 +4707 +4708 +4709 +4710 +4711 +4712 +4713 +4714 +4715 +4716 +4717 +4718 +4719 +4720 +4721 +4722 +4723 +4724 +4725 +4726 +4727 +4728 +4729 +4730 +4731 +4732 +4733 +4734 +4735 +4736 +4737 +4738 +4739 +4740 +4741 +4742 +4743 +4744 +4745 +4746 +4747 +4748 +4749 +4750 +4751 +4752 +4753 +4754 +4755 +4756 +4757 +4758 +4759 +4760 +4761 +4762 +4763 +4764 +4765 +4766 +4767 +4768 +4769 +4770 +4771 +4772 +4773 +4774 +4775 +4776 +4777 +4778 +4779 +4780 +4781 +4782 +4783 +4784 +4785 +4786 +4787 +4788 +4789 +4790 +4791 +4792 +4793 +4794 +4795 +4796 +4797 +4798 +4799 +4800 +4801 +4802 +4803 +4804 +4805 +4806 +4807 +4808 +4809 +4810 +4811 +4812 +4813 +4814 +4815 +4816 +4817 +4818 +4819 +4820 +4821 +4822 +4823 +4824 +4825 +4826 +4827 +4828 +4829 +4830 +4831 +4832 +4833 +4834 +4835 +4836 +4837 +4838 +4839 +4840 +4841 +4842 +4843 +4844 +4845 +4846 +4847 +4848 +4849 +4850 +4851 +4852 
+4853 +4854 +4855 +4856 +4857 +4858 +4859 +4860 +4861 +4862 +4863 +4864 +4865 +4866 +4867 +4868 +4869 +4870 +4871 +4872 +4873 +4874 +4875 +4876 +4877 +4878 +4879 +4880 +4881 +4882 +4883 +4884 +4885 +4886 +4887 +4888 +4889 +4890 +4891 +4892 +4893 +4894 +4895 +4896 +4897 +4898 +4899 +4900 +4901 +4902 +4903 +4904 +4905 +4906 +4907 +4908 +4909 +4910 +4911 +4912 +4913 +4914 +4915 +4916 +4917 +4918 +4919 +4920 +4921 +4922 +4923 +4924 +4925 +4926 +4927 +4928 +4929 +4930 +4931 +4932 +4933 +4934 +4935 +4936 +4937 +4938 +4939 +4940 +4941 +4942 +4943 +4944 +4945 +4946 +4947 +4948 +4949 +4950 +4951 +4952 +4953 +4954 +4955 +4956 +4957 +4958 +4959 +4960 +4961 +4962 +4963 +4964 +4965 +4966 +4967 +4968 +4969 +4970 +4971 +4972 +4973 +4974 +4975 +4976 +4977 +4978 +4979 +4980 +4981 +4982 +4983 +4984 +4985 +4986 +4987 +4988 +4989 +4990 +4991 +4992 +4993 +4994 +4995 +4996 +4997 +4998 +4999 +5000 +5001 +5002 +5003 +5004 +5005 +5006 +5007 +5008 +5009 +5010 +5011 +5012 +5013 +5014 +5015 +5016 +5017 +5018 +5019 +5020 +5021 +5022 +5023 +5024 +5025 +5026 +5027 +5028 +5029 +5030 +5031 +5032 +5033 +5034 +5035 +5036 +5037 +5038 +5039 +5040 +5041 +5042 +5043 +5044 +5045 +5046 +5047 +5048 +5049 +5050 +5051 +5052 +5053 +5054 +5055 +5056 +5057 +5058 +5059 +5060 +5061 +5062 +5063 +5064 +5065 +5066 +5067 +5068 +5069 +5070 +5071 +5072 +5073 +5074 +5075 +5076 +5077 +5078 +5079 +5080 +5081 +5082 +5083 +5084 +5085 +5086 +5087 +5088 +5089 +5090 +5091 +5092 +5093 +5094 +5095 +5096 +5097 +5098 +5099 +5100 +5101 +5102 +5103 +5104 +5105 +5106 +5107 +5108 +5109 +5110 +5111 +5112 +5113 +5114 +5115 +5116 +5117 +5118 +5119 +5120 +5121 +5122 +5123 +5124 +5125 +5126 +5127 +5128 +5129 +5130 +5131 +5132 +5133 +5134 +5135 +5136 +5137 +5138 +5139 +5140 +5141 +5142 +5143 +5144 +5145 +5146 +5147 +5148 +5149 +5150 +5151 +5152 +5153 +5154 +5155 +5156 +5157 +5158 +5159 +5160 +5161 +5162 +5163 +5164 +5165 +5166 +5167 +5168 +5169 +5170 +5171 +5172 +5173 +5174 +5175 +5176 +5177 +5178 +5179 +5180 +5181 +5182 +5183 +5184 +5185 
+5186 +5187 +5188 +5189 +5190 +5191 +5192 +5193 +5194 +5195 +5196 +5197 +5198 +5199 +5200 +5201 +5202 +5203 +5204 +5205 +5206 +5207 +5208 +5209 +5210 +5211 +5212 +5213 +5214 +5215 +5216 +5217 +5218 +5219 +5220 +5221 +5222 +5223 +5224 +5225 +5226 +5227 +5228 +5229 +5230 +5231 +5232 +5233 +5234 +5235 +5236 +5237 +5238 +5239 +5240 +5241 +5242 +5243 +5244 +5245 +5246 +5247 +5248 +5249 +5250 +5251 +5252 +5253 +5254 +5255 +5256 +5257 +5258 +5259 +5260 +5261 +5262 +5263 +5264 +5265 +5266 +5267 +5268 +5269 +5270 +5271 +5272 +5273 +5274 +5275 +5276 +5277 +5278 +5279 +5280 +5281 +5282 +5283 +5284 +5285 +5286 +5287 +5288 +5289 +5290 +5291 +5292 +5293 +5294 +5295 +5296 +5297 +5298 +5299 +5300 +5301 +5302 +5303 +5304 +5305 +5306 +5307 +5308 +5309 +5310 +5311 +5312 +5313 +5314 +5315 +5316 +5317 +5318 +5319 +5320 +5321 +5322 +5323 +5324 +5325 +5326 +5327 +5328 +5329 +5330 +5331 +5332 +5333 +5334 +5335 +5336 +5337 +5338 +5339 +5340 +5341 +5342 +5343 +5344 +5345 +5346 +5347 +5348 +5349 +5350 +5351 +5352 +5353 +5354 +5355 +5356 +5357 +5358 +5359 +5360 +5361 +5362 +5363 +5364 +5365 +5366 +5367 +5368 +5369 +5370 +5371 +5372 +5373 +5374 +5375 +5376 +5377 +5378 +5379 +5380 +5381 +5382 +5383 +5384 +5385 +5386 +5387 +5388 +5389 +5390 +5391 +5392 +5393 +5394 +5395 +5396 +5397 +5398 +5399 +5400 +5401 +5402 +5403 +5404 +5405 +5406 +5407 +5408 +5409 +5410 +5411 +5412 +5413 +5414 +5415 +5416 +5417 +5418 +5419 +5420 +5421 +5422 +5423 +5424 +5425 +5426 +5427 +5428 +5429 +5430 +5431 +5432 +5433 +5434 +5435 +5436 +5437 +5438 +5439 +5440 +5441 +5442 +5443 +5444 +5445 +5446 +5447 +5448 +5449 +5450 +5451 +5452 +5453 +5454 +5455 +5456 +5457 +5458 +5459 +5460 +5461 +5462 +5463 +5464 +5465 +5466 +5467 +5468 +5469 +5470 +5471 +5472 +5473 +5474 +5475 +5476 +5477 +5478 +5479 +5480 +5481 +5482 +5483 +5484 +5485 +5486 +5487 +5488 +5489 +5490 +5491 +5492 +5493 +5494 +5495 +5496 +5497 +5498 +5499 +5500 +5501 +5502 +5503 +5504 +5505 +5506 +5507 +5508 +5509 +5510 +5511 +5512 +5513 +5514 +5515 +5516 +5517 +5518 
+5519 +5520 +5521 +5522 +5523 +5524 +5525 +5526 +5527 +5528 +5529 +5530 +5531 +5532 +5533 +5534 +5535 +5536 +5537 +5538 +5539 +5540 +5541 +5542 +5543 +5544 +5545 +5546 +5547 +5548 +5549 +5550 +5551 +5552 +5553 +5554 +5555 +5556 +5557 +5558 +5559 +5560 +5561 +5562 +5563 +5564 +5565 +5566 +5567 +5568 +5569 +5570 +5571 +5572 +5573 +5574 +5575 +5576 +5577 +5578 +5579 +5580 +5581 +5582 +5583 +5584 +5585 +5586 +5587 +5588 +5589 +5590 +5591 +5592 +5593 +5594 +5595 +5596 +5597 +5598 +5599 +5600 +5601 +5602 +5603 +5604 +5605 +5606 +5607 +5608 +5609 +5610 +5611 +5612 +5613 +5614 +5615 +5616 +5617 +5618 +5619 +5620 +5621 +5622 +5623 +5624 +5625 +5626 +5627 +5628 +5629 +5630 +5631 +5632 +5633 +5634 +5635 +5636 +5637 +5638 +5639 +5640 +5641 +5642 +5643 +5644 +5645 +5646 +5647 +5648 +5649 +5650 +5651 +5652 +5653 +5654 +5655 +5656 +5657 +5658 +5659 +5660 +5661 +5662 +5663 +5664 +5665 +5666 +5667 +5668 +5669 +5670 +5671 +5672 +5673 +5674 +5675 +5676 +5677 +5678 +5679 +5680 +5681 +5682 +5683 +5684 +5685 +5686 +5687 +5688 +5689 +5690 +5691 +5692 +5693 +5694 +5695 +5696 +5697 +5698 +5699 +5700 +5701 +5702 +5703 +5704 +5705 +5706 +5707 +5708 +5709 +5710 +5711 +5712 +5713 +5714 +5715 +5716 +5717 +5718 +5719 +5720 +5721 +5722 +5723 +5724 +5725 +5726 +5727 +5728 +5729 +5730 +5731 +5732 +5733 +5734 +5735 +5736 +5737 +5738 +5739 +5740 +5741 +5742 +5743 +5744 +5745 +5746 +5747 +5748 +5749 +5750 +5751 +5752 +5753 +5754 +5755 +5756 +5757 +5758 +5759 +5760 +5761 +5762 +5763 +5764 +5765 +5766 +5767 +5768 +5769 +5770 +5771 +5772 +5773 +5774 +5775 +5776 +5777 +5778 +5779 +5780 +5781 +5782 +5783 +5784 +5785 +5786 +5787 +5788 +5789 +5790 +5791 +5792 +5793 +5794 +5795 +5796 +5797 +5798 +5799 +5800 +5801 +5802 +5803 +5804 +5805 +5806 +5807 +5808 +5809 +5810 +5811 +5812 +5813 +5814 +5815 +5816 +5817 +5818 +5819 +5820 +5821 +5822 +5823 +5824 +5825 +5826 +5827 +5828 +5829 +5830 +5831 +5832 +5833 +5834 +5835 +5836 +5837 +5838 +5839 +5840 +5841 +5842 +5843 +5844 +5845 +5846 +5847 +5848 +5849 +5850 +5851 
+5852 +5853 +5854 +5855 +5856 +5857 +5858 +5859 +5860 +5861 +5862 +5863 +5864 +5865 +5866 +5867 +5868 +5869 +5870 +5871 +5872 +5873 +5874 +5875 +5876 +5877 +5878 +5879 +5880 +5881 +5882 +5883 +5884 +5885 +5886 +5887 +5888 +5889 +5890 +5891 +5892 +5893 +5894 +5895 +5896 +5897 +5898 +5899 +5900 +5901 +5902 +5903 +5904 +5905 +5906 +5907 +5908 +5909 +5910 +5911 +5912 +5913 +5914 +5915 +5916 +5917 +5918 +5919 +5920 +5921 +5922 +5923 +5924 +5925 +5926 +5927 +5928 +5929 +5930 +5931 +5932 +5933 +5934 +5935 +5936 +5937 +5938 +5939 +5940 +5941 +5942 +5943 +5944 +5945 +5946 +5947 +5948 +5949 +5950 +5951 +5952 +5953 +5954 +5955 +5956 +5957 +5958 +5959 +5960 +5961 +5962 +5963 +5964 +5965 +5966 +5967 +5968 +5969 +5970 +5971 +5972 +5973 +5974 +5975 +5976 +5977 +5978 +5979 +5980 +5981 +5982 +5983 +5984 +5985 +5986 +5987 +5988 +5989 +5990 +5991 +5992 +5993 +5994 +5995 +5996 +5997 +5998 +5999 +6000 +6001 +6002 +6003 +6004 +6005 +6006 +6007 +6008 +6009 +6010 +6011 +6012 +6013 +6014 +6015 +6016 +6017 +6018 +6019 +6020 +6021 +6022 +6023 +6024 +6025 +6026 +6027 +6028 +6029 +6030 +6031 +6032 +6033 +6034 +6035 +6036 +6037 +6038 +6039 +6040 +6041 +6042 +6043 +6044 +6045 +6046 +6047 +6048 +6049 +6050 +6051 +6052 +6053 +6054 +6055 +6056 +6057 +6058 +6059 +6060 +6061 +6062 +6063 +6064 +6065 +6066 +6067 +6068 +6069 +6070 +6071 +6072 +6073 +6074 +6075 +6076 +6077 +6078 +6079 +6080 +6081 +6082 +6083 +6084 +6085 +6086 +6087 +6088 +6089 +6090 +6091 +6092 +6093 +6094 +6095 +6096 +6097 +6098 +6099 +6100 +6101 +6102 +6103 +6104 +6105 +6106 +6107 +6108 +6109 +6110 +6111 +6112 +6113 +6114 +6115 +6116 +6117 +6118 +6119 +6120 +6121 +6122 +6123 +6124 +6125 +6126 +6127 +6128 +6129 +6130 +6131 +6132 +6133 +6134 +6135 +6136 +6137 +6138 +6139 +6140 +6141 +6142 +6143 +6144 +6145 +6146 +6147 +6148 +6149 +6150 +6151 +6152 +6153 +6154 +6155 +6156 +6157 +6158 +6159 +6160 +6161 +6162 +6163 +6164 +6165 +6166 +6167 +6168 +6169 +6170 +6171 +6172 +6173 +6174 +6175 +6176 +6177 +6178 +6179 +6180 +6181 +6182 +6183 +6184 
+6185 +6186 +6187 +6188 +6189 +6190 +6191 +6192 +6193 +6194 +6195 +6196 +6197 +6198 +6199 +6200 +6201 +6202 +6203 +6204 +6205 +6206 +6207 +6208 +6209 +6210 +6211 +6212 +6213 +6214 +6215 +6216 +6217 +6218 +6219 +6220 +6221 +6222 +6223 +6224 +6225 +6226 +6227 +6228 +6229 +6230 +6231 +6232 +6233 +6234 +6235 +6236 +6237 +6238 +6239 +6240 +6241 +6242 +6243 +6244 +6245 +6246 +6247 +6248 +6249 +6250 +6251 +6252 +6253 +6254 +6255 +6256 +6257 +6258 +6259 +6260 +6261 +6262 +6263 +6264 +6265 +6266 +6267 +6268 +6269 +6270 +6271 +6272 +6273 +6274 +6275 +6276 +6277 +6278 +6279 +6280 +6281 +6282 +6283 +6284 +6285 +6286 +6287 +6288 +6289 +6290 +6291 +6292 +6293 +6294 +6295 +6296 +6297 +6298 +6299 +6300 +6301 +6302 +6303 +6304 +6305 +6306 +6307 +6308 +6309 +6310 +6311 +6312 +6313 +6314 +6315 +6316 +6317 +6318 +6319 +6320 +6321 +6322 +6323 +6324 +6325 +6326 +6327 +6328 +6329 +6330 +6331 +6332 +6333 +6334 +6335 +6336 +6337 +6338 +6339 +6340 +6341 +6342 +6343 +6344 +6345 +6346 +6347 +6348 +6349 +6350 +6351 +6352 +6353 +6354 +6355 +6356 +6357 +6358 +6359 +6360 +6361 +6362 +6363 +6364 +6365 +6366 +6367 +6368 +6369 +6370 +6371 +6372 +6373 +6374 +6375 +6376 +6377 +6378 +6379 +6380 +6381 +6382 +6383 +6384 +6385 +6386 +6387 +6388 +6389 +6390 +6391 +6392 +6393 +6394 +6395 +6396 +6397 +6398 +6399 +6400 +6401 +6402 +6403 +6404 +6405 +6406 +6407 +6408 +6409 +6410 +6411 +6412 +6413 +6414 +6415 +6416 +6417 +6418 +6419 +6420 +6421 +6422 +6423 +6424 +6425 +6426 +6427 +6428 +6429 +6430 +6431 +6432 +6433 +6434 +6435 +6436 +6437 +6438 +6439 +6440 +6441 +6442 +6443 +6444 +6445 +6446 +6447 +6448 +6449 +6450 +6451 +6452 +6453 +6454 +6455 +6456 +6457 +6458 +6459 +6460 +6461 +6462 +6463 +6464 +6465 +6466 +6467 +6468 +6469 +6470 +6471 +6472 +6473 +6474 +6475 +6476 +6477 +6478 +6479 +6480 +6481 +6482 +6483 +6484 +6485 +6486 +6487 +6488 +6489 +6490 +6491 +6492 +6493 +6494 +6495 +6496 +6497 +6498 +6499 +6500 +6501 +6502 +6503 +6504 +6505 +6506 +6507 +6508 +6509 +6510 +6511 +6512 +6513 +6514 +6515 +6516 +6517 
+6518 +6519 +6520 +6521 +6522 +6523 +6524 +6525 +6526 +6527 +6528 +6529 +6530 +6531 +6532 +6533 +6534 +6535 +6536 +6537 +6538 +6539 +6540 +6541 +6542 +6543 +6544 +6545 +6546 +6547 +6548 +6549 +6550 +6551 +6552 +6553 +6554 +6555 +6556 +6557 +6558 +6559 +6560 +6561 +6562 +6563 +6564 +6565 +6566 +6567 +6568 +6569 +6570 +6571 +6572 +6573 +6574 +6575 +6576 +6577 +6578 +6579 +6580 +6581 +6582 +6583 +6584 +6585 +6586 +6587 +6588 +6589 +6590 +6591 +6592 +6593 +6594 +6595 +6596 +6597 +6598 +6599 +6600 +6601 +6602 +6603 +6604 +6605 +6606 +6607 +6608 +6609 +6610 +6611 +6612 +6613 +6614 +6615 +6616 +6617 +6618 +6619 +6620 +6621 +6622 +6623 +6624 +6625 +6626 +6627 +6628 +6629 +6630 +6631 +6632 +6633 +6634 +6635 +6636 +6637 +6638 +6639 +6640 +6641 +6642 +6643 +6644 +6645 +6646 +6647 +6648 +6649 +6650 +6651 +6652 +6653 +6654 +6655 +6656 +6657 +6658 +6659 +6660 +6661 +6662 +6663 +6664 +6665 +6666 +6667 +6668 +6669 +6670 +6671 +6672 +6673 +6674 +6675 +6676 +6677 +6678 +6679 +6680 +6681 +6682 +6683 +6684 +6685 +6686 +6687 +6688 +6689 +6690 +6691 +6692 +6693 +6694 +6695 +6696 +6697 +6698 +6699 +6700 +6701 +6702 +6703 +6704 +6705 +6706 +6707 +6708 +6709 +6710 +6711 +6712 +6713 +6714 +6715 +6716 +6717 +6718 +6719 +6720 +6721 +6722 +6723 +6724 +6725 +6726 +6727 +6728 +6729 +6730 +6731 +6732 +6733 +6734 +6735 +6736 +6737 +6738 +6739 +6740 +6741 +6742 +6743 +6744 +6745 +6746 +6747 +6748 +6749 +6750 +6751 +6752 +6753 +6754 +6755 +6756 +6757 +6758 +6759 +6760 +6761 +6762 +6763 +6764 +6765 +6766 +6767 +6768 +6769 +6770 +6771 +6772 +6773 +6774 +6775 +6776 +6777 +6778 +6779 +6780 +6781 +6782 +6783 +6784 +6785 +6786 +6787 +6788 +6789 +6790 +6791 +6792 +6793 +6794 +6795 +6796 +6797 +6798 +6799 +6800 +6801 +6802 +6803 +6804 +6805 +6806 +6807 +6808 +6809 +6810 +6811 +6812 +6813 +6814 +6815 +6816 +6817 +6818 +6819 +6820 +6821 +6822 +6823 +6824 +6825 +6826 +6827 +6828 +6829 +6830 +6831 +6832 +6833 +6834 +6835 +6836 +6837 +6838 +6839 +6840 +6841 +6842 +6843 +6844 +6845 +6846 +6847 +6848 +6849 +6850 
+6851 +6852 +6853 +6854 +6855 +6856 +6857 +6858 +6859 +6860 +6861 +6862 +6863 +6864 +6865 +6866 +6867 +6868 +6869 +6870 +6871 +6872 +6873 +6874 +6875 +6876 +6877 +6878 +6879 +6880 +6881 +6882 +6883 +6884 +6885 +6886 +6887 +6888 +6889 +6890 +6891 +6892 +6893 +6894 +6895 +6896 +6897 +6898 +6899 +6900 +6901 +6902 +6903 +6904 +6905 +6906 +6907 +6908 +6909 +6910 +6911 +6912 +6913 +6914 +6915 +6916 +6917 +6918 +6919 +6920 +6921 +6922 +6923 +6924 +6925 +6926 +6927 +6928 +6929 +6930 +6931 +6932 +6933 +6934 +6935 +6936 +6937 +6938 +6939 +6940 +6941 +6942 +6943 +6944 +6945 +6946 +6947 +6948 +6949 +6950 +6951 +6952 +6953 +6954 +6955 +6956 +6957 +6958 +6959 +6960 +6961 +6962 +6963 +6964 +6965 +6966 +6967 +6968 +6969 +6970 +6971 +6972 +6973 +6974 +6975 +6976 +6977 +6978 +6979 +6980 +6981 +6982 +6983 +6984 +6985 +6986 +6987 +6988 +6989 +6990 +6991 +6992 +6993 +6994 +6995 +6996 +6997 +6998 +6999 +7000 +7001 +7002 +7003 +7004 +7005 +7006 +7007 +7008 +7009 +7010 +7011 +7012 +7013 +7014 +7015 +7016 +7017 +7018 +7019 +7020 +7021 +7022 +7023 +7024 +7025 +7026 +7027 +7028 +7029 +7030 +7031 +7032 +7033 +7034 +7035 +7036 +7037 +7038 +7039 +7040 +7041 +7042 +7043 +7044 +7045 +7046 +7047 +7048 +7049 +7050 +7051 +7052 +7053 +7054 +7055 +7056 +7057 +7058 +7059 +7060 +7061 +7062 +7063 +7064 +7065 +7066 +7067 +7068 +7069 +7070 +7071 +7072 +7073 +7074 +7075 +7076 +7077 +7078 +7079 +7080 +7081 +7082 +7083 +7084 +7085 +7086 +7087 +7088 +7089 +7090 +7091 +7092 +7093 +7094 +7095 +7096 +7097 +7098 +7099 +7100 +7101 +7102 +7103 +7104 +7105 +7106 +7107 +7108 +7109 +7110 +7111 +7112 +7113 +7114 +7115 +7116 +7117 +7118 +7119 +7120 +7121 +7122 +7123 +7124 +7125 +7126 +7127 +7128 +7129 +7130 +7131 +7132 +7133 +7134 +7135 +7136 +7137 +7138 +7139 +7140 +7141 +7142 +7143 +7144 +7145 +7146 +7147 +7148 +7149 +7150 +7151 +7152 +7153 +7154 +7155 +7156 +7157 +7158 +7159 +7160 +7161 +7162 +7163 +7164 +7165 +7166 +7167 +7168 +7169 +7170 +7171 +7172 +7173 +7174 +7175 +7176 +7177 +7178 +7179 +7180 +7181 +7182 +7183 
+7184 +7185 +7186 +7187 +7188 +7189 +7190 +7191 +7192 +7193 +7194 +7195 +7196 +7197 +7198 +7199 +7200 +7201 +7202 +7203 +7204 +7205 +7206 +7207 +7208 +7209 +7210 +7211 +7212 +7213 +7214 +7215 +7216 +7217 +7218 +7219 +7220 +7221 +7222 +7223 +7224 +7225 +7226 +7227 +7228 +7229 +7230 +7231 +7232 +7233 +7234 +7235 +7236 +7237 +7238 +7239 +7240 +7241 +7242 +7243 +7244 +7245 +7246 +7247 +7248 +7249 +7250 +7251 +7252 +7253 +7254 +7255 +7256 +7257 +7258 +7259 +7260 +7261 +7262 +7263 +7264 +7265 +7266 +7267 +7268 +7269 +7270 +7271 +7272 +7273 +7274 +7275 +7276 +7277 +7278 +7279 +7280 +7281 +7282 +7283 +7284 +7285 +7286 +7287 +7288 +7289 +7290 +7291 +7292 +7293 +7294 +7295 +7296 +7297 +7298 +7299 +7300 +7301 +7302 +7303 +7304 +7305 +7306 +7307 +7308 +7309 +7310 +7311 +7312 +7313 +7314 +7315 +7316 +7317 +7318 +7319 +7320 +7321 +7322 +7323 +7324 +7325 +7326 +7327 +7328 +7329 +7330 +7331 +7332 +7333 +7334 +7335 +7336 +7337 +7338 +7339 +7340 +7341 +7342 +7343 +7344 +7345 +7346 +7347 +7348 +7349 +7350 +7351 +7352 +7353 +7354 +7355 +7356 +7357 +7358 +7359 +7360 +7361 +7362 +7363 +7364 +7365 +7366 +7367 +7368 +7369 +7370 +7371 +7372 +7373 +7374 +7375 +7376 +7377 +7378 +7379 +7380 +7381 +7382 +7383 +7384 +7385 +7386 +7387 +7388 +7389 +7390 +7391 +7392 +7393 +7394 +7395 +7396 +7397 +7398 +7399 +7400 +7401 +7402 +7403 +7404 +7405 +7406 +7407 +7408 +7409 +7410 +7411 +7412 +7413 +7414 +7415 +7416 +7417 +7418 +7419 +7420 +7421 +7422 +7423 +7424 +7425 +7426 +7427 +7428 +7429 +7430 +7431 +7432 +7433 +7434 +7435 +7436 +7437 +7438 +7439 +7440 +7441 +7442 +7443 +7444 +7445 +7446 +7447 +7448 +7449 +7450 +7451 +7452 +7453 +7454 +7455 +7456 +7457 +7458 +7459 +7460 +7461 +7462 +7463 +7464 +7465 +7466 +7467 +7468 +7469 +7470 +7471 +7472 +7473 +7474 +7475 +7476 +7477 +7478 +7479 +7480 +7481 +7482 +7483 +7484 +7485 +7486 +7487 +7488 +7489 +7490 +7491 +7492 +7493 +7494 +7495 +7496 +7497 +7498 +7499 +7500 +7501 +7502 +7503 +7504 +7505 +7506 +7507 +7508 +7509 +7510 +7511 +7512 +7513 +7514 +7515 +7516 
+7517 +7518 +7519 +7520 +7521 +7522 +7523 +7524 +7525 +7526 +7527 +7528 +7529 +7530 +7531 +7532 +7533 +7534 +7535 +7536 +7537 +7538 +7539 +7540 +7541 +7542 +7543 +7544 +7545 +7546 +7547 +7548 +7549 +7550 +7551 +7552 +7553 +7554 +7555 +7556 +7557 +7558 +7559 +7560 +7561 +7562 +7563 +7564 +7565 +7566 +7567 +7568 +7569 +7570 +7571 +7572 +7573 +7574 +7575 +7576 +7577 +7578 +7579 +7580 +7581 +7582 +7583 +7584 +7585 +7586 +7587 +7588 +7589 +7590 +7591 +7592 +7593 +7594 +7595 +7596 +7597 +7598 +7599 +7600 +7601 +7602 +7603 +7604 +7605 +7606 +7607 +7608 +7609 +7610 +7611 +7612 +7613 +7614 +7615 +7616 +7617 +7618 +7619 +7620 +7621 +7622 +7623 +7624 +7625 +7626 +7627 +7628 +7629 +7630 +7631 +7632 +7633 +7634 +7635 +7636 +7637 +7638 +7639 +7640 +7641 +7642 +7643 +7644 +7645 +7646 +7647 +7648 +7649 +7650 +7651 +7652 +7653 +7654 +7655 +7656 +7657 +7658 +7659 +7660 +7661 +7662 +7663 +7664 +7665 +7666 +7667 +7668 +7669 +7670 +7671 +7672 +7673 +7674 +7675 +7676 +7677 +7678 +7679 +7680 +7681 +7682 +7683 +7684 +7685 +7686 +7687 +7688 +7689 +7690 +7691 +7692 +7693 +7694 +7695 +7696 +7697 +7698 +7699 +7700 +7701 +7702 +7703 +7704 +7705 +7706 +7707 +7708 +7709 +7710 +7711 +7712 +7713 +7714 +7715 +7716 +7717 +7718 +7719 +7720 +7721 +7722 +7723 +7724 +7725 +7726 +7727 +7728 +7729 +7730 +7731 +7732 +7733 +7734 +7735 +7736 +7737 +7738 +7739 +7740 +7741 +7742 +7743 +7744 +7745 +7746 +7747 +7748 +7749 +7750 +7751 +7752 +7753 +7754 +7755 +7756 +7757 +7758 +7759 +7760 +7761 +7762 +7763 +7764 +7765 +7766 +7767 +7768 +7769 +7770 +7771 +7772 +7773 +7774 +7775 +7776 +7777 +7778 +7779 +7780 +7781 +7782 +7783 +7784 +7785 +7786 +7787 +7788 +7789 +7790 +7791 +7792 +7793 +7794 +7795 +7796 +7797 +7798 +7799 +7800 +7801 +7802 +7803 +7804 +7805 +7806 +7807 +7808 +7809 +7810 +7811 +7812 +7813 +7814 +7815 +7816 +7817 +7818 +7819 +7820 +7821 +7822 +7823 +7824 +7825 +7826 +7827 +7828 +7829 +7830 +7831 +7832 +7833 +7834 +7835 +7836 +7837 +7838 +7839 +7840 +7841 +7842 +7843 +7844 +7845 +7846 +7847 +7848 +7849 
+7850 +7851 +7852 +7853 +7854 +7855 +7856 +7857 +7858 +7859 +7860 +7861 +7862 +7863 +7864 +7865 +7866 +7867 +7868 +7869 +7870 +7871 +7872 +7873 +7874 +7875 +7876 +7877 +7878 +7879 +7880 +7881 +7882 +7883 +7884 +7885 +7886 +7887 +7888 +7889 +7890 +7891 +7892 +7893 +7894 +7895 +7896 +7897 +7898 +7899 +7900 +7901 +7902 +7903 +7904 +7905 +7906 +7907 +7908 +7909 +7910 +7911 +7912 +7913 +7914 +7915 +7916 +7917 +7918 +7919 +7920 +7921 +7922 +7923 +7924 +7925 +7926 +7927 +7928 +7929 +7930 +7931 +7932 +7933 +7934 +7935 +7936 +7937 +7938 +7939 +7940 +7941 +7942 +7943 +7944 +7945 +7946 +7947 +7948 +7949 +7950 +7951 +7952 +7953 +7954 +7955 +7956 +7957 +7958 +7959 +7960 +7961 +7962 +7963 +7964 +7965 +7966 +7967 +7968 +7969 +7970 +7971 +7972 +7973 +7974 +7975 +7976 +7977 +7978 +7979 +7980 +7981 +7982 +7983 +7984 +7985 +7986 +7987 +7988 +7989 +7990 +7991 +7992 +7993 +7994 +7995 +7996 +7997 +7998 +7999 +8000 +8001 +8002 +8003 +8004 +8005 +8006 +8007 +8008 +8009 +8010 +8011 +8012 +8013 +8014 +8015 +8016 +8017 +8018 +8019 +8020 +8021 +8022 +8023 +8024 +8025 +8026 +8027 +8028 +8029 +8030 +8031 +8032 +8033 +8034 +8035 +8036 +8037 +8038 +8039 +8040 +8041 +8042 +8043 +8044 +8045 +8046 +8047 +8048 +8049 +8050 +8051 +8052 +8053 +8054 +8055 +8056 +8057 +8058 +8059 +8060 +8061 +8062 +8063 +8064 +8065 +8066 +8067 +8068 +8069 +8070 +8071 +8072 +8073 +8074 +8075 +8076 +8077 +8078 +8079 +8080 +8081 +8082 +8083 +8084 +8085 +8086 +8087 +8088 +8089 +8090 +8091 +8092 +8093 +8094 +8095 +8096 +8097 +8098 +8099 +8100 +8101 +8102 +8103 +8104 +8105 +8106 +8107 +8108 +8109 +8110 +8111 +8112 +8113 +8114 +8115 +8116 +8117 +8118 +8119 +8120 +8121 +8122 +8123 +8124 +8125 +8126 +8127 +8128 +8129 +8130 +8131 +8132 +8133 +8134 +8135 +8136 +8137 +8138 +8139 +8140 +8141 +8142 +8143 +8144 +8145 +8146 +8147 +8148 +8149 +8150 +8151 +8152 +8153 +8154 +8155 +8156 +8157 +8158 +8159 +8160 +8161 +8162 +8163 +8164 +8165 +8166 +8167 +8168 +8169 +8170 +8171 +8172 +8173 +8174 +8175 +8176 +8177 +8178 +8179 +8180 +8181 +8182 
+8183 +8184 +8185 +8186 +8187 +8188 +8189 +8190 +8191 +8192 +8193 +8194 +8195 +8196 +8197 +8198 +8199 +8200 +8201 +8202 +8203 +8204 +8205 +8206 +8207 +8208 +8209 +8210 +8211 +8212 +8213 +8214 +8215 +8216 +8217 +8218 +8219 +8220 +8221 +8222 +8223 +8224 +8225 +8226 +8227 +8228 +8229 +8230 +8231 +8232 +8233 +8234 +8235 +8236 +8237 +8238 +8239 +8240 +8241 +8242 +8243 +8244 +8245 +8246 +8247 +8248 +8249 +8250 +8251 +8252 +8253 +8254 +8255 +8256 +8257 +8258 +8259 +8260 +8261 +8262 +8263 +8264 +8265 +8266 +8267 +8268 +8269 +8270 +8271 +8272 +8273 +8274 +8275 +8276 +8277 +8278 +8279 +8280 +8281 +8282 +8283 +8284 +8285 +8286 +8287 +8288 +8289 +8290 +8291 +8292 +8293 +8294 +8295 +8296 +8297 +8298 +8299 +8300 +8301 +8302 +8303 +8304 +8305 +8306 +8307 +8308 +8309 +8310 +8311 +8312 +8313 +8314 +8315 +8316 +8317 +8318 +8319 +8320 +8321 +8322 +8323 +8324 +8325 +8326 +8327 +8328 +8329 +8330 +8331 +8332 +8333 +8334 +8335 +8336 +8337 +8338 +8339 +8340 +8341 +8342 +8343 +8344 +8345 +8346 +8347 +8348 +8349 +8350 +8351 +8352 +8353 +8354 +8355 +8356 +8357 +8358 +8359 +8360 +8361 +8362 +8363 +8364 +8365 +8366 +8367 +8368 +8369 +8370 +8371 +8372 +8373 +8374 +8375 +8376 +8377 +8378 +8379 +8380 +8381 +8382 +8383 +8384 +8385 +8386 +8387 +8388 +8389 +8390 +8391 +8392 +8393 +8394 +8395 +8396 +8397 +8398 +8399 +8400 +8401 +8402 +8403 +8404 +8405 +8406 +8407 +8408 +8409 +8410 +8411 +8412 +8413 +8414 +8415 +8416 +8417 +8418 +8419 +8420 +8421 +8422 +8423 +8424 +8425 +8426 +8427 +8428 +8429 +8430 +8431 +8432 +8433 +8434 +8435 +8436 +8437 +8438 +8439 +8440 +8441 +8442 +8443 +8444 +8445 +8446 +8447 +8448 +8449 +8450 +8451 +8452 +8453 +8454 +8455 +8456 +8457 +8458 +8459 +8460 +8461 +8462 +8463 +8464 +8465 +8466 +8467 +8468 +8469 +8470 +8471 +8472 +8473 +8474 +8475 +8476 +8477 +8478 +8479 +8480 +8481 +8482 +8483 +8484 +8485 +8486 +8487 +8488 +8489 +8490 +8491 +8492 +8493 +8494 +8495 +8496 +8497 +8498 +8499 +8500 +8501 +8502 +8503 +8504 +8505 +8506 +8507 +8508 +8509 +8510 +8511 +8512 +8513 +8514 +8515 
+8516 +8517 +8518 +8519 +8520 +8521 +8522 +8523 +8524 +8525 +8526 +8527 +8528 +8529 +8530 +8531 +8532 +8533 +8534 +8535 +8536 +8537 +8538 +8539 +8540 +8541 +8542 +8543 +8544 +8545 +8546 +8547 +8548 +8549 +8550 +8551 +8552 +8553 +8554 +8555 +8556 +8557 +8558 +8559 +8560 +8561 +8562 +8563 +8564 +8565 +8566 +8567 +8568 +8569 +8570 +8571 +8572 +8573 +8574 +8575 +8576 +8577 +8578 +8579 +8580 +8581 +8582 +8583 +8584 +8585 +8586 +8587 +8588 +8589 +8590 +8591 +8592 +8593 +8594 +8595 +8596 +8597 +8598 +8599 +8600 +8601 +8602 +8603 +8604 +8605 +8606 +8607 +8608 +8609 +8610 +8611 +8612 +8613 +8614 +8615 +8616 +8617 +8618 +8619 +8620 +8621 +8622 +8623 +8624 +8625 +8626 +8627 +8628 +8629 +8630 +8631 +8632 +8633 +8634 +8635 +8636 +8637 +8638 +8639 +8640 +8641 +8642 +8643 +8644 +8645 +8646 +8647 +8648 +8649 +8650 +8651 +8652 +8653 +8654 +8655 +8656 +8657 +8658 +8659 +8660 +8661 +8662 +8663 +8664 +8665 +8666 +8667 +8668 +8669 +8670 +8671 +8672 +8673 +8674 +8675 +8676 +8677 +8678 +8679 +8680 +8681 +8682 +8683 +8684 +8685 +8686 +8687 +8688 +8689 +8690 +8691 +8692 +8693 +8694 +8695 +8696 +8697 +8698 +8699 +8700 +8701 +8702 +8703 +8704 +8705 +8706 +8707 +8708 +8709 +8710 +8711 +8712 +8713 +8714 +8715 +8716 +8717 +8718 +8719 +8720 +8721 +8722 +8723 +8724 +8725 +8726 +8727 +8728 +8729 +8730 +8731 +8732 +8733 +8734 +8735 +8736 +8737 +8738 +8739 +8740 +8741 +8742 +8743 +8744 +8745 +8746 +8747 +8748 +8749 +8750 +8751 +8752 +8753 +8754 +8755 +8756 +8757 +8758 +8759 +8760 +8761 +8762 +8763 +8764 +8765 +8766 +8767 +8768 +8769 +8770 +8771 +8772 +8773 +8774 +8775 +8776 +8777 +8778 +8779 +8780 +8781 +8782 +8783 +8784 +8785 +8786 +8787 +8788 +8789 +8790 +8791 +8792 +8793 +8794 +8795 +8796 +8797 +8798 +8799 +8800 +8801 +8802 +8803 +8804 +8805 +8806 +8807 +8808 +8809 +8810 +8811 +8812 +8813 +8814 +8815 +8816 +8817 +8818 +8819 +8820 +8821 +8822 +8823 +8824 +8825 +8826 +8827 +8828 +8829 +8830 +8831 +8832 +8833 +8834 +8835 +8836 +8837 +8838 +8839 +8840 +8841 +8842 +8843 +8844 +8845 +8846 +8847 +8848 
+8849 +8850 +8851 +8852 +8853 +8854 +8855 +8856 +8857 +8858 +8859 +8860 +8861 +8862 +8863 +8864 +8865 +8866 +8867 +8868 +8869 +8870 +8871 +8872 +8873 +8874 +8875 +8876 +8877 +8878 +8879 +8880 +8881 +8882 +8883 +8884 +8885 +8886 +8887 +8888 +8889 +8890 +8891 +8892 +8893 +8894 +8895 +8896 +8897 +8898 +8899 +8900 +8901 +8902 +8903 +8904 +8905 +8906 +8907 +8908 +8909 +8910 +8911 +8912 +8913 +8914 +8915 +8916 +8917 +8918 +8919 +8920 +8921 +8922 +8923 +8924 +8925 +8926 +8927 +8928 +8929 +8930 +8931 +8932 +8933 +8934 +8935 +8936 +8937 +8938 +8939 +8940 +8941 +8942 +8943 +8944 +8945 +8946 +8947 +8948 +8949 +8950 +8951 +8952 +8953 +8954 +8955 +8956 +8957 +8958 +8959 +8960 +8961 +8962 +8963 +8964 +8965 +8966 +8967 +8968 +8969 +8970 +8971 +8972 +8973 +8974 +8975 +8976 +8977 +8978 +8979 +8980 +8981 +8982 +8983 +8984 +8985 +8986 +8987 +8988 +8989 +8990 +8991 +8992 +8993 +8994 +8995 +8996 +8997 +8998 +8999 +9000 +9001 +9002 +9003 +9004 +9005 +9006 +9007 +9008 +9009 +9010 +9011 +9012 +9013 +9014 +9015 +9016 +9017 +9018 +9019 +9020 +9021 +9022 +9023 +9024 +9025 +9026 +9027 +9028 +9029 +9030 +9031 +9032 +9033 +9034 +9035 +9036 +9037 +9038 +9039 +9040 +9041 +9042 +9043 +9044 +9045 +9046 +9047 +9048 +9049 +9050 +9051 +9052 +9053 +9054 +9055 +9056 +9057 +9058 +9059 +9060 +9061 +9062 +9063 +9064 +9065 +9066 +9067 +9068 +9069 +9070 +9071 +9072 +9073 +9074 +9075 +9076 +9077 +9078 +9079 +9080 +9081 +9082 +9083 +9084 +9085 +9086 +9087 +9088 +9089 +9090 +9091 +9092 +9093 +9094 +9095 +9096 +9097 +9098 +9099 +9100 +9101 +9102 +9103 +9104 +9105 +9106 +9107 +9108 +9109 +9110 +9111 +9112 +9113 +9114 +9115 +9116 +9117 +9118 +9119 +9120 +9121 +9122 +9123 +9124 +9125 +9126 +9127 +9128 +9129 +9130 +9131 +9132 +9133 +9134 +9135 +9136 +9137 +9138 +9139 +9140 +9141 +9142 +9143 +9144 +9145 +9146 +9147 +9148 +9149 +9150 +9151 +9152 +9153 +9154 +9155 +9156 +9157 +9158 +9159 +9160 +9161 +9162 +9163 +9164 +9165 +9166 +9167 +9168 +9169 +9170 +9171 +9172 +9173 +9174 +9175 +9176 +9177 +9178 +9179 +9180 +9181 
+9182 +9183 +9184 +9185 +9186 +9187 +9188 +9189 +9190 +9191 +9192 +9193 +9194 +9195 +9196 +9197 +9198 +9199 +9200 +9201 +9202 +9203 +9204 +9205 +9206 +9207 +9208 +9209 +9210 +9211 +9212 +9213 +9214 +9215 +9216 +9217 +9218 +9219 +9220 +9221 +9222 +9223 +9224 +9225 +9226 +9227 +9228 +9229 +9230 +9231 +9232 +9233 +9234 +9235 +9236 +9237 +9238 +9239 +9240 +9241 +9242 +9243 +9244 +9245 +9246 +9247 +9248 +9249 +9250 +9251 +9252 +9253 +9254 +9255 +9256 +9257 +9258 +9259 +9260 +9261 +9262 +9263 +9264 +9265 +9266 +9267 +9268 +9269 +9270 +9271 +9272 +9273 +9274 +9275 +9276 +9277 +9278 +9279 +9280 +9281 +9282 +9283 +9284 +9285 +9286 +9287 +9288 +9289 +9290 +9291 +9292 +9293 +9294 +9295 +9296 +9297 +9298 +9299 +9300 +9301 +9302 +9303 +9304 +9305 +9306 +9307 +9308 +9309 +9310 +9311 +9312 +9313 +9314 +9315 +9316 +9317 +9318 +9319 +9320 +9321 +9322 +9323 +9324 +9325 +9326 +9327 +9328 +9329 +9330 +9331 +9332 +9333 +9334 +9335 +9336 +9337 +9338 +9339 +9340 +9341 +9342 +9343 +9344 +9345 +9346 +9347 +9348 +9349 +9350 +9351 +9352 +9353 +9354 +9355 +9356 +9357 +9358 +9359 +9360 +9361 +9362 +9363 +9364 +9365 +9366 +9367 +9368 +9369 +9370 +9371 +9372 +9373 +9374 +9375 +9376 +9377 +9378 +9379 +9380 +9381 +9382 +9383 +9384 +9385 +9386 +9387 +9388 +9389 +9390 +9391 +9392 +9393 +9394 +9395 +9396 +9397 +9398 +9399 +9400 +9401 +9402 +9403 +9404 +9405 +9406 +9407 +9408 +9409 +9410 +9411 +9412 +9413 +9414 +9415 +9416 +9417 +9418 +9419 +9420 +9421 +9422 +9423 +9424 +9425 +9426 +9427 +9428 +9429 +9430 +9431 +9432 +9433 +9434 +9435 +9436 +9437 +9438 +9439 +9440 +9441 +9442 +9443 +9444 +9445 +9446 +9447 +9448 +9449 +9450 +9451 +9452 +9453 +9454 +9455 +9456 +9457 +9458 +9459 +9460 +9461 +9462 +9463 +9464 +9465 +9466 +9467 +9468 +9469 +9470 +9471 +9472 +9473 +9474 +9475 +9476 +9477 +9478 +9479 +9480 +9481 +9482 +9483 +9484 +9485 +9486 +9487 +9488 +9489 +9490 +9491 +9492 +9493 +9494 +9495 +9496 +9497 +9498 +9499 +9500 +9501 +9502 +9503 +9504 +9505 +9506 +9507 +9508 +9509 +9510 +9511 +9512 +9513 +9514 
+9515 +9516 +9517 +9518 +9519 +9520 +9521 +9522 +9523 +9524 +9525 +9526 +9527 +9528 +9529 +9530 +9531 +9532 +9533 +9534 +9535 +9536 +9537 +9538 +9539 +9540 +9541 +9542 +9543 +9544 +9545 +9546 +9547 +9548 +9549 +9550 +9551 +9552 +9553 +9554 +9555 +9556 +9557 +9558 +9559 +9560 +9561 +9562 +9563 +9564 +9565 +9566 +9567 +9568 +9569 +9570 +9571 +9572 +9573 +9574 +9575 +9576 +9577 +9578 +9579 +9580 +9581 +9582 +9583 +9584 +9585 +9586 +9587 +9588 +9589 +9590 +9591 +9592 +9593 +9594 +9595 +9596 +9597 +9598 +9599 +9600 +9601 +9602 +9603 +9604 +9605 +9606 +9607 +9608 +9609 +9610 +9611 +9612 +9613 +9614 +9615 +9616 +9617 +9618 +9619 +9620 +9621 +9622 +9623 +9624 +9625 +9626 +9627 +9628 +9629 +9630 +9631 +9632 +9633 +9634 +9635 +9636 +9637 +9638 +9639 +9640 +9641 +9642 +9643 +9644 +9645 +9646 +9647 +9648 +9649 +9650 +9651 +9652 +9653 +9654 +9655 +9656 +9657 +9658 +9659 +9660 +9661 +9662 +9663 +9664 +9665 +9666 +9667 +9668 +9669 +9670 +9671 +9672 +9673 +9674 +9675 +9676 +9677 +9678 +9679 +9680 +9681 +9682 +9683 +9684 +9685 +9686 +9687 +9688 +9689 +9690 +9691 +9692 +9693 +9694 +9695 +9696 +9697 +9698 +9699 +9700 +9701 +9702 +9703 +9704 +9705 +9706 +9707 +9708 +9709 +9710 +9711 +9712 +9713 +9714 +9715 +9716 +9717 +9718 +9719 +9720 +9721 +9722 +9723 +9724 +9725 +9726 +9727 +9728 +9729 +9730 +9731 +9732 +9733 +9734 +9735 +9736 +9737 +9738 +9739 +9740 +9741 +9742 +9743 +9744 +9745 +9746 +9747 +9748 +9749 +9750 +9751 +9752 +9753 +9754 +9755 +9756 +9757 +9758 +9759 +9760 +9761 +9762 +9763 +9764 +9765 +9766 +9767 +9768 +9769 +9770 +9771 +9772 +9773 +9774 +9775 +9776 +9777 +9778 +9779 +9780 +9781 +9782 +9783 +9784 +9785 +9786 +9787 +9788 +9789 +9790 +9791 +9792 +9793 +9794 +9795 +9796 +9797 +9798 +9799 +9800 +9801 +9802 +9803 +9804 +9805 +9806 +9807 +9808 +9809 +9810 +9811 +9812 +9813 +9814 +9815 +9816 +9817 +9818 +9819 +9820 +9821 +9822 +9823 +9824 +9825 +9826 +9827 +9828 +9829 +9830 +9831 +9832 +9833 +9834 +9835 +9836 +9837 +9838 +9839 +9840 +9841 +9842 +9843 +9844 +9845 +9846 +9847 
+9848 +9849 +9850 +9851 +9852 +9853 +9854 +9855 +9856 +9857 +9858 +9859 +9860 +9861 +9862 +9863 +9864 +9865 +9866 +9867 +9868 +9869 +9870 +9871 +9872 +9873 +9874 +9875 +9876 +9877 +9878 +9879 +9880 +9881 +9882 +9883 +9884 +9885 +9886 +9887 +9888 +9889 +9890 +9891 +9892 +9893 +9894 +9895 +9896 +9897 +9898 +9899 +9900 +9901 +9902 +9903 +9904 +9905 +9906 +9907 +9908 +9909 +9910 +9911 +9912 +9913 +9914 +9915 +9916 +9917 +9918 +9919 +9920 +9921 +9922 +9923 +9924 +9925 +9926 +9927 +9928 +9929 +9930 +9931 +9932 +9933 +9934 +9935 +9936 +9937 +9938 +9939 +9940 +9941 +9942 +9943 +9944 +9945 +9946 +9947 +9948 +9949 +9950 +9951 +9952 +9953 +9954 +9955 +9956 +9957 +9958 +9959 +9960 +9961 +9962 +9963 +9964 +9965 +9966 +9967 +9968 +9969 +9970 +9971 +9972 +9973 +9974 +9975 +9976 +9977 +9978 +9979 +9980 +9981 +9982 +9983 +9984 +9985 +9986 +9987 +9988 +9989 +9990 +9991 +9992 +9993 +9994 +9995 +9996 +9997 +9998 +9999 +10000 diff --git a/src/backend/columnar/data/contestants.1.csv b/src/backend/columnar/data/contestants.1.csv new file mode 100644 index 000000000..bdd28c4d4 --- /dev/null +++ b/src/backend/columnar/data/contestants.1.csv @@ -0,0 +1,5 @@ +a,1990-01-10,2090,97.1,XA ,{a} +b,1990-11-01,2203,98.1,XA ,"{a,b}" +c,1988-11-01,2907,99.4,XB ,"{w,y}" +d,1985-05-05,2314,98.3,XB ,{} +e,1995-05-05,2236,98.2,XC ,{a} diff --git a/src/backend/columnar/data/contestants.2.csv b/src/backend/columnar/data/contestants.2.csv new file mode 100644 index 000000000..1a4506bc3 --- /dev/null +++ b/src/backend/columnar/data/contestants.2.csv @@ -0,0 +1,3 @@ +f,1983-04-02,3090,99.6,XD ,"{a,b,c,y}" +g,1991-12-13,1803,85.1,XD ,"{a,c}" +h,1987-10-26,2112,95.4,XD ,"{w,a}" diff --git a/src/backend/columnar/data/datetime_types.csv b/src/backend/columnar/data/datetime_types.csv new file mode 100644 index 000000000..b5c2eb099 --- /dev/null +++ b/src/backend/columnar/data/datetime_types.csv @@ -0,0 +1,2 @@ +2000-01-02 04:05:06,1999-01-08 14:05:06+02,2000-01-02,04:05:06,04:00:00 +1970-01-01 
00:00:00,infinity,-infinity,00:00:00,00:00:00 diff --git a/src/backend/columnar/data/enum_and_composite_types.csv b/src/backend/columnar/data/enum_and_composite_types.csv new file mode 100644 index 000000000..979f2ebc3 --- /dev/null +++ b/src/backend/columnar/data/enum_and_composite_types.csv @@ -0,0 +1,2 @@ +a,"(2,b)" +b,"(3,c)" diff --git a/src/backend/columnar/data/null_values.csv b/src/backend/columnar/data/null_values.csv new file mode 100644 index 000000000..7ddd67b24 --- /dev/null +++ b/src/backend/columnar/data/null_values.csv @@ -0,0 +1,2 @@ +,{NULL},"(,)" +,, diff --git a/src/backend/columnar/data/other_types.csv b/src/backend/columnar/data/other_types.csv new file mode 100644 index 000000000..487f386fb --- /dev/null +++ b/src/backend/columnar/data/other_types.csv @@ -0,0 +1,2 @@ +f,\xdeadbeef,$1.00,192.168.1.2,10101,a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,"{""key"": ""value""}" +t,\xcdb0,$1.50,127.0.0.1,"",a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,[] diff --git a/src/backend/columnar/data/range_types.csv b/src/backend/columnar/data/range_types.csv new file mode 100644 index 000000000..db0ca880c --- /dev/null +++ b/src/backend/columnar/data/range_types.csv @@ -0,0 +1,2 @@ +"[1,3)","[1,3)","[1,3)","[""2000-01-02 00:30:00"",""2010-02-03 12:30:00"")" +empty,"[1,)","(,)",empty diff --git a/src/backend/columnar/expected/am_alter.out b/src/backend/columnar/expected/am_alter.out new file mode 100644 index 000000000..bd0737b4b --- /dev/null +++ b/src/backend/columnar/expected/am_alter.out @@ -0,0 +1,177 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. 
+-- +CREATE TABLE test_alter_table (a int, b int, c int) USING cstore_tableam; +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; +-- drop a column +ALTER TABLE test_alter_table DROP COLUMN a; +-- test analyze +ANALYZE test_alter_table; +-- verify select queries run as expected +SELECT * FROM test_alter_table; + b | c +---+--- + 2 | 3 + 5 | 6 + 8 | 9 +(3 rows) + +SELECT a FROM test_alter_table; +ERROR: column "a" does not exist +LINE 1: SELECT a FROM test_alter_table; + ^ +SELECT b FROM test_alter_table; + b +--- + 2 + 5 + 8 +(3 rows) + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +ERROR: INSERT has more expressions than target columns +LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); + ^ +INSERT INTO test_alter_table (SELECT 5, 8); +-- add a column with no defaults +ALTER TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | +(4 rows) + +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | + 3 | 5 | 8 +(5 rows) + +-- add a fixed-length column with default value +ALTER TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 +(5 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 + 1 | 2 | 4 | 8 +(6 rows) + +-- add a variable-length column with default value +ALTER TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 
5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME +(6 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME + 1 | 2 | 4 | 8 | ABCDEF +(7 rows) + +-- drop couple of columns +ALTER TABLE test_alter_table DROP COLUMN c; +ALTER TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; + b | d | f +---+---+--------- + 2 | | TEXT ME + 5 | | TEXT ME + 8 | | TEXT ME + 5 | | TEXT ME + 3 | 8 | TEXT ME + 1 | 4 | TEXT ME + 1 | 4 | ABCDEF +(7 rows) + +SELECT count(*) from test_alter_table; + count +------- + 7 +(1 row) + +SELECT count(t.*) from test_alter_table t; + count +------- + 7 +(1 row) + +-- unsupported default values +ALTER TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "g" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "h" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + b | d | f | g | h +---+---+---------+---+--- + 2 | | TEXT ME | | + 5 | | TEXT ME | | + 8 | | TEXT ME | | + 5 | | TEXT ME | | + 3 | 8 | TEXT ME | | + 1 | 4 | TEXT ME | | + 1 | 4 | ABCDEF | | +(7 rows) + +-- unsupported type change +ALTER TABLE test_alter_table ADD COLUMN i int; +ALTER TABLE test_alter_table ADD COLUMN j float; +ALTER TABLE test_alter_table ADD COLUMN k text; +-- this is valid type change +ALTER TABLE test_alter_table ALTER COLUMN i 
TYPE float; +-- this is not valid +ALTER TABLE test_alter_table ALTER COLUMN j TYPE int; +-- text / varchar conversion is valid both ways +ALTER TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER TABLE test_alter_table ALTER COLUMN k TYPE text; +DROP TABLE test_alter_table; diff --git a/src/backend/columnar/expected/am_analyze.out b/src/backend/columnar/expected/am_analyze.out new file mode 100644 index 000000000..f8c4d974a --- /dev/null +++ b/src/backend/columnar/expected/am_analyze.out @@ -0,0 +1,19 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. +-- +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + count +------- + 6 +(1 row) + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; + count +------- + 6 +(1 row) + diff --git a/src/backend/columnar/expected/am_clean.out b/src/backend/columnar/expected/am_clean.out new file mode 100644 index 000000000..2c1e82ee6 --- /dev/null +++ b/src/backend/columnar/expected/am_clean.out @@ -0,0 +1,8 @@ +DROP TABLE test_null_values; +DROP TABLE test_other_types; +DROP TABLE test_range_types; +DROP TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP TABLE test_datetime_types; +DROP TABLE test_array_types; diff --git a/src/backend/columnar/expected/am_create.out b/src/backend/columnar/expected/am_create.out new file mode 100644 index 000000000..47c6a6c44 --- /dev/null +++ b/src/backend/columnar/expected/am_create.out @@ -0,0 +1,20 @@ +-- +-- Test the CREATE statements related to cstore. 
+-- +-- Create uncompressed table +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + USING cstore_tableam; +-- Create compressed table with automatically determined file path +-- COMPRESSED +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + USING cstore_tableam; +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/src/backend/columnar/expected/am_drop.out b/src/backend/columnar/expected/am_drop.out new file mode 100644 index 000000000..26de328f6 --- /dev/null +++ b/src/backend/columnar/expected/am_drop.out @@ -0,0 +1,51 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +-- DROP cstore_fdw tables +DROP TABLE contestant; +DROP TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; + ?column? +---------- + 2 +(1 row) + +-- Create a cstore_fdw table under a schema and drop it. 
+CREATE SCHEMA test_schema; +CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +DROP SCHEMA test_schema CASCADE; +NOTICE: drop cascades to table test_schema.test_table +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; + ?column? +---------- + 1 +(1 row) + +SELECT current_database() datname \gset +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE TABLE test_table(data int) USING cstore_tableam; +DROP EXTENSION cstore_fdw CASCADE; +NOTICE: drop cascades to table test_table +-- test database drop +CREATE EXTENSION cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE TABLE test_table(data int) USING cstore_tableam; +\c :datname +DROP DATABASE db_to_drop; diff --git a/src/backend/columnar/expected/am_functions.out b/src/backend/columnar/expected/am_functions.out new file mode 100644 index 000000000..6351ba0bf --- /dev/null +++ b/src/backend/columnar/expected/am_functions.out @@ -0,0 +1,18 @@ +-- +-- Test utility functions for cstore_fdw tables. +-- +CREATE TABLE empty_table (a int) USING cstore_tableam; +CREATE TABLE table_with_data (a int) USING cstore_tableam; +CREATE TABLE non_cstore_table (a int); +COPY table_with_data FROM STDIN; +SELECT pg_relation_size('empty_table') < pg_relation_size('table_with_data'); + ?column? 
+---------- + t +(1 row) + +SELECT cstore_table_size('non_cstore_table'); +ERROR: relation is not a cstore table +DROP TABLE empty_table; +DROP TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/src/backend/columnar/expected/am_insert.out b/src/backend/columnar/expected/am_insert.out new file mode 100644 index 000000000..8d06d4323 --- /dev/null +++ b/src/backend/columnar/expected/am_insert.out @@ -0,0 +1,86 @@ +-- +-- Testing insert on cstore_fdw tables. +-- +CREATE TABLE test_insert_command (a int) USING cstore_tableam; +-- test single row inserts fail +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command values(1); +select count(*) from test_insert_command; + count +------- + 1 +(1 row) + +insert into test_insert_command default values; +select count(*) from test_insert_command; + count +------- + 2 +(1 row) + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); +select count(*) from test_insert_command_data; + count +------- + 0 +(1 row) + +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + count +------- + 1 +(1 row) + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + count +------- + 3 +(1 row) + +drop table test_insert_command_data; +drop table test_insert_command; +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; +CREATE TABLE test_cstore_long_text(int_val int, text_val text) +USING cstore_tableam; +-- store long text in cstore table +INSERT INTO 
test_cstore_long_text SELECT * FROM test_long_text; +-- drop source table to remove original text from toast +DROP TABLE test_long_text; +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + int_val +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +DROP TABLE test_long_text_hash; +DROP TABLE test_cstore_long_text; diff --git a/src/backend/columnar/expected/am_join.out b/src/backend/columnar/expected/am_join.out new file mode 100644 index 000000000..fbb628187 --- /dev/null +++ b/src/backend/columnar/expected/am_join.out @@ -0,0 +1,37 @@ +CREATE SCHEMA am_cstore_join; +SET search_path TO am_cstore_join; +CREATE TABLE users (id int, name text) USING cstore_tableam; +INSERT INTO users SELECT a, 'name' || a FROM generate_series(0,30-1) AS a; +CREATE TABLE things (id int, user_id int, name text) USING cstore_tableam; +INSERT INTO things SELECT a, a % 30, 'thing' || a FROM generate_series(1,300) AS a; +-- force the nested loop to rescan the table +SET enable_material TO off; +SET enable_hashjoin TO off; +SET enable_mergejoin TO off; +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 290; + count +------- + 10 +(1 row) + +-- verify the join uses a nested loop to trigger the rescan behaviour +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 299990; + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (users.id = things.user_id) + -> Custom Scan (CStoreScan) on things + Filter: (id > 299990) + -> Custom Scan (CStoreScan) on users +(6 rows) + +SET client_min_messages TO warning; +DROP SCHEMA am_cstore_join CASCADE; diff --git a/src/backend/columnar/expected/am_query.out b/src/backend/columnar/expected/am_query.out new file mode 
100644 index 000000000..2f0ff6cc7 --- /dev/null +++ b/src/backend/columnar/expected/am_query.out @@ -0,0 +1,105 @@ +-- +-- Test querying cstore_fdw tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +-- Query uncompressed data +SELECT count(*) FROM contestant; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant_compressed ORDER BY handle; + handle | birthdate | rating | 
percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + to_json +------------------------------------------------------------------------------------------------------------------ + {"handle":"g","birthdate":"1991-12-13","rating":1803,"percentile":85.1,"country":"XD ","achievements":["a","c"]} +(1 row) + +-- Test variables used in expressions +CREATE TABLE union_first (a int, b int) USING cstore_tableam; +CREATE TABLE union_second (a int, b int) USING cstore_tableam; +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + ?column? 
| b +----------+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 +(10 rows) + +DROP TABLE union_first, union_second; diff --git a/src/backend/columnar/expected/am_rollback.out b/src/backend/columnar/expected/am_rollback.out new file mode 100644 index 000000000..130baaa3a --- /dev/null +++ b/src/backend/columnar/expected/am_rollback.out @@ -0,0 +1,77 @@ +-- +-- Testing we handle rollbacks properly +-- +CREATE TABLE t(a int, b int) USING cstore_tableam; +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + count +------- + 0 +(1 row) + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 1 +(1 row) + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 30 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 2 +(1 row) + +DROP TABLE t; diff --git a/src/backend/columnar/expected/am_tableoptions.out b/src/backend/columnar/expected/am_tableoptions.out new file mode 100644 index 000000000..e5e0f9a4f --- /dev/null +++ 
b/src/backend/columnar/expected/am_tableoptions.out @@ -0,0 +1,179 @@ +CREATE SCHEMA am_tableoptions; +SET search_path TO am_tableoptions; +CREATE TABLE table_options (a int) USING cstore_tableam; +INSERT INTO table_options SELECT generate_series(1,100); +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 150000 | none +(1 row) + +-- test changing the compression +SELECT alter_cstore_table_set('table_options', compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 150000 | pglz +(1 row) + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', block_row_count => 10); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 150000 | pglz +(1 row) + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', stripe_row_count => 100); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 100 | pglz +(1 row) + +-- VACUUM FULL creates a new table, make sure it 
copies settings from the table you are vacuuming +VACUUM FULL table_options; +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 100 | pglz +(1 row) + +-- set all settings at the same time +SELECT alter_cstore_table_set('table_options', stripe_row_count => 1000, block_row_count => 100, compression => 'none'); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 100 | 1000 | none +(1 row) + +-- reset settings one by one to the version of the GUC's +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 10000; +SET cstore.compression TO 'pglz'; +-- verify setting the GUC's didn't change the settings +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 100 | 1000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', block_row_count => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 1000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', stripe_row_count => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show 
table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', compression => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | pglz +(1 row) + +-- verify resetting all settings at once work +SET cstore.block_row_count TO 10000; +SET cstore.stripe_row_count TO 100000; +SET cstore.compression TO 'none'; +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | pglz +(1 row) + +SELECT alter_cstore_table_reset( + 'table_options', + block_row_count => true, + stripe_row_count => true, + compression => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 100000 | none +(1 row) + +-- verify edge cases +-- first start with a table that is not a cstore table +CREATE TABLE not_a_cstore_table (a int); +SELECT alter_cstore_table_set('not_a_cstore_table', compression => 'pglz'); +ERROR: table not_a_cstore_table is not a cstore table +SELECT alter_cstore_table_reset('not_a_cstore_table', compression => true); 
+ERROR: table not_a_cstore_table is not a cstore table +-- verify you can't use a compression that is not known +SELECT alter_cstore_table_set('table_options', compression => 'foobar'); +ERROR: unknown compression type for cstore table: foobar +SET client_min_messages TO warning; +DROP SCHEMA am_tableoptions CASCADE; diff --git a/src/backend/columnar/expected/am_trigger.out b/src/backend/columnar/expected/am_trigger.out new file mode 100644 index 000000000..53b2c9d9e --- /dev/null +++ b/src/backend/columnar/expected/am_trigger.out @@ -0,0 +1,65 @@ +create or replace function trs_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE STATEMENT %', TG_OP; + RETURN NULL; +END; +$$; +create or replace function trs_after() returns trigger language plpgsql as $$ +DECLARE + r RECORD; +BEGIN + RAISE NOTICE 'AFTER STATEMENT %', TG_OP; + IF (TG_OP = 'DELETE') THEN + FOR R IN select * from old_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + ELSE + FOR R IN select * from new_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + END IF; + RETURN NULL; +END; +$$; +create or replace function trr_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; +create or replace function trr_after() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'AFTER ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; +create table test_tr(i int) using cstore_tableam; +create trigger tr_before_stmt before insert on test_tr + for each statement execute procedure trs_before(); +create trigger tr_after_stmt after insert on test_tr + referencing new table as new_table + for each statement execute procedure trs_after(); +create trigger tr_before_row before insert on test_tr + for each row execute procedure trr_before(); +-- after triggers require TIDs, which are not supported yet +create trigger tr_after_row after insert on test_tr + for each row execute procedure trr_after(); +ERROR: AFTER 
ROW triggers are not supported for columnstore access method +HINT: Consider an AFTER STATEMENT trigger instead. +insert into test_tr values(1); +NOTICE: BEFORE STATEMENT INSERT +NOTICE: BEFORE ROW INSERT: (1) +NOTICE: AFTER STATEMENT INSERT +NOTICE: (1) +insert into test_tr values(2),(3),(4); +NOTICE: BEFORE STATEMENT INSERT +NOTICE: BEFORE ROW INSERT: (2) +NOTICE: BEFORE ROW INSERT: (3) +NOTICE: BEFORE ROW INSERT: (4) +NOTICE: AFTER STATEMENT INSERT +NOTICE: (2) +NOTICE: (3) +NOTICE: (4) +drop table test_tr; diff --git a/src/backend/columnar/expected/am_truncate.out b/src/backend/columnar/expected/am_truncate.out new file mode 100644 index 000000000..245c72062 --- /dev/null +++ b/src/backend/columnar/expected/am_truncate.out @@ -0,0 +1,271 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + t +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +-- COMPRESSED +CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +set cstore.compression = 'pglz'; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +set cstore.compression to default; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) 
+ +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT pg_relation_size('cstore_truncate_test_compressed'); + pg_relation_size +------------------ + 0 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM 
cstore.cstore_data_files; + ?column? +---------- + 0 +(1 row) + +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE TABLE cstore_same_transaction_truncate(a int) USING cstore_tableam; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; + ?column? +---------- + 1 +(1 row) + +SELECT * FROM cstore_same_transaction_truncate; + a +---- + 20 + 21 + 22 + 23 +(4 rows) + +DROP TABLE cstore_same_transaction_truncate; +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +-- COMPRESSED +CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam; +set cstore.compression = 'pglz'; +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE 
truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +set cstore.compression = 'pglz'; +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for table truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to table truncate_schema.truncate_tbl +DROP USER truncate_user; diff --git a/src/backend/columnar/expected/am_truncate_0.out b/src/backend/columnar/expected/am_truncate_0.out new file mode 100644 index 000000000..c8cc4ad98 --- /dev/null +++ b/src/backend/columnar/expected/am_truncate_0.out @@ -0,0 +1,262 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. 
+-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + f +(1 row) + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 26 +(1 row) + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT 
pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 6 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the 
second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for relation truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- 
cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + diff --git a/src/backend/columnar/expected/am_vacuum.out b/src/backend/columnar/expected/am_vacuum.out new file mode 100644 index 000000000..3975be12b --- /dev/null +++ b/src/backend/columnar/expected/am_vacuum.out @@ -0,0 +1,234 @@ +SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset +CREATE TABLE t(a int, b int) USING cstore_tableam; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; +INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; +INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; +SELECT sum(a), sum(b) FROM t; + sum | sum +-----+------ + 465 | 9455 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 3 +(1 row) + +-- vacuum full should merge stripes together +VACUUM FULL t; +SELECT sum(a), sum(b) FROM t; + sum | sum +-----+------ + 465 | 9455 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 1 +(1 row) + +-- test the case when all data cannot fit into a single stripe +SELECT alter_cstore_table_set('t', stripe_row_count => 1000); + alter_cstore_table_set +------------------------ + +(1 row) + +INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; +SELECT sum(a), sum(b) FROM t; + sum | sum +---------+--------- + 3126715 | 6261955 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE 
a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 4 +(1 row) + +VACUUM FULL t; +SELECT sum(a), sum(b) FROM t; + sum | sum +---------+--------- + 3126715 | 6261955 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 3 +(1 row) + +-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs +ALTER TABLE t DROP COLUMN a; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + stripe | attr | block | ?column? | ?column? +--------+------+-------+----------+---------- + 1 | 1 | 0 | f | f + 1 | 2 | 0 | f | f + 2 | 1 | 0 | f | f + 2 | 2 | 0 | f | f + 3 | 1 | 0 | f | f + 3 | 2 | 0 | f | f +(6 rows) + +VACUUM FULL t; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + stripe | attr | block | ?column? | ?column? +--------+------+-------+----------+---------- + 1 | 1 | 0 | t | t + 1 | 2 | 0 | f | f + 2 | 1 | 0 | t | t + 2 | 2 | 0 | f | f + 3 | 1 | 0 | t | t + 3 | 2 | 0 | f | f +(6 rows) + +-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; + ?column? 
+---------- + 1 +(1 row) + +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 32 kB +(1 row) + +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 112 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 12530 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 112 kB +(1 row) + +COMMIT; +-- vacuum should truncate the relation to the usable space +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 114688, total data size: 10754 +total row count: 2530, stripe count: 3, average rows per stripe: 843 +block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 + +INFO: "t": truncated 14 to 4 pages +DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 32 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats +BEGIN; +SELECT alter_cstore_table_set('t', + block_row_count => 1000, + stripe_row_count => 2000, + compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SELECT alter_cstore_table_set('t', compression => 'none'); + alter_cstore_table_set +------------------------ + +(1 row) + +SAVEPOINT s2; +INSERT INTO t SELECT i FROM 
generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 49152, total data size: 18808 +total row count: 5530, stripe count: 5, average rows per stripe: 1106 +block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2 + +SELECT count(*) FROM t; + count +------- + 5530 +(1 row) + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 65536, total data size: 31372 +total row count: 7030, stripe count: 6, average rows per stripe: 1171 +block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed for if compression +-- doesn't reduce its size. +SELECT alter_cstore_table_set('t', compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + +VACUUM FULL t; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 49152, total data size: 15728 +total row count: 7030, stripe count: 4, average rows per stripe: 1757 +block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 + +DROP TABLE t; +-- Make sure we cleaned the metadata for t too +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; + ?column? 
+---------- + 0 +(1 row) + diff --git a/src/backend/columnar/expected/am_vacuum_vs_insert.out b/src/backend/columnar/expected/am_vacuum_vs_insert.out new file mode 100644 index 000000000..d463bd076 --- /dev/null +++ b/src/backend/columnar/expected/am_vacuum_vs_insert.out @@ -0,0 +1,68 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-insert s1-begin s1-insert s2-vacuum s1-commit s2-select +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s1-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +s2: INFO: statistics for "test_vacuum_vs_insert": +total file size: 24576, total data size: 26 +total row count: 3, stripe count: 1, average rows per stripe: 3 +block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 + +s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request +step s2-vacuum: + VACUUM VERBOSE test_vacuum_vs_insert; + +step s1-commit: + COMMIT; + +step s2-select: + SELECT * FROM test_vacuum_vs_insert; + +a b + +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 + +starting permutation: s1-insert s1-begin s1-insert s2-vacuum-full s1-commit s2-select +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s1-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-vacuum-full: + VACUUM FULL VERBOSE test_vacuum_vs_insert; + +step s1-commit: + COMMIT; + +s2: INFO: vacuuming "public.test_vacuum_vs_insert" +s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 3 pages +DETAIL: 0 dead row versions cannot be removed yet. +CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s. +step s2-vacuum-full: <... 
completed> +step s2-select: + SELECT * FROM test_vacuum_vs_insert; + +a b + +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 diff --git a/src/backend/columnar/expected/am_write_concurrency.out b/src/backend/columnar/expected/am_write_concurrency.out new file mode 100644 index 000000000..41c6ee7e6 --- /dev/null +++ b/src/backend/columnar/expected/am_write_concurrency.out @@ -0,0 +1,142 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s2-begin s1-insert s2-insert s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +1 2 +2 4 +3 6 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +1 2 +2 4 +3 6 +4 8 +5 10 +6 12 + +starting permutation: s1-begin s2-begin s1-copy s2-insert s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s1-copy: + COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; + +step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +11 +12 +13 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +11 +12 +13 + +starting permutation: s1-begin s2-begin s2-insert s1-copy s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + 
+step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-copy: + COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +11 +12 +13 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +11 +12 +13 diff --git a/src/backend/columnar/expected/create.out b/src/backend/columnar/expected/create.out new file mode 100644 index 000000000..39b477c81 --- /dev/null +++ b/src/backend/columnar/expected/create.out @@ -0,0 +1,6 @@ +Parsed test spec with 1 sessions + +starting permutation: s1a +step s1a: + CREATE EXTENSION cstore_fdw; + diff --git a/src/backend/columnar/expected/extension_create.out b/src/backend/columnar/expected/extension_create.out new file mode 100644 index 000000000..c4d94e1e5 --- /dev/null +++ b/src/backend/columnar/expected/extension_create.out @@ -0,0 +1,2 @@ +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; diff --git a/src/backend/columnar/expected/fdw_alter.out b/src/backend/columnar/expected/fdw_alter.out new file mode 100644 index 000000000..659e2723e --- /dev/null +++ b/src/backend/columnar/expected/fdw_alter.out @@ -0,0 +1,178 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. 
+-- +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +-- test analyze +ANALYZE test_alter_table; +-- verify select queries run as expected +SELECT * FROM test_alter_table; + b | c +---+--- + 2 | 3 + 5 | 6 + 8 | 9 +(3 rows) + +SELECT a FROM test_alter_table; +ERROR: column "a" does not exist +LINE 1: SELECT a FROM test_alter_table; + ^ +SELECT b FROM test_alter_table; + b +--- + 2 + 5 + 8 +(3 rows) + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +ERROR: INSERT has more expressions than target columns +LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); + ^ +INSERT INTO test_alter_table (SELECT 5, 8); +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | +(4 rows) + +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | + 3 | 5 | 8 +(5 rows) + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 +(5 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 + 1 | 2 | 4 | 8 +(6 rows) + +-- add a variable-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 
| TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME +(6 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME + 1 | 2 | 4 | 8 | ABCDEF +(7 rows) + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; + b | d | f +---+---+--------- + 2 | | TEXT ME + 5 | | TEXT ME + 8 | | TEXT ME + 5 | | TEXT ME + 3 | 8 | TEXT ME + 1 | 4 | TEXT ME + 1 | 4 | ABCDEF +(7 rows) + +SELECT count(*) from test_alter_table; + count +------- + 7 +(1 row) + +SELECT count(t.*) from test_alter_table t; + count +------- + 7 +(1 row) + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "g" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "h" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + b | d | f | g | h +---+---+---------+---+--- + 2 | | TEXT ME | | + 5 | | TEXT ME | | + 8 | | TEXT ME | | + 5 | | TEXT ME | | + 3 | 8 | TEXT ME | | + 1 | 4 | TEXT ME | | + 1 | 4 | ABCDEF | | +(7 rows) + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE 
test_alter_table ADD COLUMN k text; +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; +ERROR: Column j cannot be cast automatically to type pg_catalog.int4 +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; +DROP FOREIGN TABLE test_alter_table; diff --git a/src/backend/columnar/expected/fdw_analyze.out b/src/backend/columnar/expected/fdw_analyze.out new file mode 100644 index 000000000..f8c4d974a --- /dev/null +++ b/src/backend/columnar/expected/fdw_analyze.out @@ -0,0 +1,19 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. +-- +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + count +------- + 6 +(1 row) + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; + count +------- + 6 +(1 row) + diff --git a/src/backend/columnar/expected/fdw_clean.out b/src/backend/columnar/expected/fdw_clean.out new file mode 100644 index 000000000..ecd4d67a1 --- /dev/null +++ b/src/backend/columnar/expected/fdw_clean.out @@ -0,0 +1,10 @@ +DROP FOREIGN TABLE collation_block_filtering_test; +DROP FOREIGN TABLE test_block_filtering; +DROP FOREIGN TABLE test_null_values; +DROP FOREIGN TABLE test_other_types; +DROP FOREIGN TABLE test_range_types; +DROP FOREIGN TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP FOREIGN TABLE test_datetime_types; +DROP FOREIGN TABLE test_array_types; diff --git a/src/backend/columnar/expected/fdw_drop.out b/src/backend/columnar/expected/fdw_drop.out new file mode 100644 index 000000000..e1ddf0fd0 --- /dev/null +++ b/src/backend/columnar/expected/fdw_drop.out @@ -0,0 +1,55 @@ +-- +-- Tests the different DROP 
commands for cstore_fdw tables. +-- +-- DROP FOREIGN TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; + ?column? +---------- + 2 +(1 row) + +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +DROP SCHEMA test_schema CASCADE; +NOTICE: drop cascades to foreign table test_schema.test_table +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; + ?column? 
+---------- + 1 +(1 row) + +SELECT current_database() datname \gset +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +DROP EXTENSION cstore_fdw CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to server cstore_server +drop cascades to foreign table test_table +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +\c :datname +DROP DATABASE db_to_drop; diff --git a/src/backend/columnar/expected/fdw_functions.out b/src/backend/columnar/expected/fdw_functions.out new file mode 100644 index 000000000..117fc15f9 --- /dev/null +++ b/src/backend/columnar/expected/fdw_functions.out @@ -0,0 +1,18 @@ +-- +-- Test utility functions for cstore_fdw tables. +-- +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); +COPY table_with_data FROM STDIN; +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); + ?column? +---------- + t +(1 row) + +SELECT cstore_table_size('non_cstore_table'); +ERROR: relation is not a cstore table +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/src/backend/columnar/expected/fdw_insert.out b/src/backend/columnar/expected/fdw_insert.out new file mode 100644 index 000000000..49d9ed132 --- /dev/null +++ b/src/backend/columnar/expected/fdw_insert.out @@ -0,0 +1,88 @@ +-- +-- Testing insert on cstore_fdw tables. 
+-- +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +-- test single row inserts fail +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command values(1); +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command default values; +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); +select count(*) from test_insert_command_data; + count +------- + 0 +(1 row) + +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + count +------- + 1 +(1 row) + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + count +------- + 1 +(1 row) + +drop table test_insert_command_data; +drop foreign table test_insert_command; +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; +-- drop source table to remove original text from toast +DROP TABLE test_long_text; +-- check if text data is still available in cstore table +-- by comparing previously stored hash. 
+SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + int_val +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/src/backend/columnar/expected/fdw_query.out b/src/backend/columnar/expected/fdw_query.out new file mode 100644 index 000000000..7ac3508a4 --- /dev/null +++ b/src/backend/columnar/expected/fdw_query.out @@ -0,0 +1,105 @@ +-- +-- Test querying cstore_fdw tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +-- Query uncompressed data +SELECT count(*) FROM contestant; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 
433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant_compressed ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + to_json +------------------------------------------------------------------------------------------------------------------ + {"handle":"g","birthdate":"1991-12-13","rating":1803,"percentile":85.1,"country":"XD ","achievements":["a","c"]} +(1 row) + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + ?column? 
| b +----------+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 +(10 rows) + +DROP FOREIGN TABLE union_first, union_second; diff --git a/src/backend/columnar/expected/fdw_rollback.out b/src/backend/columnar/expected/fdw_rollback.out new file mode 100644 index 000000000..f50f9fd19 --- /dev/null +++ b/src/backend/columnar/expected/fdw_rollback.out @@ -0,0 +1,77 @@ +-- +-- Testing we handle rollbacks properly +-- +CREATE FOREIGN TABLE t(a int, b int) SERVER cstore_server; +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + count +------- + 0 +(1 row) + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 1 +(1 row) + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 30 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 2 +(1 row) + +DROP FOREIGN TABLE t; diff --git a/src/backend/columnar/expected/fdw_truncate.out b/src/backend/columnar/expected/fdw_truncate.out new file mode 100644 index 000000000..6192c704c --- 
/dev/null +++ b/src/backend/columnar/expected/fdw_truncate.out @@ -0,0 +1,263 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + t +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 0 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO 
cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + ?column? +---------- + 0 +(1 row) + +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE FOREIGN TABLE cstore_same_transaction_truncate(a int) SERVER cstore_server; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; + ?column? 
+---------- + 1 +(1 row) + +SELECT * FROM cstore_same_transaction_truncate; + a +---- + 20 + 21 + 22 + 23 +(4 rows) + +DROP FOREIGN TABLE cstore_same_transaction_truncate; +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 
+(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for table truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; diff --git a/src/backend/columnar/expected/fdw_truncate_0.out b/src/backend/columnar/expected/fdw_truncate_0.out new file mode 100644 index 000000000..c8cc4ad98 --- /dev/null +++ b/src/backend/columnar/expected/fdw_truncate_0.out @@ -0,0 +1,262 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + f +(1 row) + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 26 +(1 row) + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 6 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO 
cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE 
cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for relation truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = 
current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + diff --git a/src/backend/columnar/input/am_block_filtering.source b/src/backend/columnar/input/am_block_filtering.source new file mode 100644 index 000000000..f93eb1988 --- /dev/null +++ b/src/backend/columnar/input/am_block_filtering.source @@ -0,0 +1,73 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- + + +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. +-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; + + +-- Create and load data +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam; + +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; + + +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT 
filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; + +-- Verify that we are fine with collations which use a different alphabet order +CREATE TABLE collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; +COPY collation_block_filtering_test FROM STDIN; +A +Å +B +\. + +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/src/backend/columnar/input/am_copyto.source b/src/backend/columnar/input/am_copyto.source new file mode 100644 index 000000000..bb333bacf --- /dev/null +++ b/src/backend/columnar/input/am_copyto.source @@ -0,0 +1,17 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + USING cstore_tableam; + +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; + +-- export using COPY (SELECT * FROM table) TO ... 
+COPY (select * from test_contestant) TO STDOUT; + +DROP TABLE test_contestant CASCADE; diff --git a/src/backend/columnar/input/am_create.source b/src/backend/columnar/input/am_create.source new file mode 100644 index 000000000..6d4d5a388 --- /dev/null +++ b/src/backend/columnar/input/am_create.source @@ -0,0 +1,20 @@ +-- +-- Test the CREATE statements related to cstore. +-- + + +-- Create uncompressed table +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + USING cstore_tableam; + + +-- Create compressed table with automatically determined file path +-- COMPRESSED +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + USING cstore_tableam; + +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; diff --git a/src/backend/columnar/input/am_data_types.source b/src/backend/columnar/input/am_data_types.source new file mode 100644 index 000000000..24c661090 --- /dev/null +++ b/src/backend/columnar/input/am_data_types.source @@ -0,0 +1,68 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- + + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; + + +-- Test array types +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; + +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; + +SELECT * FROM test_array_types; + + +-- Test date/time types +CREATE TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) USING cstore_tableam; + +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; + +SELECT * FROM test_datetime_types; + + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); + +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; + +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; + +SELECT * FROM test_enum_and_composite_types; + + +-- Test range types +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; + +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; + +SELECT * FROM test_range_types; + + +-- Test other types +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; + +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; + +SELECT * FROM test_other_types; + + +-- Test null values +CREATE TABLE test_null_values (a int, b int[], c composite_type) + USING cstore_tableam; + +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; + +SELECT * FROM test_null_values; diff --git a/src/backend/columnar/input/am_load.source 
b/src/backend/columnar/input/am_load.source new file mode 100644 index 000000000..d0ef9bfac --- /dev/null +++ b/src/backend/columnar/input/am_load.source @@ -0,0 +1,46 @@ +-- +-- Test loading data into cstore_fdw tables. +-- + +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR + +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR + +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; + +-- COPY into compressed table +set cstore.compression = 'pglz'; +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; +set cstore.compression to default; + +-- Test column list +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +3.141,pi,1 +2.718,e,2 +0.577,gamma,3 +5.291e-11,bohr radius,4 +\. + +COPY famous_constants (name, value) FROM STDIN WITH CSV; +avagadro,6.022e23 +electron mass,9.109e-31 +proton mass,1.672e-27 +speed of light,2.997e8 +\. + +SELECT * FROM famous_constants ORDER BY id, name; + +DROP TABLE famous_constants; diff --git a/src/backend/columnar/input/fdw_block_filtering.source b/src/backend/columnar/input/fdw_block_filtering.source new file mode 100644 index 000000000..dc3170f0d --- /dev/null +++ b/src/backend/columnar/input/fdw_block_filtering.source @@ -0,0 +1,69 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- + + +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. 
+-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. +-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; + + +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); + +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; + + +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT 
filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +A +Å +B +\. + +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/src/backend/columnar/input/fdw_copyto.source b/src/backend/columnar/input/fdw_copyto.source new file mode 100644 index 000000000..a4b753a8d --- /dev/null +++ b/src/backend/columnar/input/fdw_copyto.source @@ -0,0 +1,17 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; + +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; + +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/src/backend/columnar/input/fdw_create.source b/src/backend/columnar/input/fdw_create.source new file mode 100644 index 000000000..bb3a38e28 --- /dev/null +++ b/src/backend/columnar/input/fdw_create.source @@ -0,0 +1,39 @@ +-- +-- Test the CREATE statements related to cstore_fdw. 
+-- + +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + + +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR + +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + + +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); + +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; diff --git a/src/backend/columnar/input/fdw_data_types.source b/src/backend/columnar/input/fdw_data_types.source new file mode 100644 index 000000000..ec83c4d8c --- /dev/null +++ b/src/backend/columnar/input/fdw_data_types.source @@ -0,0 +1,68 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- + + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; + + +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; + +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; + +SELECT * FROM test_array_types; + + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; + +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; + +SELECT * FROM test_datetime_types; + + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); + +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; + +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; + +SELECT * FROM test_enum_and_composite_types; + + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; + +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; + +SELECT * FROM test_range_types; + + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; + +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; + +SELECT * FROM test_other_types; + + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; + +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; + +SELECT * FROM test_null_values; diff --git 
a/src/backend/columnar/input/fdw_load.source b/src/backend/columnar/input/fdw_load.source new file mode 100644 index 000000000..0913acde7 --- /dev/null +++ b/src/backend/columnar/input/fdw_load.source @@ -0,0 +1,44 @@ +-- +-- Test loading data into cstore_fdw tables. +-- + +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR + +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR + +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; + +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; + +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +3.141,pi,1 +2.718,e,2 +0.577,gamma,3 +5.291e-11,bohr radius,4 +\. + +COPY famous_constants (name, value) FROM STDIN WITH CSV; +avagadro,6.022e23 +electron mass,9.109e-31 +proton mass,1.672e-27 +speed of light,2.997e8 +\. + +SELECT * FROM famous_constants ORDER BY id, name; + +DROP FOREIGN TABLE famous_constants; diff --git a/src/backend/columnar/mod.c b/src/backend/columnar/mod.c new file mode 100644 index 000000000..e81f7a6e5 --- /dev/null +++ b/src/backend/columnar/mod.c @@ -0,0 +1,56 @@ +/*------------------------------------------------------------------------- + * + * mod.c + * + * This file contains module-level definitions. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" + +#include "cstore.h" +#include "mod.h" + +#ifdef USE_TABLEAM +#include "cstore_tableam.h" +#endif + +#ifdef USE_FDW +#include "cstore_fdw.h" +#endif + +PG_MODULE_MAGIC; + +void +_PG_init(void) +{ + cstore_init(); + +#ifdef USE_TABLEAM + cstore_tableam_init(); +#endif + +#ifdef USE_FDW + cstore_fdw_init(); +#endif +} + + +void +_PG_fini(void) +{ +#ifdef USE_TABLEAM + cstore_tableam_finish(); +#endif + +#ifdef USE_FDW + cstore_fdw_finish(); +#endif +} diff --git a/src/backend/columnar/mod.h b/src/backend/columnar/mod.h new file mode 100644 index 000000000..3196bc809 --- /dev/null +++ b/src/backend/columnar/mod.h @@ -0,0 +1,21 @@ +/*------------------------------------------------------------------------- + * + * mod.h + * + * Type and function declarations for CStore + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef MOD_H +#define MOD_H + +/* Function declarations for extension loading and unloading */ +extern void _PG_init(void); +extern void _PG_fini(void); + +#endif /* MOD_H */ diff --git a/src/backend/columnar/output/am_block_filtering.source b/src/backend/columnar/output/am_block_filtering.source new file mode 100644 index 000000000..45cb702b6 --- /dev/null +++ b/src/backend/columnar/output/am_block_filtering.source @@ -0,0 +1,120 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number.
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +-- Create and load data +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam; +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); + filtered_row_count +-------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); + filtered_row_count +-------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); + filtered_row_count +-------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +-------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 
1979 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 3958 +(1 row) + +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; +-- Verify that we are fine with collations which use a different alphabet order +CREATE TABLE collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; +COPY collation_block_filtering_test FROM STDIN; +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; + a +--- + Å +(1 row) + diff --git a/src/backend/columnar/output/am_copyto.source b/src/backend/columnar/output/am_copyto.source new file mode 100644 index 000000000..127bdc65d --- /dev/null +++ b/src/backend/columnar/output/am_copyto.source @@ -0,0 +1,23 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + USING cstore_tableam; +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- export using COPY table TO ... 
+COPY test_contestant TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +DROP TABLE test_contestant CASCADE; diff --git a/src/backend/columnar/output/am_create.source b/src/backend/columnar/output/am_create.source new file mode 100644 index 000000000..47c6a6c44 --- /dev/null +++ b/src/backend/columnar/output/am_create.source @@ -0,0 +1,20 @@ +-- +-- Test the CREATE statements related to cstore. +-- +-- Create uncompressed table +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + USING cstore_tableam; +-- Create compressed table with automatically determined file path +-- COMPRESSED +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + USING cstore_tableam; +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/src/backend/columnar/output/am_data_types.source b/src/backend/columnar/output/am_data_types.source new file mode 100644 index 000000000..8431e6ca2 --- /dev/null +++ b/src/backend/columnar/output/am_data_types.source @@ -0,0 +1,78 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; +-- Test array types +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; +SELECT * FROM test_array_types; + int_array | bigint_array | text_array +--------------------------+--------------------------------------------+------------ + {1,2,3} | {1,2,3} | {a,b,c} + {} | {} | {} + {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} +(3 rows) + +-- Test date/time types +CREATE TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) USING cstore_tableam; +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; +SELECT * FROM test_datetime_types; + timestamp | timestamp_with_timezone | date | time | interval +---------------------+-------------------------+------------+----------+----------- + 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours + 1970-01-01 00:00:00 | infinity | -infinity | 00:00:00 | @ 0 +(2 rows) + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; +SELECT * FROM test_enum_and_composite_types; + enum | composite +------+----------- + a | (2,b) + b | (3,c) +(2 rows) + +-- Test range types +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; +SELECT * FROM test_range_types; + 
int4range | int8range | numrange | tsrange +-----------+-----------+----------+----------------------------------------------- + [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") + empty | [1,) | (,) | empty +(2 rows) + +-- Test other types +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; +SELECT * FROM test_other_types; + bool | bytea | money | inet | bitstring | uuid | json +------+------------+-------+-------------+-----------+--------------------------------------+------------------ + f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} + t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] +(2 rows) + +-- Test null values +CREATE TABLE test_null_values (a int, b int[], c composite_type) + USING cstore_tableam; +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; +SELECT * FROM test_null_values; + a | b | c +---+--------+----- + | {NULL} | (,) + | | +(2 rows) + diff --git a/src/backend/columnar/output/am_load.source b/src/backend/columnar/output/am_load.source new file mode 100644 index 000000000..5eb81a250 --- /dev/null +++ b/src/backend/columnar/output/am_load.source @@ -0,0 +1,42 @@ +-- +-- Test loading data into cstore_fdw tables. 
+-- +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR +ERROR: missing data for column "birthdate" +CONTEXT: COPY contestant, line 1: "a,1990-01-10,2090,97.1,XA ,{a}" +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR +ERROR: program "invalid_program" failed +DETAIL: command not found +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +-- COPY into compressed table +set cstore.compression = 'pglz'; +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; +set cstore.compression to default; +-- Test column list +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +COPY famous_constants (name, value) FROM STDIN WITH CSV; +SELECT * FROM famous_constants ORDER BY id, name; + id | name | value +----+----------------+----------- + 1 | pi | 3.141 + 2 | e | 2.718 + 3 | gamma | 0.577 + 4 | bohr radius | 5.291e-11 + | avagadro | 6.022e+23 + | electron mass | 9.109e-31 + | proton mass | 1.672e-27 + | speed of light | 2.997e+08 +(8 rows) + +DROP TABLE famous_constants; diff --git a/src/backend/columnar/output/fdw_block_filtering.source b/src/backend/columnar/output/fdw_block_filtering.source new file mode 100644 index 000000000..2f664a78a --- /dev/null +++ b/src/backend/columnar/output/fdw_block_filtering.source @@ -0,0 +1,116 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. 
+-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. +-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); + filtered_row_count +-------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); + filtered_row_count +-------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); + filtered_row_count +-------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +-------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE 
a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 1979 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 3958 +(1 row) + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; + a +--- + Å +(1 row) + diff --git a/src/backend/columnar/output/fdw_copyto.source b/src/backend/columnar/output/fdw_copyto.source new file mode 100644 index 000000000..a8d841f18 --- /dev/null +++ b/src/backend/columnar/output/fdw_copyto.source @@ -0,0 +1,23 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- export using COPY table TO ... 
+COPY test_contestant TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/src/backend/columnar/output/fdw_create.source b/src/backend/columnar/output/fdw_create.source new file mode 100644 index 000000000..41f17fdd8 --- /dev/null +++ b/src/backend/columnar/output/fdw_create.source @@ -0,0 +1,42 @@ +-- +-- Test the CREATE statements related to cstore_fdw. +-- +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR +ERROR: invalid option "bad_option_name" +HINT: Valid options in this context are: compression, stripe_row_count, block_row_count +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR +ERROR: invalid stripe row count +HINT: Stripe row count must be an integer between 1000 and 10000000 +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR +ERROR: invalid block row count +HINT: Block row count must be an integer between 1000 and 100000 +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR +ERROR: invalid compression type +HINT: Valid options are: none, pglz +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- Create 
compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/src/backend/columnar/output/fdw_data_types.source b/src/backend/columnar/output/fdw_data_types.source new file mode 100644 index 000000000..23fdcfa29 --- /dev/null +++ b/src/backend/columnar/output/fdw_data_types.source @@ -0,0 +1,78 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; +SELECT * FROM test_array_types; + int_array | bigint_array | text_array +--------------------------+--------------------------------------------+------------ + {1,2,3} | {1,2,3} | {a,b,c} + {} | {} | {} + {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} +(3 rows) + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; +SELECT * FROM test_datetime_types; + timestamp | timestamp_with_timezone | date | time | interval +---------------------+-------------------------+------------+----------+----------- + 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours + 1970-01-01 00:00:00 | infinity | -infinity | 00:00:00 | @ 0 
+(2 rows) + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; +SELECT * FROM test_enum_and_composite_types; + enum | composite +------+----------- + a | (2,b) + b | (3,c) +(2 rows) + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; +SELECT * FROM test_range_types; + int4range | int8range | numrange | tsrange +-----------+-----------+----------+----------------------------------------------- + [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") + empty | [1,) | (,) | empty +(2 rows) + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; +SELECT * FROM test_other_types; + bool | bytea | money | inet | bitstring | uuid | json +------+------------+-------+-------------+-----------+--------------------------------------+------------------ + f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} + t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] +(2 rows) + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; +SELECT * FROM test_null_values; + a | b | c +---+--------+----- + | {NULL} | (,) + | | +(2 rows) + diff --git 
a/src/backend/columnar/output/fdw_load.source b/src/backend/columnar/output/fdw_load.source new file mode 100644 index 000000000..c76f203eb --- /dev/null +++ b/src/backend/columnar/output/fdw_load.source @@ -0,0 +1,39 @@ +-- +-- Test loading data into cstore_fdw tables. +-- +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR +ERROR: missing data for column "birthdate" +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR +ERROR: program "invalid_program" failed +DETAIL: command not found +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +COPY famous_constants (name, value) FROM STDIN WITH CSV; +SELECT * FROM famous_constants ORDER BY id, name; + id | name | value +----+----------------+----------- + 1 | pi | 3.141 + 2 | e | 2.718 + 3 | gamma | 0.577 + 4 | bohr radius | 5.291e-11 + | avagadro | 6.022e+23 + | electron mass | 9.109e-31 + | proton mass | 1.672e-27 + | speed of light | 2.997e+08 +(8 rows) + +DROP FOREIGN TABLE famous_constants; diff --git a/src/backend/columnar/specs/am_vacuum_vs_insert.spec b/src/backend/columnar/specs/am_vacuum_vs_insert.spec new file mode 100644 index 000000000..59c7274d5 --- /dev/null +++ b/src/backend/columnar/specs/am_vacuum_vs_insert.spec @@ -0,0 +1,46 @@ +setup +{ + CREATE TABLE test_vacuum_vs_insert (a 
int, b int) USING cstore_tableam; +} + +teardown +{ + DROP TABLE IF EXISTS test_vacuum_vs_insert CASCADE; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-insert" +{ + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-vacuum" +{ + VACUUM VERBOSE test_vacuum_vs_insert; +} + +step "s2-vacuum-full" +{ + VACUUM FULL VERBOSE test_vacuum_vs_insert; +} + +step "s2-select" +{ + SELECT * FROM test_vacuum_vs_insert; +} + +permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum" "s1-commit" "s2-select" +permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum-full" "s1-commit" "s2-select" diff --git a/src/backend/columnar/specs/am_write_concurrency.spec b/src/backend/columnar/specs/am_write_concurrency.spec new file mode 100644 index 000000000..7b5d90a4d --- /dev/null +++ b/src/backend/columnar/specs/am_write_concurrency.spec @@ -0,0 +1,67 @@ +setup +{ + CREATE TABLE test_insert_concurrency (a int, b int) USING cstore_tableam; +} + +teardown +{ + DROP TABLE IF EXISTS test_insert_concurrency CASCADE; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-insert" +{ + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(1, 3) i; +} + +step "s1-copy" +{ + COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; +} + +step "s1-select" +{ + SELECT * FROM test_insert_concurrency ORDER BY a; +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-begin" +{ + BEGIN; +} + +step "s2-insert" +{ + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; +} + +step "s2-select" +{ + SELECT * FROM test_insert_concurrency ORDER BY a; +} + +step "s2-commit" +{ + COMMIT; +} + +# writes shouldn't block writes or reads +permutation "s1-begin" "s2-begin" "s1-insert" "s2-insert" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" + +# copy vs insert +permutation "s1-begin" "s2-begin" "s1-copy" "s2-insert" 
"s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" + +# insert vs copy +permutation "s1-begin" "s2-begin" "s2-insert" "s1-copy" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" diff --git a/src/backend/columnar/specs/create.spec b/src/backend/columnar/specs/create.spec new file mode 100644 index 000000000..09fc32131 --- /dev/null +++ b/src/backend/columnar/specs/create.spec @@ -0,0 +1,7 @@ +session "s1" +step "s1a" +{ + CREATE EXTENSION cstore_fdw; +} + +permutation "s1a" diff --git a/src/backend/columnar/sql/am_alter.sql b/src/backend/columnar/sql/am_alter.sql new file mode 100644 index 000000000..3b608f9cd --- /dev/null +++ b/src/backend/columnar/sql/am_alter.sql @@ -0,0 +1,85 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. +-- + +CREATE TABLE test_alter_table (a int, b int, c int) USING cstore_tableam; + +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; + +-- drop a column +ALTER TABLE test_alter_table DROP COLUMN a; + +-- test analyze +ANALYZE test_alter_table; + +-- verify select queries run as expected +SELECT * FROM test_alter_table; +SELECT a FROM test_alter_table; +SELECT b FROM test_alter_table; + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +INSERT INTO test_alter_table (SELECT 5, 8); + + +-- add a column with no defaults +ALTER TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + + +-- add a fixed-length column with default value +ALTER TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + + +-- add a variable-length column with default value +ALTER TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); 
+SELECT * from test_alter_table; + + +-- drop couple of columns +ALTER TABLE test_alter_table DROP COLUMN c; +ALTER TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; +SELECT count(*) from test_alter_table; +SELECT count(t.*) from test_alter_table t; + + +-- unsupported default values +ALTER TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ALTER TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ALTER TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + +-- unsupported type change +ALTER TABLE test_alter_table ADD COLUMN i int; +ALTER TABLE test_alter_table ADD COLUMN j float; +ALTER TABLE test_alter_table ADD COLUMN k text; + +-- this is valid type change +ALTER TABLE test_alter_table ALTER COLUMN i TYPE float; + +-- this is not valid +ALTER TABLE test_alter_table ALTER COLUMN j TYPE int; + +-- text / varchar conversion is valid both ways +ALTER TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER TABLE test_alter_table ALTER COLUMN k TYPE text; + +DROP TABLE test_alter_table; diff --git a/src/backend/columnar/sql/am_analyze.sql b/src/backend/columnar/sql/am_analyze.sql new file mode 100644 index 000000000..4476454a6 --- /dev/null +++ b/src/backend/columnar/sql/am_analyze.sql @@ -0,0 +1,11 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. 
+-- + +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; diff --git a/src/backend/columnar/sql/am_clean.sql b/src/backend/columnar/sql/am_clean.sql new file mode 100644 index 000000000..f7dc889fc --- /dev/null +++ b/src/backend/columnar/sql/am_clean.sql @@ -0,0 +1,9 @@ + +DROP TABLE test_null_values; +DROP TABLE test_other_types; +DROP TABLE test_range_types; +DROP TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP TABLE test_datetime_types; +DROP TABLE test_array_types; diff --git a/src/backend/columnar/sql/am_drop.sql b/src/backend/columnar/sql/am_drop.sql new file mode 100644 index 000000000..080712881 --- /dev/null +++ b/src/backend/columnar/sql/am_drop.sql @@ -0,0 +1,54 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- + +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. + +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset + +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset + +-- DROP cstore_fdw tables +DROP TABLE contestant; +DROP TABLE contestant_compressed; + +-- make sure DROP deletes metadata +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; + +-- Create a cstore_fdw table under a schema and drop it. 
+CREATE SCHEMA test_schema; +CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; + +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +DROP SCHEMA test_schema CASCADE; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; + +SELECT current_database() datname \gset + +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE TABLE test_table(data int) USING cstore_tableam; + +DROP EXTENSION cstore_fdw CASCADE; + +-- test database drop +CREATE EXTENSION cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE TABLE test_table(data int) USING cstore_tableam; + +\c :datname + +DROP DATABASE db_to_drop; diff --git a/src/backend/columnar/sql/am_functions.sql b/src/backend/columnar/sql/am_functions.sql new file mode 100644 index 000000000..1945eeb46 --- /dev/null +++ b/src/backend/columnar/sql/am_functions.sql @@ -0,0 +1,20 @@ +-- +-- Test utility functions for cstore_fdw tables. +-- + +CREATE TABLE empty_table (a int) USING cstore_tableam; +CREATE TABLE table_with_data (a int) USING cstore_tableam; +CREATE TABLE non_cstore_table (a int); + +COPY table_with_data FROM STDIN; +1 +2 +3 +\. + +SELECT pg_relation_size('empty_table') < pg_relation_size('table_with_data'); +SELECT cstore_table_size('non_cstore_table'); + +DROP TABLE empty_table; +DROP TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/src/backend/columnar/sql/am_insert.sql b/src/backend/columnar/sql/am_insert.sql new file mode 100644 index 000000000..5a6d7d385 --- /dev/null +++ b/src/backend/columnar/sql/am_insert.sql @@ -0,0 +1,56 @@ +-- +-- Testing insert on cstore_fdw tables. 
+-- + +CREATE TABLE test_insert_command (a int) USING cstore_tableam; + +-- test single row inserts fail +select count(*) from test_insert_command; +insert into test_insert_command values(1); +select count(*) from test_insert_command; + +insert into test_insert_command default values; +select count(*) from test_insert_command; + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); + +select count(*) from test_insert_command_data; +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + +drop table test_insert_command_data; +drop table test_insert_command; + +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; + +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; + +CREATE TABLE test_cstore_long_text(int_val int, text_val text) +USING cstore_tableam; + +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; + +-- drop source table to remove original text from toast +DROP TABLE test_long_text; + +-- check if text data is still available in cstore table +-- by comparing previously stored hash. 
+SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + +DROP TABLE test_long_text_hash; +DROP TABLE test_cstore_long_text; diff --git a/src/backend/columnar/sql/am_join.sql b/src/backend/columnar/sql/am_join.sql new file mode 100644 index 000000000..4d78dfe5b --- /dev/null +++ b/src/backend/columnar/sql/am_join.sql @@ -0,0 +1,28 @@ +CREATE SCHEMA am_cstore_join; +SET search_path TO am_cstore_join; + +CREATE TABLE users (id int, name text) USING cstore_tableam; +INSERT INTO users SELECT a, 'name' || a FROM generate_series(0,30-1) AS a; + +CREATE TABLE things (id int, user_id int, name text) USING cstore_tableam; +INSERT INTO things SELECT a, a % 30, 'thing' || a FROM generate_series(1,300) AS a; + +-- force the nested loop to rescan the table +SET enable_material TO off; +SET enable_hashjoin TO off; +SET enable_mergejoin TO off; + +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 290; + +-- verify the join uses a nested loop to trigger the rescan behaviour +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 299990; + +SET client_min_messages TO warning; +DROP SCHEMA am_cstore_join CASCADE; diff --git a/src/backend/columnar/sql/am_query.sql b/src/backend/columnar/sql/am_query.sql new file mode 100644 index 000000000..7ac8c2ea4 --- /dev/null +++ b/src/backend/columnar/sql/am_query.sql @@ -0,0 +1,34 @@ +-- +-- Test querying cstore_fdw tables. 
+-- + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; + +-- Query uncompressed data +SELECT count(*) FROM contestant; +SELECT avg(rating), stddev_samp(rating) FROM contestant; +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant ORDER BY handle; + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant_compressed ORDER BY handle; + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + +-- Test variables used in expressions +CREATE TABLE union_first (a int, b int) USING cstore_tableam; +CREATE TABLE union_second (a int, b int) USING cstore_tableam; + +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; + +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + +DROP TABLE union_first, union_second; diff --git a/src/backend/columnar/sql/am_rollback.sql b/src/backend/columnar/sql/am_rollback.sql new file mode 100644 index 000000000..da1cc8ce4 --- /dev/null +++ b/src/backend/columnar/sql/am_rollback.sql @@ -0,0 +1,41 @@ +-- +-- Testing we handle rollbacks properly +-- + +CREATE TABLE t(a int, b int) USING cstore_tableam; + +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND 
b.relname = 't'; + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; + +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +DROP TABLE t; diff --git a/src/backend/columnar/sql/am_tableoptions.sql b/src/backend/columnar/sql/am_tableoptions.sql new file mode 100644 index 000000000..33f26ec76 --- /dev/null +++ b/src/backend/columnar/sql/am_tableoptions.sql @@ -0,0 +1,102 @@ +CREATE SCHEMA am_tableoptions; +SET search_path TO am_tableoptions; + +CREATE TABLE table_options (a int) USING cstore_tableam; +INSERT INTO table_options SELECT generate_series(1,100); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the compression +SELECT alter_cstore_table_set('table_options', compression => 'pglz'); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', block_row_count => 10); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', stripe_row_count => 100); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- VACUUM FULL creates a new table, make sure it copies settings from the table you are vacuuming +VACUUM FULL table_options; + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 
'table_options'::regclass; + +-- set all settings at the same time +SELECT alter_cstore_table_set('table_options', stripe_row_count => 1000, block_row_count => 100, compression => 'none'); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- reset settings one by one to the version of the GUC's +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 10000; +SET cstore.compression TO 'pglz'; + +-- verify setting the GUC's didn't change the settings +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', block_row_count => true); +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', stripe_row_count => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', compression => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- verify resetting all settings at once work +SET cstore.block_row_count TO 10000; +SET cstore.stripe_row_count TO 100000; +SET cstore.compression TO 'none'; + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset( + 'table_options', + block_row_count => true, + stripe_row_count => true, + compression => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- verify edge cases +-- first start with a table that is not a cstore table +CREATE TABLE not_a_cstore_table (a int); +SELECT alter_cstore_table_set('not_a_cstore_table', compression => 'pglz'); +SELECT 
alter_cstore_table_reset('not_a_cstore_table', compression => true); + +-- verify you can't use a compression that is not known +SELECT alter_cstore_table_set('table_options', compression => 'foobar'); + +SET client_min_messages TO warning; +DROP SCHEMA am_tableoptions CASCADE; diff --git a/src/backend/columnar/sql/am_trigger.sql b/src/backend/columnar/sql/am_trigger.sql new file mode 100644 index 000000000..b8a918cf4 --- /dev/null +++ b/src/backend/columnar/sql/am_trigger.sql @@ -0,0 +1,61 @@ + +create or replace function trs_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE STATEMENT %', TG_OP; + RETURN NULL; +END; +$$; + +create or replace function trs_after() returns trigger language plpgsql as $$ +DECLARE + r RECORD; +BEGIN + RAISE NOTICE 'AFTER STATEMENT %', TG_OP; + IF (TG_OP = 'DELETE') THEN + FOR R IN select * from old_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + ELSE + FOR R IN select * from new_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + END IF; + RETURN NULL; +END; +$$; + +create or replace function trr_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; + +create or replace function trr_after() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'AFTER ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; + +create table test_tr(i int) using cstore_tableam; + +create trigger tr_before_stmt before insert on test_tr + for each statement execute procedure trs_before(); +create trigger tr_after_stmt after insert on test_tr + referencing new table as new_table + for each statement execute procedure trs_after(); + +create trigger tr_before_row before insert on test_tr + for each row execute procedure trr_before(); + +-- after triggers require TIDs, which are not supported yet +create trigger tr_after_row after insert on test_tr + for each row execute procedure trr_after(); + +insert into test_tr values(1); +insert into test_tr 
values(2),(3),(4); + +drop table test_tr; diff --git a/src/backend/columnar/sql/am_truncate.sql b/src/backend/columnar/sql/am_truncate.sql new file mode 100644 index 000000000..5d27a69fb --- /dev/null +++ b/src/backend/columnar/sql/am_truncate.sql @@ -0,0 +1,141 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- + +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +-- COMPRESSED +CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_regular (a int, b int); + +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; + +set cstore.compression = 'pglz'; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +set cstore.compression to default; + +-- query rows +SELECT * FROM cstore_truncate_test; + +TRUNCATE TABLE cstore_truncate_test; + +SELECT * FROM cstore_truncate_test; + +SELECT COUNT(*) from cstore_truncate_test; + +SELECT count(*) FROM cstore_truncate_test_compressed; +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + +SELECT pg_relation_size('cstore_truncate_test_compressed'); + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; + +SELECT * from cstore_truncate_test; + 
+SELECT * from cstore_truncate_test_second; + +SELECT * from cstore_truncate_test_regular; + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; + +SELECT * from cstore_truncate_test; +SELECT * from cstore_truncate_test_second; +SELECT * from cstore_truncate_test_regular; + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE TABLE cstore_same_transaction_truncate(a int) USING cstore_tableam; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; + +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; +SELECT * FROM cstore_same_transaction_truncate; + +DROP TABLE cstore_same_transaction_truncate; + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; + +SELECT cstore_truncate_test_regular_func(); +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); +DROP FUNCTION cstore_truncate_test_regular_func(); + +DROP TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP TABLE cstore_truncate_test_compressed; + +-- test 
truncate with schema +CREATE SCHEMA truncate_schema; +-- COMPRESSED +CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam; +set cstore.compression = 'pglz'; +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +set cstore.compression = 'pglz'; +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; + +SELECT current_user \gset + +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; + +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; +\c - :current_user + +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +DROP USER truncate_user; diff --git a/src/backend/columnar/sql/am_vacuum.sql b/src/backend/columnar/sql/am_vacuum.sql new file mode 100644 index 000000000..6d248a147 --- /dev/null +++ b/src/backend/columnar/sql/am_vacuum.sql @@ -0,0 +1,104 @@ +SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset + +CREATE TABLE t(a int, b int) USING cstore_tableam; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE 
a.relfilenode=b.relfilenode AND b.relname='t'; + +INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; +INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; +INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- vacuum full should merge stripes together +VACUUM FULL t; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- test the case when all data cannot fit into a single stripe +SELECT alter_cstore_table_set('t', stripe_row_count => 1000); +INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +VACUUM FULL t; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs +ALTER TABLE t DROP COLUMN a; + +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + +VACUUM FULL t; + +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + +-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; + +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; +SELECT pg_size_pretty(pg_relation_size('t')); +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT 
pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; + +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. +SELECT pg_size_pretty(pg_relation_size('t')); +COMMIT; + +-- vacuum should truncate the relation to the usable space +VACUUM VERBOSE t; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; + +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats + +BEGIN; +SELECT alter_cstore_table_set('t', + block_row_count => 1000, + stripe_row_count => 2000, + compression => 'pglz'); +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SELECT alter_cstore_table_set('t', compression => 'none'); +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; + +VACUUM VERBOSE t; + +SELECT count(*) FROM t; + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; + +VACUUM VERBOSE t; + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed for if compression +-- doesn't reduce its size. 
+SELECT alter_cstore_table_set('t', compression => 'pglz'); +VACUUM FULL t; +VACUUM VERBOSE t; + +DROP TABLE t; + +-- Make sure we cleaned the metadata for t too +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/src/backend/columnar/sql/extension_create.sql b/src/backend/columnar/sql/extension_create.sql new file mode 100644 index 000000000..2e73f5be7 --- /dev/null +++ b/src/backend/columnar/sql/extension_create.sql @@ -0,0 +1,4 @@ + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + diff --git a/src/backend/columnar/sql/fdw_alter.sql b/src/backend/columnar/sql/fdw_alter.sql new file mode 100644 index 000000000..5ba3beb34 --- /dev/null +++ b/src/backend/columnar/sql/fdw_alter.sql @@ -0,0 +1,85 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. +-- + +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; + +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; + +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; + +-- test analyze +ANALYZE test_alter_table; + +-- verify select queries run as expected +SELECT * FROM test_alter_table; +SELECT a FROM test_alter_table; +SELECT b FROM test_alter_table; + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +INSERT INTO test_alter_table (SELECT 5, 8); + + +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + + +-- add a variable-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from 
test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; +SELECT count(*) from test_alter_table; +SELECT count(t.*) from test_alter_table t; + + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; + +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; + +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; + +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; + +DROP FOREIGN TABLE test_alter_table; diff --git a/src/backend/columnar/sql/fdw_analyze.sql b/src/backend/columnar/sql/fdw_analyze.sql new file mode 100644 index 000000000..4476454a6 --- /dev/null +++ b/src/backend/columnar/sql/fdw_analyze.sql @@ -0,0 +1,11 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. 
+-- + +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; diff --git a/src/backend/columnar/sql/fdw_clean.sql b/src/backend/columnar/sql/fdw_clean.sql new file mode 100644 index 000000000..ecd4d67a1 --- /dev/null +++ b/src/backend/columnar/sql/fdw_clean.sql @@ -0,0 +1,10 @@ +DROP FOREIGN TABLE collation_block_filtering_test; +DROP FOREIGN TABLE test_block_filtering; +DROP FOREIGN TABLE test_null_values; +DROP FOREIGN TABLE test_other_types; +DROP FOREIGN TABLE test_range_types; +DROP FOREIGN TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP FOREIGN TABLE test_datetime_types; +DROP FOREIGN TABLE test_array_types; diff --git a/src/backend/columnar/sql/fdw_drop.sql b/src/backend/columnar/sql/fdw_drop.sql new file mode 100644 index 000000000..f89374a5a --- /dev/null +++ b/src/backend/columnar/sql/fdw_drop.sql @@ -0,0 +1,56 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP FOREIGN TABLE +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- + +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. + +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset + +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset + +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; + +-- make sure DROP deletes metadata +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; + +-- Create a cstore_fdw table under a schema and drop it. 
+CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; + +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +DROP SCHEMA test_schema CASCADE; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; + +SELECT current_database() datname \gset + +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; + +DROP EXTENSION cstore_fdw CASCADE; + +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; + +\c :datname + +DROP DATABASE db_to_drop; diff --git a/src/backend/columnar/sql/fdw_functions.sql b/src/backend/columnar/sql/fdw_functions.sql new file mode 100644 index 000000000..ed7e260b3 --- /dev/null +++ b/src/backend/columnar/sql/fdw_functions.sql @@ -0,0 +1,20 @@ +-- +-- Test utility functions for cstore_fdw tables. +-- + +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); + +COPY table_with_data FROM STDIN; +1 +2 +3 +\. 
+ +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); +SELECT cstore_table_size('non_cstore_table'); + +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/src/backend/columnar/sql/fdw_insert.sql b/src/backend/columnar/sql/fdw_insert.sql new file mode 100644 index 000000000..7a6b075ce --- /dev/null +++ b/src/backend/columnar/sql/fdw_insert.sql @@ -0,0 +1,56 @@ +-- +-- Testing insert on cstore_fdw tables. +-- + +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; + +-- test single row inserts fail +select count(*) from test_insert_command; +insert into test_insert_command values(1); +select count(*) from test_insert_command; + +insert into test_insert_command default values; +select count(*) from test_insert_command; + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); + +select count(*) from test_insert_command_data; +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + +drop table test_insert_command_data; +drop foreign table test_insert_command; + +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; + +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; + +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; + +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; + +-- drop source table to remove original text from toast +DROP TABLE 
test_long_text; + +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/src/backend/columnar/sql/fdw_query.sql b/src/backend/columnar/sql/fdw_query.sql new file mode 100644 index 000000000..87743e7bd --- /dev/null +++ b/src/backend/columnar/sql/fdw_query.sql @@ -0,0 +1,34 @@ +-- +-- Test querying cstore_fdw tables. +-- + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; + +-- Query uncompressed data +SELECT count(*) FROM contestant; +SELECT avg(rating), stddev_samp(rating) FROM contestant; +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant ORDER BY handle; + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant_compressed ORDER BY handle; + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; + +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; + +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + +DROP FOREIGN TABLE union_first, union_second; diff --git a/src/backend/columnar/sql/fdw_rollback.sql b/src/backend/columnar/sql/fdw_rollback.sql new file mode 100644 index 000000000..804868ac9 --- /dev/null +++ 
b/src/backend/columnar/sql/fdw_rollback.sql @@ -0,0 +1,41 @@ +-- +-- Testing we handle rollbacks properly +-- + +CREATE FOREIGN TABLE t(a int, b int) SERVER cstore_server; + +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; + +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +DROP FOREIGN TABLE t; diff --git a/src/backend/columnar/sql/fdw_truncate.sql b/src/backend/columnar/sql/fdw_truncate.sql new file mode 100644 index 000000000..ed2aaa04a --- /dev/null +++ b/src/backend/columnar/sql/fdw_truncate.sql @@ -0,0 +1,135 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. 
+-- + +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); + +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; + +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; + +-- query rows +SELECT * FROM cstore_truncate_test; + +TRUNCATE TABLE cstore_truncate_test; + +SELECT * FROM cstore_truncate_test; + +SELECT COUNT(*) from cstore_truncate_test; + +SELECT count(*) FROM cstore_truncate_test_compressed; +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; + +SELECT * from cstore_truncate_test; + +SELECT * from cstore_truncate_test_second; + +SELECT * from cstore_truncate_test_regular; + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; + +SELECT * from cstore_truncate_test; +SELECT * from 
cstore_truncate_test_second; +SELECT * from cstore_truncate_test_regular; + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + +-- make sure TRUNCATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE FOREIGN TABLE cstore_same_transaction_truncate(a int) SERVER cstore_server; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; + +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; +SELECT * FROM cstore_same_transaction_truncate; + +DROP FOREIGN TABLE cstore_same_transaction_truncate; + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; + +SELECT cstore_truncate_test_regular_func(); +-- the cached plans are used starting from the second call +SELECT cstore_truncate_test_regular_func(); +DROP FUNCTION cstore_truncate_test_regular_func(); + +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; + +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +TRUNCATE TABLE 
truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); + +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; + +SELECT current_user \gset + +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; + +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +\c - :current_user + +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +DROP USER truncate_user;