From abc9fbe1c3e4c1a68ffa665113eccced630b9e1b Mon Sep 17 00:00:00 2001 From: Citus Team Date: Fri, 31 Jan 2020 13:24:43 +0300 Subject: [PATCH 01/91] Squash of original cstore_fdw --- .gitignore | 57 + .travis.yml | 42 + LICENSE | 201 + META.json | 45 + Makefile | 59 + README.md | 373 ++ TODO.md | 41 + cstore.proto | 51 + cstore_compression.c | 171 + cstore_fdw--1.0--1.1.sql | 26 + cstore_fdw--1.1--1.2.sql | 3 + cstore_fdw--1.2--1.3.sql | 3 + cstore_fdw--1.3--1.4.sql | 3 + cstore_fdw--1.4--1.5.sql | 28 + cstore_fdw--1.5--1.6.sql | 19 + cstore_fdw--1.6--1.7.sql | 3 + cstore_fdw--1.7.sql | 60 + cstore_fdw.c | 2414 +++++++ cstore_fdw.control | 5 + cstore_fdw.h | 353 + cstore_metadata_serialization.c | 581 ++ cstore_metadata_serialization.h | 42 + cstore_reader.c | 1383 ++++ cstore_version_compat.h | 58 + cstore_writer.c | 1017 +++ data/array_types.csv | 3 + data/block_filtering.csv | 10000 ++++++++++++++++++++++++++++ data/contestants.1.csv | 5 + data/contestants.2.csv | 3 + data/datetime_types.csv | 2 + data/enum_and_composite_types.csv | 2 + data/null_values.csv | 2 + data/other_types.csv | 2 + data/range_types.csv | 2 + expected/alter.out | 178 + expected/analyze.out | 19 + expected/drop.out | 97 + expected/functions.out | 18 + expected/insert.out | 88 + expected/query.out | 105 + expected/truncate.out | 262 + expected/truncate_0.out | 262 + input/block_filtering.source | 71 + input/copyto.source | 18 + input/create.source | 49 + input/data_types.source | 74 + input/load.source | 44 + output/block_filtering.source | 118 + output/copyto.source | 24 + output/create.source | 50 + output/data_types.source | 84 + output/load.source | 39 + sql/alter.sql | 85 + sql/analyze.sql | 11 + sql/drop.sql | 76 + sql/functions.sql | 20 + sql/insert.sql | 56 + sql/query.sql | 34 + sql/truncate.sql | 135 + 59 files changed, 19076 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 META.json create mode 100644 
Makefile create mode 100644 README.md create mode 100644 TODO.md create mode 100644 cstore.proto create mode 100644 cstore_compression.c create mode 100644 cstore_fdw--1.0--1.1.sql create mode 100644 cstore_fdw--1.1--1.2.sql create mode 100644 cstore_fdw--1.2--1.3.sql create mode 100644 cstore_fdw--1.3--1.4.sql create mode 100644 cstore_fdw--1.4--1.5.sql create mode 100644 cstore_fdw--1.5--1.6.sql create mode 100644 cstore_fdw--1.6--1.7.sql create mode 100644 cstore_fdw--1.7.sql create mode 100644 cstore_fdw.c create mode 100644 cstore_fdw.control create mode 100644 cstore_fdw.h create mode 100644 cstore_metadata_serialization.c create mode 100644 cstore_metadata_serialization.h create mode 100644 cstore_reader.c create mode 100644 cstore_version_compat.h create mode 100644 cstore_writer.c create mode 100644 data/array_types.csv create mode 100644 data/block_filtering.csv create mode 100644 data/contestants.1.csv create mode 100644 data/contestants.2.csv create mode 100644 data/datetime_types.csv create mode 100644 data/enum_and_composite_types.csv create mode 100644 data/null_values.csv create mode 100644 data/other_types.csv create mode 100644 data/range_types.csv create mode 100644 expected/alter.out create mode 100644 expected/analyze.out create mode 100644 expected/drop.out create mode 100644 expected/functions.out create mode 100644 expected/insert.out create mode 100644 expected/query.out create mode 100644 expected/truncate.out create mode 100644 expected/truncate_0.out create mode 100644 input/block_filtering.source create mode 100644 input/copyto.source create mode 100644 input/create.source create mode 100644 input/data_types.source create mode 100644 input/load.source create mode 100644 output/block_filtering.source create mode 100644 output/copyto.source create mode 100644 output/create.source create mode 100644 output/data_types.source create mode 100644 output/load.source create mode 100644 sql/alter.sql create mode 100644 sql/analyze.sql create mode 
100644 sql/drop.sql create mode 100644 sql/functions.sql create mode 100644 sql/insert.sql create mode 100644 sql/query.sql create mode 100644 sql/truncate.sql diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..f95fd0b87 --- /dev/null +++ b/.gitignore @@ -0,0 +1,57 @@ +# ===== +# = C = +# ===== + +# Object files +*.o +*.ko +*.obj +*.elf + +# Libraries +*.lib +*.a + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.app +*.i*86 +*.x86_64 +*.hex + +# ======== +# = Gcov = +# ======== + +# gcc coverage testing tool files + +*.gcno +*.gcda +*.gcov + +# ==================== +# = Project-Specific = +# ==================== + +/data/*.cstore +/data/*.footer + +/sql/block_filtering.sql +/sql/copyto.sql +/sql/create.sql +/sql/data_types.sql +/sql/load.sql + +/expected/block_filtering.out +/expected/copyto.out +/expected/create.out +/expected/data_types.out +/expected/load.out + +*.pb-c.* diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..f83f7206e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,42 @@ +sudo: required +dist: bionic +language: c +cache: + apt: true + directories: + - /home/travis/postgresql +env: + global: + - enable_coverage=yes + - PG_PRELOAD=cstore_fdw + matrix: + - PGVERSION=9.3 + - PGVERSION=9.4 + - PGVERSION=9.5 + - PGVERSION=9.6 + - PGVERSION=10 + - PGVERSION=11 + - PGVERSION=12 + +before_install: + - git clone -b v0.7.13 --depth 1 https://github.com/citusdata/tools.git + - sudo make -C tools install + - setup_apt + - nuke_pg +install: + - sudo apt-get install protobuf-c-compiler + - sudo apt-get install libprotobuf-c0-dev + - sudo locale-gen da_DK + - sudo locale-gen da_DK.utf8 + - sudo pip install cpp-coveralls + - install_pg + - install_custom_pg +before_script: + - chmod 777 . 
+ - chmod 777 data + - chmod 666 data/* + - config_and_start_cluster +script: pg_travis_test +after_success: + - sudo chmod 666 *.gcda + - coveralls --exclude cstore.pb-c.c --exclude cstore.pb-c.h diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..ad410e113 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/META.json b/META.json new file mode 100644 index 000000000..19e819daa --- /dev/null +++ b/META.json @@ -0,0 +1,45 @@ +{ + "name": "cstore_fdw", + "abstract": "Columnar Store for PostgreSQL", + "description": "PostgreSQL extension which implements a Columnar Store.", + "version": "1.7.0", + "maintainer": "Murat Tuncer ", + "license": "apache_2_0", + "provides": { + "cstore_fdw": { + "abstract": "Foreign Data Wrapper for Columnar Store Tables", + "file": "cstore_fdw--1.7.sql", + "docfile": "README.md", + "version": "1.7.0" + } + }, + "prereqs": { + "runtime": { + "requires": { + "PostgreSQL": "9.3.0" + } + } + }, + "resources": { + "bugtracker": { + "web": "http://github.com/citusdata/cstore_fdw/issues/" + }, + "repository": { + "url": "git://github.com/citusdata/cstore_fdw.git", + "web": "https://github.com/citusdata/cstore_fdw/", + "type": "git" + } + }, + "generated_by": "Murat Tuncer", + "meta-spec": { + "version": "1.0.0", + "url": "http://pgxn.org/meta/spec.txt" + }, + "tags": [ + "orc", + "fdw", + "foreign data wrapper", + "cstore_fdw", + "columnar store" + ] +} diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..72daebc55 --- /dev/null +++ b/Makefile @@ -0,0 +1,59 @@ +# cstore_fdw/Makefile +# +# Copyright (c) 2016 Citus Data, Inc. 
+# + +MODULE_big = cstore_fdw + +PG_CPPFLAGS = --std=c99 +SHLIB_LINK = -lprotobuf-c +OBJS = cstore.pb-c.o cstore_fdw.o cstore_writer.o cstore_reader.o \ + cstore_metadata_serialization.o cstore_compression.o + +EXTENSION = cstore_fdw +DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ + cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ + cstore_fdw--1.0--1.1.sql + +REGRESS = create load query analyze data_types functions block_filtering drop \ + insert copyto alter truncate +EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ + sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ + sql/copyto.sql expected/block_filtering.out expected/create.out \ + expected/data_types.out expected/load.out expected/copyto.out + +ifeq ($(enable_coverage),yes) + PG_CPPFLAGS += --coverage + SHLIB_LINK += --coverage + EXTRA_CLEAN += *.gcno +endif + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Darwin) + PG_CPPFLAGS += -I/usr/local/include +endif + +# +# Users need to specify their Postgres installation path through pg_config. For +# example: /usr/local/pgsql/bin/pg_config or /usr/lib/postgresql/9.3/bin/pg_config +# + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) + +ifndef MAJORVERSION + MAJORVERSION := $(basename $(VERSION)) +endif + +ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) + $(error PostgreSQL 9.3 to 12 is required to compile this extension) +endif + +cstore.pb-c.c: cstore.proto + protoc-c --c_out=. 
cstore.proto + +installcheck: remove_cstore_files + +remove_cstore_files: + rm -f data/*.cstore data/*.cstore.footer diff --git a/README.md b/README.md new file mode 100644 index 000000000..1a20f3abe --- /dev/null +++ b/README.md @@ -0,0 +1,373 @@ +cstore_fdw +========== + +[![Build Status](https://travis-ci.org/citusdata/cstore_fdw.svg?branch=master)][status] +[![Coverage](http://img.shields.io/coveralls/citusdata/cstore_fdw/master.svg)][coverage] + +Cstore_fdw is an open source columnar store extension for PostgreSQL. Columnar stores provide notable benefits for analytics use cases where data is loaded in batches. Cstore_fdw’s columnar nature delivers performance by only reading relevant data from disk, and it may compress data 6x-10x to reduce space requirements for data archival. + +Cstore_fdw is developed by [Citus Data](https://www.citusdata.com) and can be used in combination with [Citus](https://github.com/citusdata/citus), a postgres extension that intelligently distributes your data and queries across many nodes so your database can scale and your queries are fast. If you have any questions about how Citus can help you scale or how to use Citus in combination with cstore_fdw, [please let us know](https://www.citusdata.com/about/contact_us/). + +Join the [Mailing List][mailing-list] to stay on top of the latest developments for Cstore_fdw. + + +Introduction +------------ + +This extension uses a format for its data layout that is inspired by ORC, +the Optimized Row Columnar format. Like ORC, the cstore format improves +upon RCFile developed at Facebook, and brings the following benefits: + +* Compression: Reduces in-memory and on-disk data size by 2-4x. Can be extended + to support different codecs. +* Column projections: Only reads column data relevant to the query. Improves + performance for I/O bound queries. +* Skip indexes: Stores min/max statistics for row groups, and uses them to skip + over unrelated rows. 
+ +Further, we used the Postgres foreign data wrapper APIs and type representations +with this extension. This brings: + +* Support for 40+ Postgres data types. The user can also create new types and + use them. +* Statistics collection. PostgreSQL's query optimizer uses these stats to + evaluate different query plans and pick the best one. +* Simple setup. Create foreign table and copy data. Run SQL. + + +Building +-------- + +cstore\_fdw depends on protobuf-c for serializing and deserializing table metadata. +So we need to install these packages first: + + # Fedora 17+, CentOS, and Amazon Linux + sudo yum install protobuf-c-devel + + # Ubuntu 10.4+ + sudo apt-get install protobuf-c-compiler + sudo apt-get install libprotobuf-c0-dev + + # Ubuntu 18.4+ + sudo apt-get install protobuf-c-compiler + sudo apt-get install libprotobuf-c-dev + + # Mac OS X + brew install protobuf-c + +**Note.** In CentOS 5, 6, and 7, you may need to install or update EPEL 5, 6, or 7 repositories. + See [this page](https://support.rackspace.com/how-to/install-epel-and-additional-repositories-on-centos-and-red-hat/) +for instructions. + +**Note.** In Amazon Linux, the EPEL repository is installed by default, but not +enabled. See [these instructions](http://aws.amazon.com/amazon-linux-ami/faqs/#epel) +for how to enable it. + +Once you have protobuf-c installed on your machine, you are ready to build +cstore\_fdw. For this, you need to include the pg\_config directory path in +your make command. This path is typically the same as your PostgreSQL +installation's bin/ directory path. For example: + + PATH=/usr/local/pgsql/bin/:$PATH make + sudo PATH=/usr/local/pgsql/bin/:$PATH make install + +**Note.** cstore_fdw requires PostgreSQL version from 9.3 to 12. It doesn't +support earlier versions of PostgreSQL. 
+ + +Usage +----- + +Before using cstore\_fdw, you need to add it to ```shared_preload_libraries``` +in your ```postgresql.conf``` and restart Postgres: + + shared_preload_libraries = 'cstore_fdw' # (change requires restart) + +The following parameters can be set on a cstore foreign table object. + +* filename (optional): The absolute path to the location for storing table data. + If you don't specify the filename option, cstore\_fdw will automatically + choose the $PGDATA/cstore\_fdw directory to store the files. If specified the + value of this parameter will be used as a prefix for all files created to + store table data. For example, the value ```/cstore_fdw/my_table``` could result in + the files ```/cstore_fdw/my_table``` and ```/cstore_fdw/my_table.footer``` being used + to manage table data. +* compression (optional): The compression used for compressing value streams. + Valid options are ```none``` and ```pglz```. The default is ```none```. +* stripe\_row\_count (optional): Number of rows per stripe. The default is + ```150000```. Reducing this decreases the amount memory used for loading data + and querying, but also decreases the performance. +* block\_row\_count (optional): Number of rows per column block. The default is + ```10000```. cstore\_fdw compresses, creates skip indexes, and reads from disk + at the block granularity. Increasing this value helps with compression and results + in fewer reads from disk. However, higher values also reduce the probability of + skipping over unrelated row blocks. + + +To load or append data into a cstore table, you have two options: + +* You can use the [```COPY``` command][copy-command] to load or append data from + a file, a program, or STDIN. +* You can use the ```INSERT INTO cstore_table SELECT ...``` syntax to load or + append data from another table. + +You can use the [```ANALYZE``` command][analyze-command] to collect statistics +about the table. 
These statistics help the query planner to help determine the +most efficient execution plan for each query. + +**Note.** We currently don't support updating table using DELETE, and UPDATE +commands. We also don't support single row inserts. + + +Updating from earlier versions to 1.7 +--------------------------------------- + +To update an existing cstore_fdw installation from versions earlier than 1.6 +you can take the following steps: + +* Download and install cstore_fdw version 1.6 using instructions from the "Building" + section, +* Restart the PostgreSQL server, +* Run ```ALTER EXTENSION cstore_fdw UPDATE;``` + + +Example +------- + +As an example, we demonstrate loading and querying data to/from a column store +table from scratch here. Let's start with downloading and decompressing the data +files. + + wget http://examples.citusdata.com/customer_reviews_1998.csv.gz + wget http://examples.citusdata.com/customer_reviews_1999.csv.gz + + gzip -d customer_reviews_1998.csv.gz + gzip -d customer_reviews_1999.csv.gz + +Then, let's log into Postgres, and run the following commands to create a column +store foreign table: + +```SQL +-- load extension first time after install +CREATE EXTENSION cstore_fdw; + +-- create server object +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + +-- create foreign table +CREATE FOREIGN TABLE customer_reviews +( + customer_id TEXT, + review_date DATE, + review_rating INTEGER, + review_votes INTEGER, + review_helpful_votes INTEGER, + product_id CHAR(10), + product_title TEXT, + product_sales_rank BIGINT, + product_group TEXT, + product_category TEXT, + product_subcategory TEXT, + similar_product_ids CHAR(10)[] +) +SERVER cstore_server +OPTIONS(compression 'pglz'); +``` + +Next, we load data into the table: + +```SQL +\COPY customer_reviews FROM 'customer_reviews_1998.csv' WITH CSV; +\COPY customer_reviews FROM 'customer_reviews_1999.csv' WITH CSV; +``` + +**Note.** If you are getting ```ERROR: cannot copy to foreign table 
+"customer_reviews"``` when trying to run the COPY commands, double check that you +have added cstore\_fdw to ```shared_preload_libraries``` in ```postgresql.conf``` +and restarted Postgres. + +Next, we collect data distribution statistics about the table. This is optional, +but usually very helpful: + +```SQL +ANALYZE customer_reviews; +``` + +Finally, let's run some example SQL queries on the column store table. + +```SQL +-- Find all reviews a particular customer made on the Dune series in 1998. +SELECT + customer_id, review_date, review_rating, product_id, product_title +FROM + customer_reviews +WHERE + customer_id ='A27T7HVDXA3K2A' AND + product_title LIKE '%Dune%' AND + review_date >= '1998-01-01' AND + review_date <= '1998-12-31'; + +-- Do we have a correlation between a book's title's length and its review ratings? +SELECT + width_bucket(length(product_title), 1, 50, 5) title_length_bucket, + round(avg(review_rating), 2) AS review_average, + count(*) +FROM + customer_reviews +WHERE + product_group = 'Book' +GROUP BY + title_length_bucket +ORDER BY + title_length_bucket; +``` + + +Usage with Citus +---------------- + +The example above illustrated how to load data into a PostgreSQL database running +on a single host. However, sometimes your data is too large to analyze effectively +on a single host. Citus is a product built by Citus Data that allows you to run +a distributed PostgreSQL database to analyze your data using the power of multiple +hosts. You can easily install and run other PostgreSQL extensions and foreign data +wrappers—including cstore_fdw—alongside Citus. + +You can create a cstore_fdw table and distribute it using the +```create_distributed_table()``` UDF just like any other table. You can load data +using the ```copy``` command as you would do in single node PostgreSQL. + +Using Skip Indexes +------------------ + +cstore_fdw partitions each column into multiple blocks. 
Skip indexes store minimum +and maximum values for each of these blocks. While scanning the table, if min/max +values of the block contradict the WHERE clause, then the block is completely +skipped. This way, the query processes less data and hence finishes faster. + +To use skip indexes more efficiently, you should load the data after sorting it +on a column that is commonly used in the WHERE clause. This ensures that there is +a minimum overlap between blocks and the chance of them being skipped is higher. + +In practice, the data generally has an inherent dimension (for example a time field) +on which it is naturally sorted. Usually, the queries also have a filter clause on +that column (for example you want to query only the last week's data), and hence you +don't need to sort the data in such cases. + + +Uninstalling cstore_fdw +----------------------- + +Before uninstalling the extension, first you need to drop all the cstore tables: + + postgres=# DROP FOREIGN TABLE cstore_table_1; + ... + postgres=# DROP FOREIGN TABLE cstore_table_n; + +Then, you should drop the cstore server and extension: + + postgres=# DROP SERVER cstore_server; + postgres=# DROP EXTENSION cstore_fdw; + +cstore\_fdw automatically creates some directories inside the PostgreSQL's data +directory to store its files. To remove them, you can run: + + $ rm -rf $PGDATA/cstore_fdw + +Then, you should remove cstore\_fdw from ```shared_preload_libraries``` in +your ```postgresql.conf```: + + shared_preload_libraries = '' # (change requires restart) + +Finally, to uninstall the extension you can run the following command in the +extension's source code directory. 
This will clean up all the files copied during +the installation: + + $ sudo PATH=/usr/local/pgsql/bin/:$PATH make uninstall + + +Changeset +--------- +### Version 1.7.0 +* (Fix) Add support for PostgreSQL 12 +* (Fix) Support count(t.*) from t type queries +* (Fix) Build failures for MacOS 10.14+ +* (Fix) Make foreign scan parallel safe +* (Fix) Add support for PostgreSQL 11 COPY +### Version 1.6.2 +* (Fix) Add support for PostgreSQL 11 +### Version 1.6.1 +* (Fix) Fix crash during truncate (Cstore crashing server when enabled, not used) +* (Fix) No such file or directory warning when attempting to drop database +### Version 1.6 +* (Feature) Added support for PostgreSQL 10. +* (Fix) Removed table files when a schema, extension or database is dropped. +* (Fix) Removed unused code fragments. +* (Fix) Fixed incorrect initialization of stripe buffers. +* (Fix) Checked user access rights when executing truncate. +* (Fix) Made copy command cancellable. +* (Fix) Fixed namespace issue regarding drop table. + +### Version 1.5.1 +* (Fix) Verify cstore_fdw server on CREATE FOREIGN TABLE command + +### Version 1.5 +* (Feature) Added support for PostgreSQL 9.6. +* (Fix) Removed table data when cstore_fdw table is indirectly dropped. +* (Fix) Removed unused code fragments. +* (Fix) Fixed column selection logic to return columns used in expressions. +* (Fix) Prevented alter table command from changinf column type to incompatible types. + +### Version 1.4.1 + +* (Fix) Compatibility fix for Citus [copy command][copy-command]. + +### Version 1.4 + +* (Feature) Added support for ```TRUNCATE TABLE``` +* (Fix) Added support for PostgreSQL 9.5 + +### Version 1.3 + +* (Feature) Added support for ```ALTER TABLE ADD COLUMN``` and ```ALTER TABLE DROP COLUMN```. +* (Feature) Added column list support in ```COPY FROM```. +* (Optimization) Improve row count estimation, which results in better plans. +* (Fix) Fix the deadlock issue during concurrent inserts. 
+* (Fix) Return correct result when using whole row references. + +### Version 1.2 + +* (Feature) Added support for ```COPY TO```. +* (Feature) Added support for ```INSERT INTO cstore_table SELECT ...```. +* (Optimization) Improved memory usage. +* (Fix) Dropping multiple cstore tables in a single command cleans-up files + of all them. + +### Version 1.1 + +* (Feature) Make filename option optional, and use a default directory inside + $PGDATA to manage cstore tables. +* (Feature) Automatically delete files on DROP FOREIGN TABLE. +* (Fix) Return empty table if no data has been loaded. Previously, cstore_fdw + errored out. +* (Fix) Fix overestimating relation column counts when planning. +* (Feature) Added cstore\_table\_size(tablename) for getting the size of a cstore + table in bytes. + + +Copyright +--------- + +Copyright (c) 2017 Citus Data, Inc. + +This module is free software; you can redistribute it and/or modify it under the +Apache v2.0 License. + +For all types of questions and comments about the wrapper, please contact us at +engage @ citusdata.com. + +[status]: https://travis-ci.org/citusdata/cstore_fdw +[mailing-list]: https://groups.google.com/forum/#!forum/cstore-users +[coverage]: https://coveralls.io/r/citusdata/cstore_fdw +[copy-command]: http://www.postgresql.org/docs/current/static/sql-copy.html +[analyze-command]: http://www.postgresql.org/docs/current/static/sql-analyze.html diff --git a/TODO.md b/TODO.md new file mode 100644 index 000000000..179fbc8c7 --- /dev/null +++ b/TODO.md @@ -0,0 +1,41 @@ +To see the list of features and bug-fixes planned for next releases, see our +[development roadmap][roadmap]. 
+ +Requested Features +------------------ + +* Improve write performance +* Improve read performance +* Add checksum logic +* Add new compression methods +* Enable INSERT/DELETE/UPDATE +* Enable users other than superuser to safely create columnar tables (permissions) +* Transactional semantics +* Add config setting to make pg\_fsync() optional + + +Known Issues +------------ + +* Copy command ignores NOT NULL constraints. +* Planning functions don't take into account average column width. +* Planning functions don't correctly take into account block skipping benefits. +* On 32-bit platforms, when file size is outside the 32-bit signed range, EXPLAIN + command prints incorrect file size. +* If two different columnar tables are configured to point to the same file, + writes to the underlying file aren't protected from each other. +* When a data load is in progress, concurrent reads on the table overestimate the + page count. +* We have a minor memory leak in CStoreEndWrite. We need to also free the + comparisonFunctionArray. +* block\_filtering test fails on Ubuntu because the "da\_DK" locale is not enabled + by default. +* We don't yet incorporate the compression method's impact on disk I/O into cost + estimates. +* CitusDB integration errors: +* Concurrent staging cstore\_fdw tables doesn't work. +* Setting a default value for column with ALTER TABLE has limited support for + existing rows. 
+ +[roadmap]: https://github.com/citusdata/cstore_fdw/wiki/Roadmap + diff --git a/cstore.proto b/cstore.proto new file mode 100644 index 000000000..6e24c9075 --- /dev/null +++ b/cstore.proto @@ -0,0 +1,51 @@ +syntax = "proto2"; + +package protobuf; + +enum CompressionType { + // Values should match with the corresponding struct in cstore_fdw.h + NONE = 0; + PG_LZ = 1; +}; + +message ColumnBlockSkipNode { + optional uint64 rowCount = 1; + optional bytes minimumValue = 2; + optional bytes maximumValue = 3; + optional uint64 valueBlockOffset = 4; + optional uint64 valueLength = 5; + optional CompressionType valueCompressionType = 6; + optional uint64 existsBlockOffset = 7; + optional uint64 existsLength = 8; +} + +message ColumnBlockSkipList { + repeated ColumnBlockSkipNode blockSkipNodeArray = 1; +} + +message StripeFooter { + repeated uint64 skipListSizeArray = 1; + repeated uint64 existsSizeArray = 2; + repeated uint64 valueSizeArray = 3; +} + +message StripeMetadata { + optional uint64 fileOffset = 1; + optional uint64 skipListLength = 2; + optional uint64 dataLength = 3; + optional uint64 footerLength = 4; +} + +message TableFooter { + repeated StripeMetadata stripeMetadataArray = 1; + optional uint32 blockRowCount = 2; +} + +message PostScript { + optional uint64 tableFooterLength = 1; + optional uint64 versionMajor = 2; + optional uint64 versionMinor = 3; + + // Leave this last in the record + optional string magicNumber = 8000; +} diff --git a/cstore_compression.c b/cstore_compression.c new file mode 100644 index 000000000..3b37fd47a --- /dev/null +++ b/cstore_compression.c @@ -0,0 +1,171 @@ +/*------------------------------------------------------------------------- + * + * cstore_compression.c + * + * This file contains compression/decompression functions definitions + * used in cstore_fdw. + * + * Copyright (c) 2016, Citus Data, Inc. 
 + * + * $Id$ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "cstore_fdw.h" + +#if PG_VERSION_NUM >= 90500 +#include "common/pg_lzcompress.h" +#else +#include "utils/pg_lzcompress.h" +#endif + + + + +#if PG_VERSION_NUM >= 90500 +/* + * The information at the start of the compressed data. This description is taken + * from pg_lzcompress in pre-9.5 version of PostgreSQL. + */ +typedef struct CStoreCompressHeader +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int32 rawsize; +} CStoreCompressHeader; + +/* + * Utilities for manipulation of header information for compressed data + */ + +#define CSTORE_COMPRESS_HDRSZ ((int32) sizeof(CStoreCompressHeader)) +#define CSTORE_COMPRESS_RAWSIZE(ptr) (((CStoreCompressHeader *) (ptr))->rawsize) +#define CSTORE_COMPRESS_RAWDATA(ptr) (((char *) (ptr)) + CSTORE_COMPRESS_HDRSZ) +#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = (len)) + +#else + +#define CSTORE_COMPRESS_HDRSZ (0) +#define CSTORE_COMPRESS_RAWSIZE(ptr) (PGLZ_RAW_SIZE((PGLZ_Header *) buffer->data)) +#define CSTORE_COMPRESS_RAWDATA(ptr) (((PGLZ_Header *) (ptr))) +#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = (len)) + +#endif + + + +/* + * CompressBuffer compresses the given buffer with the given compression type + * outputBuffer enlarged to contain compressed data. The function returns true + * if compression is done, returns false if compression is not done. + * outputBuffer is valid only if the function returns true. 
+ */ +bool +CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, + CompressionType compressionType) +{ + uint64 maximumLength = PGLZ_MAX_OUTPUT(inputBuffer->len) + CSTORE_COMPRESS_HDRSZ; + bool compressionResult = false; +#if PG_VERSION_NUM >= 90500 + int32 compressedByteCount = 0; +#endif + + if (compressionType != COMPRESSION_PG_LZ) + { + return false; + } + + resetStringInfo(outputBuffer); + enlargeStringInfo(outputBuffer, maximumLength); + +#if PG_VERSION_NUM >= 90500 + compressedByteCount = pglz_compress((const char *) inputBuffer->data, + inputBuffer->len, + CSTORE_COMPRESS_RAWDATA(outputBuffer->data), + PGLZ_strategy_always); + if (compressedByteCount >= 0) + { + CSTORE_COMPRESS_SET_RAWSIZE(outputBuffer->data, inputBuffer->len); + SET_VARSIZE_COMPRESSED(outputBuffer->data, + compressedByteCount + CSTORE_COMPRESS_HDRSZ); + compressionResult = true; + } +#else + + compressionResult = pglz_compress(inputBuffer->data, inputBuffer->len, + CSTORE_COMPRESS_RAWDATA(outputBuffer->data), + PGLZ_strategy_always); +#endif + + if (compressionResult) + { + outputBuffer->len = VARSIZE(outputBuffer->data); + } + + return compressionResult; +} + + +/* + * DecompressBuffer decompresses the given buffer with the given compression + * type. This function returns the buffer as-is when no compression is applied. 
+ */ +StringInfo +DecompressBuffer(StringInfo buffer, CompressionType compressionType) +{ + StringInfo decompressedBuffer = NULL; + + Assert(compressionType == COMPRESSION_NONE || compressionType == COMPRESSION_PG_LZ); + + if (compressionType == COMPRESSION_NONE) + { + /* in case of no compression, return buffer */ + decompressedBuffer = buffer; + } + else if (compressionType == COMPRESSION_PG_LZ) + { + uint32 compressedDataSize = VARSIZE(buffer->data) - CSTORE_COMPRESS_HDRSZ; + uint32 decompressedDataSize = CSTORE_COMPRESS_RAWSIZE(buffer->data); + char *decompressedData = NULL; +#if PG_VERSION_NUM >= 90500 + int32 decompressedByteCount = 0; +#endif + + if (compressedDataSize + CSTORE_COMPRESS_HDRSZ != buffer->len) + { + ereport(ERROR, (errmsg("cannot decompress the buffer"), + errdetail("Expected %u bytes, but received %u bytes", + compressedDataSize, buffer->len))); + } + + decompressedData = palloc0(decompressedDataSize); + +#if PG_VERSION_NUM >= 90500 + +#if PG_VERSION_NUM >= 120000 + decompressedByteCount = pglz_decompress(CSTORE_COMPRESS_RAWDATA(buffer->data), + compressedDataSize, decompressedData, + decompressedDataSize, true); +#else + decompressedByteCount = pglz_decompress(CSTORE_COMPRESS_RAWDATA(buffer->data), + compressedDataSize, decompressedData, + decompressedDataSize); +#endif + + if (decompressedByteCount < 0) + { + ereport(ERROR, (errmsg("cannot decompress the buffer"), + errdetail("compressed data is corrupted"))); + } +#else + pglz_decompress((PGLZ_Header *) buffer->data, decompressedData); +#endif + + decompressedBuffer = palloc0(sizeof(StringInfoData)); + decompressedBuffer->data = decompressedData; + decompressedBuffer->len = decompressedDataSize; + decompressedBuffer->maxlen = decompressedDataSize; + } + + return decompressedBuffer; +} diff --git a/cstore_fdw--1.0--1.1.sql b/cstore_fdw--1.0--1.1.sql new file mode 100644 index 000000000..9e8029638 --- /dev/null +++ b/cstore_fdw--1.0--1.1.sql @@ -0,0 +1,26 @@ +/* 
cstore_fdw/cstore_fdw--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION UPDATE +\echo Use "ALTER EXTENSION cstore_fdw UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION cstore_ddl_event_end_trigger() +RETURNS event_trigger +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE EVENT TRIGGER cstore_ddl_event_end +ON ddl_command_end +EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); + +CREATE FUNCTION cstore_table_size(relation regclass) +RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +-- cstore_fdw creates directories to store files for tables with automatically +-- determined filename during the CREATE SERVER statement. Since this feature +-- was newly added in v1.1, servers created with v1.0 did not create them. So, +-- we create a server with v1.1 to ensure that the required directories are +-- created to allow users to create automatically managed tables with old servers. +CREATE SERVER cstore_server_for_updating_1_0_to_1_1 FOREIGN DATA WRAPPER cstore_fdw; +DROP SERVER cstore_server_for_updating_1_0_to_1_1; diff --git a/cstore_fdw--1.1--1.2.sql b/cstore_fdw--1.1--1.2.sql new file mode 100644 index 000000000..6cabb8c5e --- /dev/null +++ b/cstore_fdw--1.1--1.2.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.1--1.2.sql */ + +-- No new functions or definitions were added in 1.2 diff --git a/cstore_fdw--1.2--1.3.sql b/cstore_fdw--1.2--1.3.sql new file mode 100644 index 000000000..3ad187d09 --- /dev/null +++ b/cstore_fdw--1.2--1.3.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.2--1.3.sql */ + +-- No new functions or definitions were added in 1.3 diff --git a/cstore_fdw--1.3--1.4.sql b/cstore_fdw--1.3--1.4.sql new file mode 100644 index 000000000..3b7b0f150 --- /dev/null +++ b/cstore_fdw--1.3--1.4.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.3--1.4.sql */ + +-- No new functions or definitions were added in 1.4 diff --git a/cstore_fdw--1.4--1.5.sql b/cstore_fdw--1.4--1.5.sql new file mode 100644 index 
000000000..55bbb0b2a --- /dev/null +++ b/cstore_fdw--1.4--1.5.sql @@ -0,0 +1,28 @@ +/* cstore_fdw/cstore_fdw--1.4--1.5.sql */ + +CREATE FUNCTION cstore_clean_table_resources(oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION cstore_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + AS $csdt$ +DECLARE v_obj record; +BEGIN + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP + + IF v_obj.object_type NOT IN ('table', 'foreign table') THEN + CONTINUE; + END IF; + + PERFORM cstore_clean_table_resources(v_obj.objid); + + END LOOP; +END; +$csdt$; + +CREATE EVENT TRIGGER cstore_drop_event + ON SQL_DROP + EXECUTE PROCEDURE cstore_drop_trigger(); diff --git a/cstore_fdw--1.5--1.6.sql b/cstore_fdw--1.5--1.6.sql new file mode 100644 index 000000000..c8f7e8097 --- /dev/null +++ b/cstore_fdw--1.5--1.6.sql @@ -0,0 +1,19 @@ +/* cstore_fdw/cstore_fdw--1.5--1.6.sql */ + +CREATE OR REPLACE FUNCTION cstore_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + AS $csdt$ +DECLARE v_obj record; +BEGIN + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP + + IF v_obj.object_type NOT IN ('table', 'foreign table') THEN + CONTINUE; + END IF; + + PERFORM public.cstore_clean_table_resources(v_obj.objid); + + END LOOP; +END; +$csdt$; diff --git a/cstore_fdw--1.6--1.7.sql b/cstore_fdw--1.6--1.7.sql new file mode 100644 index 000000000..c7f56f059 --- /dev/null +++ b/cstore_fdw--1.6--1.7.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.6--1.7.sql */ + +-- No new functions or definitions were added in 1.7 diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql new file mode 100644 index 000000000..ad2683f52 --- /dev/null +++ b/cstore_fdw--1.7.sql @@ -0,0 +1,60 @@ +/* cstore_fdw/cstore_fdw--1.7.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION cstore_fdw" to load this file. 
\quit + +CREATE FUNCTION cstore_fdw_handler() +RETURNS fdw_handler +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION cstore_fdw_validator(text[], oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER cstore_fdw +HANDLER cstore_fdw_handler +VALIDATOR cstore_fdw_validator; + +CREATE FUNCTION cstore_ddl_event_end_trigger() +RETURNS event_trigger +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE EVENT TRIGGER cstore_ddl_event_end +ON ddl_command_end +EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); + +CREATE FUNCTION cstore_table_size(relation regclass) +RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION cstore_clean_table_resources(oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION cstore_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + AS $csdt$ +DECLARE v_obj record; +BEGIN + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP + + IF v_obj.object_type NOT IN ('table', 'foreign table') THEN + CONTINUE; + END IF; + + PERFORM public.cstore_clean_table_resources(v_obj.objid); + + END LOOP; +END; +$csdt$; + +CREATE EVENT TRIGGER cstore_drop_event + ON SQL_DROP + EXECUTE PROCEDURE cstore_drop_trigger(); + diff --git a/cstore_fdw.c b/cstore_fdw.c new file mode 100644 index 000000000..b0a327768 --- /dev/null +++ b/cstore_fdw.c @@ -0,0 +1,2414 @@ +/*------------------------------------------------------------------------- + * + * cstore_fdw.c + * + * This file contains the function definitions for scanning, analyzing, and + * copying into cstore_fdw foreign tables. Note that this file uses the API + * provided by cstore_reader and cstore_writer for reading and writing cstore + * files. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "cstore_fdw.h" +#include "cstore_version_compat.h" + +#include +#include +#include +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tuptoaster.h" +#include "catalog/namespace.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_namespace.h" +#include "commands/copy.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/explain.h" +#include "commands/extension.h" +#include "commands/vacuum.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" +#if PG_VERSION_NUM >= 120000 +#include "access/heapam.h" +#include "access/tableam.h" +#include "executor/tuptable.h" +#include "optimizer/optimizer.h" +#else +#include "optimizer/var.h" +#endif +#include "parser/parser.h" +#include "parser/parsetree.h" +#include "parser/parse_coerce.h" +#include "parser/parse_type.h" +#include "storage/fd.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#if PG_VERSION_NUM >= 120000 +#include "utils/snapmgr.h" +#else +#include "utils/tqual.h" +#endif + + +/* local functions forward declarations */ +#if PG_VERSION_NUM >= 100000 +static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, char *completionTag); +#else +static void CStoreProcessUtility(Node *parseTree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, 
+ DestReceiver *destReceiver, char *completionTag); +#endif +static bool CopyCStoreTableStatement(CopyStmt* copyStatement); +static void CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement); +static void CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, + char *completionTag); +static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, + const char *queryString); +static uint64 CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString); +static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); +static List * DroppedCStoreFilenameList(DropStmt *dropStatement); +static List * FindCStoreTables(List *tableList); +static List * OpenRelationsForTruncate(List *cstoreTableList); +static void TruncateCStoreTables(List *cstoreRelationList); +static void DeleteCStoreTableFiles(char *filename); +static void InitializeCStoreTableFile(Oid relationId, Relation relation); +static bool CStoreTable(Oid relationId); +static bool CStoreServer(ForeignServer *server); +static bool DistributedTable(Oid relationId); +static bool DistributedWorkerCopy(CopyStmt *copyStatement); +static void CreateCStoreDatabaseDirectory(Oid databaseOid); +static bool DirectoryExists(StringInfo directoryName); +static void CreateDirectory(StringInfo directoryName); +static void RemoveCStoreDatabaseDirectory(Oid databaseOid); +static StringInfo OptionNamesString(Oid currentContextId); +static HeapTuple GetSlotHeapTuple(TupleTableSlot *tts); +static CStoreFdwOptions * CStoreGetOptions(Oid foreignTableId); +static char * CStoreGetOptionValue(Oid foreignTableId, const char *optionName); +static void ValidateForeignTableOptions(char *filename, char *compressionTypeString, + char *stripeRowCountString, + char *blockRowCountString); +static char * CStoreDefaultFilePath(Oid foreignTableId); +static CompressionType ParseCompressionType(const char *compressionTypeString); +static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, + 
Oid foreignTableId); +static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId); +#if PG_VERSION_NUM >= 90500 +static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId, ForeignPath *bestPath, + List *targetList, List *scanClauses, + Plan *outerPlan); +#else +static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId, ForeignPath *bestPath, + List *targetList, List *scanClauses); +#endif +static double TupleCountEstimate(RelOptInfo *baserel, const char *filename); +static BlockNumber PageCount(const char *filename); +static List * ColumnList(RelOptInfo *baserel, Oid foreignTableId); +static void CStoreExplainForeignScan(ForeignScanState *scanState, + ExplainState *explainState); +static void CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags); +static TupleTableSlot * CStoreIterateForeignScan(ForeignScanState *scanState); +static void CStoreEndForeignScan(ForeignScanState *scanState); +static void CStoreReScanForeignScan(ForeignScanState *scanState); +static bool CStoreAnalyzeForeignTable(Relation relation, + AcquireSampleRowsFunc *acquireSampleRowsFunc, + BlockNumber *totalPageCount); +static int CStoreAcquireSampleRows(Relation relation, int logLevel, + HeapTuple *sampleRows, int targetRowCount, + double *totalRowCount, double *totalDeadRowCount); +static List * CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan, + Index resultRelation, int subplanIndex); +static void CStoreBeginForeignModify(ModifyTableState *modifyTableState, + ResultRelInfo *relationInfo, List *fdwPrivate, + int subplanIndex, int executorflags); +static void CStoreBeginForeignInsert(ModifyTableState *modifyTableState, + ResultRelInfo *relationInfo); +static TupleTableSlot * CStoreExecForeignInsert(EState *executorState, + ResultRelInfo *relationInfo, + TupleTableSlot *tupleSlot, + TupleTableSlot *planSlot); +static void 
CStoreEndForeignModify(EState *executorState, ResultRelInfo *relationInfo); +static void CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo); +#if PG_VERSION_NUM >= 90600 +static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +#endif + +/* declarations for dynamic loading */ +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); +PG_FUNCTION_INFO_V1(cstore_table_size); +PG_FUNCTION_INFO_V1(cstore_fdw_handler); +PG_FUNCTION_INFO_V1(cstore_fdw_validator); +PG_FUNCTION_INFO_V1(cstore_clean_table_resources); + + +/* saved hook value in case of unload */ +static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; + + +/* + * _PG_init is called when the module is loaded. In this function we save the + * previous utility hook, and then install our hook to pre-intercept calls to + * the copy command. + */ +void _PG_init(void) +{ + PreviousProcessUtilityHook = ProcessUtility_hook; + ProcessUtility_hook = CStoreProcessUtility; +} + + +/* + * _PG_fini is called when the module is unloaded. This function uninstalls the + * extension's hooks. + */ +void _PG_fini(void) +{ + ProcessUtility_hook = PreviousProcessUtilityHook; +} + + +/* + * cstore_ddl_event_end_trigger is the event trigger function which is called on + * ddl_command_end event. This function creates required directories after the + * CREATE SERVER statement and valid data and footer files after the CREATE FOREIGN + * TABLE statement. 
+ */ +Datum +cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) +{ + EventTriggerData *triggerData = NULL; + Node *parseTree = NULL; + + /* error if event trigger manager did not call this function */ + if (!CALLED_AS_EVENT_TRIGGER(fcinfo)) + { + ereport(ERROR, (errmsg("trigger not fired by event trigger manager"))); + } + + triggerData = (EventTriggerData *) fcinfo->context; + parseTree = triggerData->parsetree; + + if (nodeTag(parseTree) == T_CreateForeignServerStmt) + { + CreateForeignServerStmt *serverStatement = (CreateForeignServerStmt *) parseTree; + + char *foreignWrapperName = serverStatement->fdwname; + if (strncmp(foreignWrapperName, CSTORE_FDW_NAME, NAMEDATALEN) == 0) + { + CreateCStoreDatabaseDirectory(MyDatabaseId); + } + } + else if (nodeTag(parseTree) == T_CreateForeignTableStmt) + { + CreateForeignTableStmt *createStatement = (CreateForeignTableStmt *) parseTree; + char *serverName = createStatement->servername; + + bool missingOK = false; + ForeignServer *server = GetForeignServerByName(serverName, missingOK); + if (CStoreServer(server)) + { + Oid relationId = RangeVarGetRelid(createStatement->base.relation, + AccessShareLock, false); + Relation relation = heap_open(relationId, AccessExclusiveLock); + + /* + * Make sure database directory exists before creating a table. + * This is necessary when a foreign server is created inside + * a template database and a new database is created out of it. + * We have no chance to hook into server creation to create data + * directory for it during database creation time. + */ + CreateCStoreDatabaseDirectory(MyDatabaseId); + + InitializeCStoreTableFile(relationId, relation); + heap_close(relation, AccessExclusiveLock); + } + } + + PG_RETURN_NULL(); +} + + +/* + * CStoreProcessUtility is the hook for handling utility commands. This function + * customizes the behaviour of "COPY cstore_table" and "DROP FOREIGN TABLE + * cstore_table" commands. 
For all other utility statements, the function calls + * the previous utility hook or the standard utility command via macro + * CALL_PREVIOUS_UTILITY. + */ +#if PG_VERSION_NUM >= 100000 +static void +CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, char *completionTag) +#else +static void +CStoreProcessUtility(Node * parseTree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + DestReceiver *destReceiver, char *completionTag) +#endif +{ +#if PG_VERSION_NUM >= 100000 + Node *parseTree = plannedStatement->utilityStmt; +#endif + + if (nodeTag(parseTree) == T_CopyStmt) + { + CopyStmt *copyStatement = (CopyStmt *) parseTree; + + if (CopyCStoreTableStatement(copyStatement)) + { + CStoreProcessCopyCommand(copyStatement, queryString, completionTag); + } + else + { + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + } + } + else if (nodeTag(parseTree) == T_DropStmt) + { + DropStmt *dropStmt = (DropStmt *) parseTree; + + if (dropStmt->removeType == OBJECT_EXTENSION) + { + bool removeCStoreDirectory = false; + ListCell *objectCell = NULL; + + foreach(objectCell, dropStmt->objects) + { + Node *object = (Node *) lfirst(objectCell); + char *objectName = NULL; + +#if PG_VERSION_NUM >= 100000 + Assert(IsA(object, String)); + objectName = strVal(object); +#else + Assert(IsA(object, List)); + objectName = strVal(linitial((List *) object)); +#endif + + if (strncmp(CSTORE_FDW_NAME, objectName, NAMEDATALEN) == 0) + { + removeCStoreDirectory = true; + } + } + + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + + if (removeCStoreDirectory) + { + RemoveCStoreDatabaseDirectory(MyDatabaseId); + } + } + else + { + ListCell *fileListCell = NULL; + List *droppedTables = 
DroppedCStoreFilenameList((DropStmt *) parseTree); + + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + + foreach(fileListCell, droppedTables) + { + char *fileName = lfirst(fileListCell); + + DeleteCStoreTableFiles(fileName); + } + } + } + else if (nodeTag(parseTree) == T_TruncateStmt) + { + TruncateStmt *truncateStatement = (TruncateStmt *) parseTree; + List *allTablesList = truncateStatement->relations; + List *cstoreTablesList = FindCStoreTables(allTablesList); + List *otherTablesList = list_difference(allTablesList, cstoreTablesList); + List *cstoreRelationList = OpenRelationsForTruncate(cstoreTablesList); + ListCell *cstoreRelationCell = NULL; + + if (otherTablesList != NIL) + { + truncateStatement->relations = otherTablesList; + + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + /* restore the former relation list. Our + * replacement could be freed but still needed + * in a cached plan. 
A truncate can be cached + * if run from a pl/pgSQL function */ + truncateStatement->relations = allTablesList; + } + + TruncateCStoreTables(cstoreRelationList); + + foreach(cstoreRelationCell, cstoreRelationList) + { + Relation relation = (Relation) lfirst(cstoreRelationCell); + heap_close(relation, AccessExclusiveLock); + } + } + else if (nodeTag(parseTree) == T_AlterTableStmt) + { + AlterTableStmt *alterTable = (AlterTableStmt *) parseTree; + CStoreProcessAlterTableCommand(alterTable); + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + } + else if (nodeTag(parseTree) == T_DropdbStmt) + { + DropdbStmt *dropDdStmt = (DropdbStmt *) parseTree; + bool missingOk = true; + Oid databaseOid = get_database_oid(dropDdStmt->dbname, missingOk); + + /* let postgres handle error checking and dropping of the database */ + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + + if (databaseOid != InvalidOid) + { + RemoveCStoreDatabaseDirectory(databaseOid); + } + } + /* handle other utility statements */ + else + { + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + } +} + + +/* + * CopyCStoreTableStatement check whether the COPY statement is a "COPY cstore_table FROM + * ..." or "COPY cstore_table TO ...." statement. If it is then the function returns + * true. The function returns false otherwise. 
+ */ +static bool +CopyCStoreTableStatement(CopyStmt* copyStatement) +{ + bool copyCStoreTableStatement = false; + + if (copyStatement->relation != NULL) + { + Oid relationId = RangeVarGetRelid(copyStatement->relation, + AccessShareLock, true); + bool cstoreTable = CStoreTable(relationId); + if (cstoreTable) + { + bool distributedTable = DistributedTable(relationId); + bool distributedCopy = DistributedWorkerCopy(copyStatement); + + if (distributedTable || distributedCopy) + { + /* let COPY on distributed tables fall through to Citus */ + copyCStoreTableStatement = false; + } + else + { + copyCStoreTableStatement = true; + } + } + } + + return copyCStoreTableStatement; +} + + +/* + * CheckSuperuserPrivilegesForCopy checks if superuser privilege is required by + * copy operation and reports error if user does not have superuser rights. + */ +static void +CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement) +{ + /* + * We disallow copy from file or program except to superusers. These checks + * are based on the checks in DoCopy() function of copy.c. + */ + if (copyStatement->filename != NULL && !superuser()) + { + if (copyStatement->is_program) + { + ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from a program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + else + { + ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + } +} + + +/* + * CStoreProcessCopyCommand handles COPY FROM/TO ... statements. + * It determines the copy direction and forwards execution to appropriate function. 
+ */ +static void +CStoreProcessCopyCommand(CopyStmt *copyStatement, const char* queryString, + char *completionTag) +{ + uint64 processedCount = 0; + + if (copyStatement->is_from) + { + processedCount = CopyIntoCStoreTable(copyStatement, queryString); + } + else + { + processedCount = CopyOutCStoreTable(copyStatement, queryString); + } + + if (completionTag != NULL) + { + snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "COPY " UINT64_FORMAT, + processedCount); + } +} + + +/* + * CopyIntoCStoreTable handles a "COPY cstore_table FROM" statement. This + * function uses the COPY command's functions to read and parse rows from + * the data source specified in the COPY statement. The function then writes + * each row to the file specified in the cstore foreign table options. Finally, + * the function returns the number of copied rows. + */ +static uint64 +CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) +{ + uint64 processedRowCount = 0; + Relation relation = NULL; + Oid relationId = InvalidOid; + TupleDesc tupleDescriptor = NULL; + uint32 columnCount = 0; + CopyState copyState = NULL; + bool nextRowFound = true; + Datum *columnValues = NULL; + bool *columnNulls = NULL; + TableWriteState *writeState = NULL; + CStoreFdwOptions *cstoreFdwOptions = NULL; + MemoryContext tupleContext = NULL; + + /* Only superuser can copy from or to local file */ + CheckSuperuserPrivilegesForCopy(copyStatement); + + Assert(copyStatement->relation != NULL); + + /* + * Open and lock the relation. We acquire ShareUpdateExclusiveLock to allow + * concurrent reads, but block concurrent writes. 
+ */ + relation = heap_openrv(copyStatement->relation, ShareUpdateExclusiveLock); + relationId = RelationGetRelid(relation); + + /* allocate column values and nulls arrays */ + tupleDescriptor = RelationGetDescr(relation); + columnCount = tupleDescriptor->natts; + columnValues = palloc0(columnCount * sizeof(Datum)); + columnNulls = palloc0(columnCount * sizeof(bool)); + + cstoreFdwOptions = CStoreGetOptions(relationId); + + /* + * We create a new memory context called tuple context, and read and write + * each row's values within this memory context. After each read and write, + * we reset the memory context. That way, we immediately release memory + * allocated for each row, and don't bloat memory usage with large input + * files. + */ + tupleContext = AllocSetContextCreate(CurrentMemoryContext, + "CStore COPY Row Memory Context", + ALLOCSET_DEFAULT_SIZES); + + /* init state to read from COPY data source */ +#if (PG_VERSION_NUM >= 100000) + { + ParseState *pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + copyState = BeginCopyFrom(pstate, relation, copyStatement->filename, + copyStatement->is_program, + NULL, + copyStatement->attlist, + copyStatement->options); + free_parsestate(pstate); + } +#else + copyState = BeginCopyFrom(relation, copyStatement->filename, + copyStatement->is_program, + copyStatement->attlist, + copyStatement->options); +#endif + + /* init state to write to the cstore file */ + writeState = CStoreBeginWrite(cstoreFdwOptions->filename, + cstoreFdwOptions->compressionType, + cstoreFdwOptions->stripeRowCount, + cstoreFdwOptions->blockRowCount, + tupleDescriptor); + + while (nextRowFound) + { + /* read the next row in tupleContext */ + MemoryContext oldContext = MemoryContextSwitchTo(tupleContext); +#if PG_VERSION_NUM >= 120000 + nextRowFound = NextCopyFrom(copyState, NULL, columnValues, columnNulls); +#else + nextRowFound = NextCopyFrom(copyState, NULL, columnValues, columnNulls, NULL); +#endif + 
MemoryContextSwitchTo(oldContext); + + /* write the row to the cstore file */ + if (nextRowFound) + { + CStoreWriteRow(writeState, columnValues, columnNulls); + processedRowCount++; + } + + MemoryContextReset(tupleContext); + + CHECK_FOR_INTERRUPTS(); + } + + /* end read/write sessions and close the relation */ + EndCopyFrom(copyState); + CStoreEndWrite(writeState); + heap_close(relation, ShareUpdateExclusiveLock); + + return processedRowCount; +} + + +/* + * CopyFromCStoreTable handles a "COPY cstore_table TO ..." statement. Statement + * is converted to "COPY (SELECT * FROM cstore_table) TO ..." and forwarded to + * postgres native COPY handler. Function returns number of files copied to external + * stream. Copying selected columns from cstore table is not currently supported. + */ +static uint64 +CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString) +{ + uint64 processedCount = 0; + RangeVar *relation = NULL; + char *qualifiedName = NULL; + List *queryList = NIL; + Node *rawQuery = NULL; + + StringInfo newQuerySubstring = makeStringInfo(); + + if (copyStatement->attlist != NIL) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("copy column list is not supported"), + errhint("use 'copy (select from ) to " + "...' instead"))); + } + + relation = copyStatement->relation; + qualifiedName = quote_qualified_identifier(relation->schemaname, + relation->relname); + appendStringInfo(newQuerySubstring, "select * from %s", qualifiedName); + queryList = raw_parser(newQuerySubstring->data); + + /* take the first parse tree */ + rawQuery = linitial(queryList); + + /* + * Set the relation field to NULL so that COPY command works on + * query field instead. + */ + copyStatement->relation = NULL; + +#if (PG_VERSION_NUM >= 100000) + /* + * raw_parser returns list of RawStmt* in PG 10+ we need to + * extract actual query from it. 
+ */ + { + ParseState *pstate = make_parsestate(NULL); + RawStmt *rawStatement = (RawStmt *) rawQuery; + + pstate->p_sourcetext = newQuerySubstring->data; + copyStatement->query = rawStatement->stmt; + + DoCopy(pstate, copyStatement, -1, -1, &processedCount); + free_parsestate(pstate); + } +#else + copyStatement->query = rawQuery; + + DoCopy(copyStatement, queryString, &processedCount); +#endif + + return processedCount; +} + + +/* + * CStoreProcessAlterTableCommand checks if given alter table statement is + * compatible with underlying data structure. Currently it only checks alter + * column type. The function errors out if current column type can not be safely + * converted to requested column type. This check is more restrictive than + * PostgreSQL's because we can not change existing data. + */ +static void +CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) +{ + ObjectType objectType = alterStatement->relkind; + RangeVar *relationRangeVar = alterStatement->relation; + Oid relationId = InvalidOid; + List *commandList = alterStatement->cmds; + ListCell *commandCell = NULL; + + /* we are only interested in foreign table changes */ + if (objectType != OBJECT_TABLE && objectType != OBJECT_FOREIGN_TABLE) + { + return; + } + + relationId = RangeVarGetRelid(relationRangeVar, AccessShareLock, true); + if (!CStoreTable(relationId)) + { + return; + } + + foreach(commandCell, commandList) + { + AlterTableCmd *alterCommand = (AlterTableCmd *) lfirst(commandCell); + if(alterCommand->subtype == AT_AlterColumnType) + { + char *columnName = alterCommand->name; + ColumnDef *columnDef = (ColumnDef *) alterCommand->def; + Oid targetTypeId = typenameTypeId(NULL, columnDef->typeName); + char *typeName = TypeNameToString(columnDef->typeName); + AttrNumber attributeNumber = get_attnum(relationId, columnName); + Oid currentTypeId = InvalidOid; + + if (attributeNumber <= 0) + { + /* let standard utility handle this */ + continue; + } + + currentTypeId = 
get_atttype(relationId, attributeNumber); + + /* + * We are only interested in implicit coersion type compatibility. + * Erroring out here to prevent further processing. + */ + if (!can_coerce_type(1, ¤tTypeId, &targetTypeId, COERCION_IMPLICIT)) + { + ereport(ERROR, (errmsg("Column %s cannot be cast automatically to " + "type %s", columnName, typeName))); + } + } + } +} + + +/* + * DropppedCStoreFilenameList extracts and returns the list of cstore file names + * from DROP table statement + */ +static List * +DroppedCStoreFilenameList(DropStmt *dropStatement) +{ + List *droppedCStoreFileList = NIL; + + if (dropStatement->removeType == OBJECT_FOREIGN_TABLE) + { + ListCell *dropObjectCell = NULL; + foreach(dropObjectCell, dropStatement->objects) + { + List *tableNameList = (List *) lfirst(dropObjectCell); + RangeVar *rangeVar = makeRangeVarFromNameList(tableNameList); + + Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); + if (CStoreTable(relationId)) + { + CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(relationId); + char *defaultfilename = CStoreDefaultFilePath(relationId); + + /* + * Skip files that are placed in default location, they are handled + * by sql drop trigger. Both paths are generated by code, use + * of strcmp is safe here. 
+ */ + if (strcmp(defaultfilename, cstoreFdwOptions->filename) == 0) + { + continue; + } + + droppedCStoreFileList = lappend(droppedCStoreFileList, + cstoreFdwOptions->filename); + } + } + } + + return droppedCStoreFileList; +} + + +/* FindCStoreTables returns list of CStore tables from given table list */ +static List * +FindCStoreTables(List *tableList) +{ + List *cstoreTableList = NIL; + ListCell *relationCell = NULL; + foreach(relationCell, tableList) + { + RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); + Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); + if (CStoreTable(relationId) && !DistributedTable(relationId)) + { + cstoreTableList = lappend(cstoreTableList, rangeVar); + } + } + + return cstoreTableList; +} + + +/* + * OpenRelationsForTruncate opens and locks relations for tables to be truncated. + * + * It also performs a permission checks to see if the user has truncate privilege + * on tables. + */ +static List * +OpenRelationsForTruncate(List *cstoreTableList) +{ + ListCell *relationCell = NULL; + List *relationIdList = NIL; + List *relationList = NIL; + foreach(relationCell, cstoreTableList) + { + RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); + Relation relation = heap_openrv(rangeVar, AccessExclusiveLock); + Oid relationId = relation->rd_id; + AclResult aclresult = pg_class_aclcheck(relationId, GetUserId(), + ACL_TRUNCATE); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, ACLCHECK_OBJECT_TABLE, get_rel_name(relationId)); + } + + /* check if this relation is repeated */ + if (list_member_oid(relationIdList, relationId)) + { + heap_close(relation, AccessExclusiveLock); + } + else + { + relationIdList = lappend_oid(relationIdList, relationId); + relationList = lappend(relationList, relation); + } + } + + return relationList; +} + + +/* TruncateCStoreTable truncates given cstore tables */ +static void +TruncateCStoreTables(List *cstoreRelationList) +{ + ListCell *relationCell = NULL; + 
foreach(relationCell, cstoreRelationList) + { + Relation relation = (Relation) lfirst(relationCell); + Oid relationId = relation->rd_id; + CStoreFdwOptions *cstoreFdwOptions = NULL; + + Assert(CStoreTable(relationId)); + + cstoreFdwOptions = CStoreGetOptions(relationId); + DeleteCStoreTableFiles(cstoreFdwOptions->filename); + InitializeCStoreTableFile(relationId, relation); + } +} + + +/* + * DeleteCStoreTableFiles deletes the data and footer files for a cstore table + * whose data filename is given. + */ +static void +DeleteCStoreTableFiles(char *filename) +{ + int dataFileRemoved = 0; + int footerFileRemoved = 0; + + StringInfo tableFooterFilename = makeStringInfo(); + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + /* delete the footer file */ + footerFileRemoved = unlink(tableFooterFilename->data); + if (footerFileRemoved != 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not delete file \"%s\": %m", + tableFooterFilename->data))); + } + + /* delete the data file */ + dataFileRemoved = unlink(filename); + if (dataFileRemoved != 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not delete file \"%s\": %m", + filename))); + } +} + + +/* + * InitializeCStoreTableFile creates data and footer file for a cstore table. + * The function assumes data and footer files do not exist, therefore + * it should be called on empty or non-existing table. Notice that the caller + * is expected to acquire AccessExclusiveLock on the relation. + */ +static void InitializeCStoreTableFile(Oid relationId, Relation relation) +{ + TableWriteState *writeState = NULL; + TupleDesc tupleDescriptor = RelationGetDescr(relation); + CStoreFdwOptions* cstoreFdwOptions = CStoreGetOptions(relationId); + + /* + * Initialize state to write to the cstore file. This creates an + * empty data file and a valid footer file for the table. 
+	 */
+	writeState = CStoreBeginWrite(cstoreFdwOptions->filename,
+			cstoreFdwOptions->compressionType, cstoreFdwOptions->stripeRowCount,
+			cstoreFdwOptions->blockRowCount, tupleDescriptor);
+	CStoreEndWrite(writeState);
+}
+
+
+
+/*
+ * CStoreTable checks if the given relation id belongs to a foreign columnar store
+ * table. If it does, the function returns true. Otherwise, it returns false.
+ */
+static bool
+CStoreTable(Oid relationId)
+{
+	bool cstoreTable = false;
+	char relationKind = 0;
+
+	if (relationId == InvalidOid)
+	{
+		return false;
+	}
+
+	relationKind = get_rel_relkind(relationId);
+	if (relationKind == RELKIND_FOREIGN_TABLE)
+	{
+		ForeignTable *foreignTable = GetForeignTable(relationId);
+		ForeignServer *server = GetForeignServer(foreignTable->serverid);
+		if (CStoreServer(server))
+		{
+			cstoreTable = true;
+		}
+	}
+
+	return cstoreTable;
+}
+
+
+/*
+ * CStoreServer checks if the given foreign server belongs to cstore_fdw. If it
+ * does, the function returns true. Otherwise, it returns false.
+ */
+static bool
+CStoreServer(ForeignServer *server)
+{
+	ForeignDataWrapper *foreignDataWrapper = GetForeignDataWrapper(server->fdwid);
+	bool cstoreServer = false;
+
+	char *foreignWrapperName = foreignDataWrapper->fdwname;
+	if (strncmp(foreignWrapperName, CSTORE_FDW_NAME, NAMEDATALEN) == 0)
+	{
+		cstoreServer = true;
+	}
+
+	return cstoreServer;
+}
+
+
+/*
+ * DistributedTable checks if the given relationId is the OID of a distributed table,
+ * which may also be a cstore_fdw table, but in that case COPY should be handled by
+ * Citus.
+ */ +static bool +DistributedTable(Oid relationId) +{ + bool distributedTable = false; + Oid partitionOid = InvalidOid; + Relation heapRelation = NULL; + TableScanDesc scanDesc = NULL; + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + HeapTuple heapTuple = NULL; + + bool missingOK = true; + Oid extensionOid = get_extension_oid(CITUS_EXTENSION_NAME, missingOK); + if (extensionOid == InvalidOid) + { + /* if the citus extension isn't created, no tables are distributed */ + return false; + } + + partitionOid = get_relname_relid(CITUS_PARTITION_TABLE_NAME, PG_CATALOG_NAMESPACE); + if (partitionOid == InvalidOid) + { + /* the pg_dist_partition table does not exist */ + return false; + } + + heapRelation = heap_open(partitionOid, AccessShareLock); + + ScanKeyInit(&scanKey[0], ATTR_NUM_PARTITION_RELATION_ID, InvalidStrategy, + F_OIDEQ, ObjectIdGetDatum(relationId)); + + scanDesc = table_beginscan(heapRelation, SnapshotSelf, scanKeyCount, scanKey); + + heapTuple = heap_getnext(scanDesc, ForwardScanDirection); + + distributedTable = HeapTupleIsValid(heapTuple); + + table_endscan(scanDesc); + relation_close(heapRelation, AccessShareLock); + + return distributedTable; +} + + +/* + * DistributedWorkerCopy returns whether the Citus-specific master_host option is + * present in the COPY options. + */ +static bool +DistributedWorkerCopy(CopyStmt *copyStatement) +{ + ListCell *optionCell = NULL; + foreach(optionCell, copyStatement->options) + { + DefElem *defel = (DefElem *) lfirst(optionCell); + if (strncmp(defel->defname, "master_host", NAMEDATALEN) == 0) + { + return true; + } + } + + return false; +} + + +/* + * CreateCStoreDatabaseDirectory creates the directory (and parent directories, + * if needed) used to store automatically managed cstore_fdw files. The path to + * the directory is $PGDATA/cstore_fdw/{databaseOid}. 
+ */ +static void +CreateCStoreDatabaseDirectory(Oid databaseOid) +{ + bool cstoreDirectoryExists = false; + bool databaseDirectoryExists = false; + StringInfo cstoreDatabaseDirectoryPath = NULL; + + StringInfo cstoreDirectoryPath = makeStringInfo(); + appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); + + cstoreDirectoryExists = DirectoryExists(cstoreDirectoryPath); + if (!cstoreDirectoryExists) + { + CreateDirectory(cstoreDirectoryPath); + } + + cstoreDatabaseDirectoryPath = makeStringInfo(); + appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, + CSTORE_FDW_NAME, databaseOid); + + databaseDirectoryExists = DirectoryExists(cstoreDatabaseDirectoryPath); + if (!databaseDirectoryExists) + { + CreateDirectory(cstoreDatabaseDirectoryPath); + } +} + + +/* DirectoryExists checks if a directory exists for the given directory name. */ +static bool +DirectoryExists(StringInfo directoryName) +{ + bool directoryExists = true; + struct stat directoryStat; + + int statOK = stat(directoryName->data, &directoryStat); + if (statOK == 0) + { + /* file already exists; check that it is a directory */ + if (!S_ISDIR(directoryStat.st_mode)) + { + ereport(ERROR, (errmsg("\"%s\" is not a directory", directoryName->data), + errhint("You need to remove or rename the file \"%s\".", + directoryName->data))); + } + } + else + { + if (errno == ENOENT) + { + directoryExists = false; + } + else + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat directory \"%s\": %m", + directoryName->data))); + } + } + + return directoryExists; +} + + +/* CreateDirectory creates a new directory with the given directory name. 
*/ +static void +CreateDirectory(StringInfo directoryName) +{ + int makeOK = mkdir(directoryName->data, S_IRWXU); + if (makeOK != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + directoryName->data))); + } +} + + +/* + * RemoveCStoreDatabaseDirectory removes CStore directory previously + * created for this database. + * However it does not remove 'cstore_fdw' directory even if there + * are no other databases left. + */ +static void +RemoveCStoreDatabaseDirectory(Oid databaseOid) +{ + StringInfo cstoreDirectoryPath = makeStringInfo(); + StringInfo cstoreDatabaseDirectoryPath = makeStringInfo(); + + appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); + + appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, + CSTORE_FDW_NAME, databaseOid); + + if (DirectoryExists(cstoreDatabaseDirectoryPath)) + { + rmtree(cstoreDatabaseDirectoryPath->data, true); + } +} + + +/* + * cstore_table_size returns the total on-disk size of a cstore table in bytes. + * The result includes the sizes of data file and footer file. 
+ */ +Datum +cstore_table_size(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + + int64 tableSize = 0; + CStoreFdwOptions *cstoreFdwOptions = NULL; + char *dataFilename = NULL; + StringInfo footerFilename = NULL; + int dataFileStatResult = 0; + int footerFileStatResult = 0; + struct stat dataFileStatBuffer; + struct stat footerFileStatBuffer; + + bool cstoreTable = CStoreTable(relationId); + if (!cstoreTable) + { + ereport(ERROR, (errmsg("relation is not a cstore table"))); + } + + cstoreFdwOptions = CStoreGetOptions(relationId); + dataFilename = cstoreFdwOptions->filename; + + dataFileStatResult = stat(dataFilename, &dataFileStatBuffer); + if (dataFileStatResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", dataFilename))); + } + + footerFilename = makeStringInfo(); + appendStringInfo(footerFilename, "%s%s", dataFilename, + CSTORE_FOOTER_FILE_SUFFIX); + + footerFileStatResult = stat(footerFilename->data, &footerFileStatBuffer); + if (footerFileStatResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + footerFilename->data))); + } + + tableSize += dataFileStatBuffer.st_size; + tableSize += footerFileStatBuffer.st_size; + + PG_RETURN_INT64(tableSize); +} + + +/* + * cstore_fdw_handler creates and returns a struct with pointers to foreign + * table callback functions. 
+ */ +Datum +cstore_fdw_handler(PG_FUNCTION_ARGS) +{ + FdwRoutine *fdwRoutine = makeNode(FdwRoutine); + + fdwRoutine->GetForeignRelSize = CStoreGetForeignRelSize; + fdwRoutine->GetForeignPaths = CStoreGetForeignPaths; + fdwRoutine->GetForeignPlan = CStoreGetForeignPlan; + fdwRoutine->ExplainForeignScan = CStoreExplainForeignScan; + fdwRoutine->BeginForeignScan = CStoreBeginForeignScan; + fdwRoutine->IterateForeignScan = CStoreIterateForeignScan; + fdwRoutine->ReScanForeignScan = CStoreReScanForeignScan; + fdwRoutine->EndForeignScan = CStoreEndForeignScan; + fdwRoutine->AnalyzeForeignTable = CStoreAnalyzeForeignTable; + fdwRoutine->PlanForeignModify = CStorePlanForeignModify; + fdwRoutine->BeginForeignModify = CStoreBeginForeignModify; + fdwRoutine->ExecForeignInsert = CStoreExecForeignInsert; + fdwRoutine->EndForeignModify = CStoreEndForeignModify; + +#if PG_VERSION_NUM >= 110000 + fdwRoutine->BeginForeignInsert = CStoreBeginForeignInsert; + fdwRoutine->EndForeignInsert = CStoreEndForeignInsert; +#endif + +#if PG_VERSION_NUM >= 90600 + fdwRoutine->IsForeignScanParallelSafe = CStoreIsForeignScanParallelSafe; +#endif + + PG_RETURN_POINTER(fdwRoutine); +} + + +/* + * cstore_fdw_validator validates options given to one of the following commands: + * foreign data wrapper, server, user mapping, or foreign table. This function + * errors out if the given option name or its value is considered invalid. 
+ */ +Datum +cstore_fdw_validator(PG_FUNCTION_ARGS) +{ + Datum optionArray = PG_GETARG_DATUM(0); + Oid optionContextId = PG_GETARG_OID(1); + List *optionList = untransformRelOptions(optionArray); + ListCell *optionCell = NULL; + char *filename = NULL; + char *compressionTypeString = NULL; + char *stripeRowCountString = NULL; + char *blockRowCountString = NULL; + + foreach(optionCell, optionList) + { + DefElem *optionDef = (DefElem *) lfirst(optionCell); + char *optionName = optionDef->defname; + bool optionValid = false; + + int32 optionIndex = 0; + for (optionIndex = 0; optionIndex < ValidOptionCount; optionIndex++) + { + const CStoreValidOption *validOption = &(ValidOptionArray[optionIndex]); + + if ((optionContextId == validOption->optionContextId) && + (strncmp(optionName, validOption->optionName, NAMEDATALEN) == 0)) + { + optionValid = true; + break; + } + } + + /* if invalid option, display an informative error message */ + if (!optionValid) + { + StringInfo optionNamesString = OptionNamesString(optionContextId); + + ereport(ERROR, (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), + errmsg("invalid option \"%s\"", optionName), + errhint("Valid options in this context are: %s", + optionNamesString->data))); + } + + if (strncmp(optionName, OPTION_NAME_FILENAME, NAMEDATALEN) == 0) + { + filename = defGetString(optionDef); + } + else if (strncmp(optionName, OPTION_NAME_COMPRESSION_TYPE, NAMEDATALEN) == 0) + { + compressionTypeString = defGetString(optionDef); + } + else if (strncmp(optionName, OPTION_NAME_STRIPE_ROW_COUNT, NAMEDATALEN) == 0) + { + stripeRowCountString = defGetString(optionDef); + } + else if (strncmp(optionName, OPTION_NAME_BLOCK_ROW_COUNT, NAMEDATALEN) == 0) + { + blockRowCountString = defGetString(optionDef); + } + } + + if (optionContextId == ForeignTableRelationId) + { + ValidateForeignTableOptions(filename, compressionTypeString, + stripeRowCountString, blockRowCountString); + } + + PG_RETURN_VOID(); +} + + +/* + * cstore_clean_table_resources 
cleans up table data and metadata with provided
+ * relation id. The function is meant to be called from drop_event_trigger. It
+ * has no way of knowing if the provided relation id belongs to a cstore table.
+ * Therefore it first checks if data file exists at default location before
+ * attempting to remove data and footer files. If the table is created at a
+ * custom path then its resources would not be removed.
+ */
+Datum
+cstore_clean_table_resources(PG_FUNCTION_ARGS)
+{
+	Oid relationId = PG_GETARG_OID(0);
+	StringInfo filePath = makeStringInfo();
+	struct stat fileStat;
+	int statResult = -1;
+
+	appendStringInfo(filePath, "%s/%s/%d/%d", DataDir, CSTORE_FDW_NAME,
+					 (int) MyDatabaseId, (int) relationId);
+
+	/*
+	 * Check to see if the file exists first. This is the only way to
+	 * find out if the table being dropped is a cstore table.
+	 */
+	statResult = stat(filePath->data, &fileStat);
+	if (statResult == 0)
+	{
+		DeleteCStoreTableFiles(filePath->data);
+	}
+
+	PG_RETURN_VOID();
+}
+
+
+/*
+ * OptionNamesString finds all options that are valid for the current context,
+ * and concatenates these option names in a comma separated string. The function
+ * is unchanged from mongo_fdw.
+ */ +static StringInfo +OptionNamesString(Oid currentContextId) +{ + StringInfo optionNamesString = makeStringInfo(); + bool firstOptionAppended = false; + + int32 optionIndex = 0; + for (optionIndex = 0; optionIndex < ValidOptionCount; optionIndex++) + { + const CStoreValidOption *validOption = &(ValidOptionArray[optionIndex]); + + /* if option belongs to current context, append option name */ + if (currentContextId == validOption->optionContextId) + { + if (firstOptionAppended) + { + appendStringInfoString(optionNamesString, ", "); + } + + appendStringInfoString(optionNamesString, validOption->optionName); + firstOptionAppended = true; + } + } + + return optionNamesString; +} + + +/* + * GetSlotHeapTuple abstracts getting HeapTuple from TupleTableSlot between versions + */ +static HeapTuple +GetSlotHeapTuple(TupleTableSlot *tts) +{ +#if PG_VERSION_NUM >= 120000 + return tts->tts_ops->copy_heap_tuple(tts); +#else + return tts->tts_tuple; +#endif +} + + +/* + * CStoreGetOptions returns the option values to be used when reading and writing + * the cstore file. To resolve these values, the function checks options for the + * foreign table, and if not present, falls back to default values. This function + * errors out if given option values are considered invalid. 
+ */ +static CStoreFdwOptions * +CStoreGetOptions(Oid foreignTableId) +{ + CStoreFdwOptions *cstoreFdwOptions = NULL; + char *filename = NULL; + CompressionType compressionType = DEFAULT_COMPRESSION_TYPE; + int32 stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; + int32 blockRowCount = DEFAULT_BLOCK_ROW_COUNT; + char *compressionTypeString = NULL; + char *stripeRowCountString = NULL; + char *blockRowCountString = NULL; + + filename = CStoreGetOptionValue(foreignTableId, OPTION_NAME_FILENAME); + compressionTypeString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_COMPRESSION_TYPE); + stripeRowCountString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_STRIPE_ROW_COUNT); + blockRowCountString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_BLOCK_ROW_COUNT); + + ValidateForeignTableOptions(filename, compressionTypeString, + stripeRowCountString, blockRowCountString); + + /* parse provided options */ + if (compressionTypeString != NULL) + { + compressionType = ParseCompressionType(compressionTypeString); + } + if (stripeRowCountString != NULL) + { + stripeRowCount = pg_atoi(stripeRowCountString, sizeof(int32), 0); + } + if (blockRowCountString != NULL) + { + blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0); + } + + /* set default filename if it is not provided */ + if (filename == NULL) + { + filename = CStoreDefaultFilePath(foreignTableId); + } + + cstoreFdwOptions = palloc0(sizeof(CStoreFdwOptions)); + cstoreFdwOptions->filename = filename; + cstoreFdwOptions->compressionType = compressionType; + cstoreFdwOptions->stripeRowCount = stripeRowCount; + cstoreFdwOptions->blockRowCount = blockRowCount; + + return cstoreFdwOptions; +} + + +/* + * CStoreGetOptionValue walks over foreign table and foreign server options, and + * looks for the option with the given name. If found, the function returns the + * option's value. This function is unchanged from mongo_fdw. 
+ */ +static char * +CStoreGetOptionValue(Oid foreignTableId, const char *optionName) +{ + ForeignTable *foreignTable = NULL; + ForeignServer *foreignServer = NULL; + List *optionList = NIL; + ListCell *optionCell = NULL; + char *optionValue = NULL; + + foreignTable = GetForeignTable(foreignTableId); + foreignServer = GetForeignServer(foreignTable->serverid); + + optionList = list_concat(optionList, foreignTable->options); + optionList = list_concat(optionList, foreignServer->options); + + foreach(optionCell, optionList) + { + DefElem *optionDef = (DefElem *) lfirst(optionCell); + char *optionDefName = optionDef->defname; + + if (strncmp(optionDefName, optionName, NAMEDATALEN) == 0) + { + optionValue = defGetString(optionDef); + break; + } + } + + return optionValue; +} + + +/* + * ValidateForeignTableOptions verifies if given options are valid cstore_fdw + * foreign table options. This function errors out if given option value is + * considered invalid. + */ +static void +ValidateForeignTableOptions(char *filename, char *compressionTypeString, + char *stripeRowCountString, char *blockRowCountString) +{ + /* we currently do not have any checks for filename */ + (void) filename; + + /* check if the provided compression type is valid */ + if (compressionTypeString != NULL) + { + CompressionType compressionType = ParseCompressionType(compressionTypeString); + if (compressionType == COMPRESSION_TYPE_INVALID) + { + ereport(ERROR, (errmsg("invalid compression type"), + errhint("Valid options are: %s", + COMPRESSION_STRING_DELIMITED_LIST))); + } + } + + /* check if the provided stripe row count has correct format and range */ + if (stripeRowCountString != NULL) + { + /* pg_atoi() errors out if the given string is not a valid 32-bit integer */ + int32 stripeRowCount = pg_atoi(stripeRowCountString, sizeof(int32), 0); + if (stripeRowCount < STRIPE_ROW_COUNT_MINIMUM || + stripeRowCount > STRIPE_ROW_COUNT_MAXIMUM) + { + ereport(ERROR, (errmsg("invalid stripe row count"), + 
errhint("Stripe row count must be an integer between " + "%d and %d", STRIPE_ROW_COUNT_MINIMUM, + STRIPE_ROW_COUNT_MAXIMUM))); + } + } + + /* check if the provided block row count has correct format and range */ + if (blockRowCountString != NULL) + { + /* pg_atoi() errors out if the given string is not a valid 32-bit integer */ + int32 blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0); + if (blockRowCount < BLOCK_ROW_COUNT_MINIMUM || + blockRowCount > BLOCK_ROW_COUNT_MAXIMUM) + { + ereport(ERROR, (errmsg("invalid block row count"), + errhint("Block row count must be an integer between " + "%d and %d", BLOCK_ROW_COUNT_MINIMUM, + BLOCK_ROW_COUNT_MAXIMUM))); + } + } +} + + +/* + * CStoreDefaultFilePath constructs the default file path to use for a cstore_fdw + * table. The path is of the form $PGDATA/cstore_fdw/{databaseOid}/{relfilenode}. + */ +static char * +CStoreDefaultFilePath(Oid foreignTableId) +{ + Relation relation = relation_open(foreignTableId, AccessShareLock); + RelFileNode relationFileNode = relation->rd_node; + Oid databaseOid = relationFileNode.dbNode; + Oid relationFileOid = relationFileNode.relNode; + + relation_close(relation, AccessShareLock); + + /* PG12 onward does not create relfilenode for foreign tables */ + if (databaseOid == InvalidOid) + { + databaseOid = MyDatabaseId; + relationFileOid = foreignTableId; + + } + + StringInfo cstoreFilePath = makeStringInfo(); + appendStringInfo(cstoreFilePath, "%s/%s/%u/%u", DataDir, CSTORE_FDW_NAME, + databaseOid, relationFileOid); + + return cstoreFilePath->data; +} + + +/* ParseCompressionType converts a string to a compression type. 
 */
static CompressionType
ParseCompressionType(const char *compressionTypeString)
{
	/* returns COMPRESSION_TYPE_INVALID for any unrecognized string */
	CompressionType compressionType = COMPRESSION_TYPE_INVALID;
	Assert(compressionTypeString != NULL);

	if (strncmp(compressionTypeString, COMPRESSION_STRING_NONE, NAMEDATALEN) == 0)
	{
		compressionType = COMPRESSION_NONE;
	}
	else if (strncmp(compressionTypeString, COMPRESSION_STRING_PG_LZ, NAMEDATALEN) == 0)
	{
		compressionType = COMPRESSION_PG_LZ;
	}

	return compressionType;
}


/*
 * CStoreGetForeignRelSize obtains relation size estimates for a foreign table and
 * puts its estimate for row count into baserel->rows.
 */
static void
CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId)
{
	CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId);
	double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename);
	double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo,
												   0, JOIN_INNER, NULL);

	double outputRowCount = clamp_row_est(tupleCountEstimate * rowSelectivity);
	baserel->rows = outputRowCount;
}


/*
 * CStoreGetForeignPaths creates possible access paths for a scan on the foreign
 * table. We currently have one possible access path. This path filters out row
 * blocks that are refuted by where clauses, and only returns values for the
 * projected columns.
 */
static void
CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId)
{
	Path *foreignScanPath = NULL;
	CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId);
	Relation relation = heap_open(foreignTableId, AccessShareLock);

	/*
	 * We skip reading columns that are not in query. Here we assume that all
	 * columns in relation have the same width, and estimate the number pages
	 * that will be read by query.
	 *
	 * Ideally, we should also take into account the row blocks that will be
	 * suppressed. But for that we need to know which columns are used for
	 * sorting. If we wrongly assume that we are sorted by a specific column
	 * and underestimate the page count, planner may choose nested loop join
	 * in a place it shouldn't be used. Choosing merge join or hash join is
	 * usually safer than nested loop join, so we take the more conservative
	 * approach and assume all rows in the columnar store file will be read.
	 * We intend to fix this in a later version by improving the row sampling
	 * algorithm and using the correlation statistics to detect which columns
	 * are stored in sorted order.
	 */
	List *queryColumnList = ColumnList(baserel, foreignTableId);
	uint32 queryColumnCount = list_length(queryColumnList);
	BlockNumber relationPageCount = PageCount(cstoreFdwOptions->filename);
	uint32 relationColumnCount = RelationGetNumberOfAttributes(relation);

	/* scale I/O cost by the fraction of columns the query actually reads */
	double queryColumnRatio = (double) queryColumnCount / relationColumnCount;
	double queryPageCount = relationPageCount * queryColumnRatio;
	double totalDiskAccessCost = seq_page_cost * queryPageCount;

	double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename);

	/*
	 * We estimate costs almost the same way as cost_seqscan(), thus assuming
	 * that I/O costs are equivalent to a regular table file of the same size.
	 */
	double filterCostPerTuple = baserel->baserestrictcost.per_tuple;
	double cpuCostPerTuple = cpu_tuple_cost + filterCostPerTuple;
	double totalCpuCost = cpuCostPerTuple * tupleCountEstimate;

	double startupCost = baserel->baserestrictcost.startup;
	double totalCost = startupCost + totalCpuCost + totalDiskAccessCost;

	/* create a foreign path node and add it as the only possible path */
#if PG_VERSION_NUM >= 90600
	foreignScanPath = (Path *) create_foreignscan_path(root, baserel,
													   NULL, /* path target */
													   baserel->rows,
													   startupCost, totalCost,
													   NIL, /* no known ordering */
													   NULL, /* not parameterized */
													   NULL, /* no outer path */
													   NIL); /* no fdw_private */

#elif PG_VERSION_NUM >= 90500
	foreignScanPath = (Path *) create_foreignscan_path(root, baserel, baserel->rows,
													   startupCost, totalCost,
													   NIL, /* no known ordering */
													   NULL, /* not parameterized */
													   NULL, /* no outer path */
													   NIL); /* no fdw_private */
#else
	foreignScanPath = (Path *) create_foreignscan_path(root, baserel, baserel->rows,
													   startupCost, totalCost,
													   NIL, /* no known ordering */
													   NULL, /* not parameterized */
													   NIL); /* no fdw_private */
#endif

	add_path(baserel, foreignScanPath);
	heap_close(relation, AccessShareLock);
}


/*
 * CStoreGetForeignPlan creates a ForeignScan plan node for scanning the foreign
 * table. We also add the query column list to scan nodes private list, because
 * we need it later for skipping over unused columns in the query.
 */
#if PG_VERSION_NUM >= 90500
static ForeignScan *
CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId,
					 ForeignPath *bestPath, List *targetList, List *scanClauses,
					 Plan *outerPlan)
#else
static ForeignScan *
CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId,
					 ForeignPath *bestPath, List *targetList, List *scanClauses)
#endif
{
	ForeignScan *foreignScan = NULL;
	List *columnList = NIL;
	List *foreignPrivateList = NIL;

	/*
	 * Although we skip row blocks that are refuted by the WHERE clause, we
	 * have no native ability to evaluate restriction clauses and make sure
	 * that all non-related rows are filtered out. So we just put all of the
	 * scanClauses into the plan node's qual list for the executor to check.
	 */
	scanClauses = extract_actual_clauses(scanClauses,
										 false); /* extract regular clauses */

	/*
	 * As an optimization, we only read columns that are present in the query.
	 * To find these columns, we need baserel. We don't have access to baserel
	 * in executor's callback functions, so we get the column list here and put
	 * it into foreign scan node's private list.
	 */
	columnList = ColumnList(baserel, foreignTableId);
	foreignPrivateList = list_make1(columnList);

	/* create the foreign scan node */
#if PG_VERSION_NUM >= 90500
	foreignScan = make_foreignscan(targetList, scanClauses, baserel->relid,
								   NIL, /* no expressions to evaluate */
								   foreignPrivateList,
								   NIL,
								   NIL,
								   NULL); /* no outer path */
#else
	foreignScan = make_foreignscan(targetList, scanClauses, baserel->relid,
								   NIL, /* no expressions to evaluate */
								   foreignPrivateList);
#endif

	return foreignScan;
}


/*
 * TupleCountEstimate estimates the number of base relation tuples in the given
 * file.
+ */ +static double +TupleCountEstimate(RelOptInfo *baserel, const char *filename) +{ + double tupleCountEstimate = 0.0; + + /* check if the user executed Analyze on this foreign table before */ + if (baserel->pages > 0) + { + /* + * We have number of pages and number of tuples from pg_class (from a + * previous ANALYZE), so compute a tuples-per-page estimate and scale + * that by the current file size. + */ + double tupleDensity = baserel->tuples / (double) baserel->pages; + BlockNumber pageCount = PageCount(filename); + + tupleCountEstimate = clamp_row_est(tupleDensity * (double) pageCount); + } + else + { + tupleCountEstimate = (double) CStoreTableRowCount(filename); + } + + return tupleCountEstimate; +} + + +/* PageCount calculates and returns the number of pages in a file. */ +static BlockNumber +PageCount(const char *filename) +{ + BlockNumber pageCount = 0; + struct stat statBuffer; + + /* if file doesn't exist at plan time, use default estimate for its size */ + int statResult = stat(filename, &statBuffer); + if (statResult < 0) + { + statBuffer.st_size = 10 * BLCKSZ; + } + + pageCount = (statBuffer.st_size + (BLCKSZ - 1)) / BLCKSZ; + if (pageCount < 1) + { + pageCount = 1; + } + + return pageCount; +} + + +/* + * ColumnList takes in the planner's information about this foreign table. The + * function then finds all columns needed for query execution, including those + * used in projections, joins, and filter clauses, de-duplicates these columns, + * and returns them in a new list. This function is taken from mongo_fdw with + * slight modifications. 
 */
static List *
ColumnList(RelOptInfo *baserel, Oid foreignTableId)
{
	List *columnList = NIL;
	List *neededColumnList = NIL;
	AttrNumber columnIndex = 1;
	AttrNumber columnCount = baserel->max_attr;
#if PG_VERSION_NUM >= 90600
	List *targetColumnList = baserel->reltarget->exprs;
#else
	List *targetColumnList = baserel->reltargetlist;
#endif
	ListCell *targetColumnCell = NULL;
	List *restrictInfoList = baserel->baserestrictinfo;
	ListCell *restrictInfoCell = NULL;
	const AttrNumber wholeRow = 0;	/* varattno 0 denotes a whole-row reference */
	Relation relation = heap_open(foreignTableId, AccessShareLock);
	TupleDesc tupleDescriptor = RelationGetDescr(relation);

	/* first add the columns used in joins and projections */
	foreach(targetColumnCell, targetColumnList)
	{
		List *targetVarList = NIL;
		Node *targetExpr = (Node *) lfirst(targetColumnCell);

#if PG_VERSION_NUM >= 90600
		targetVarList = pull_var_clause(targetExpr,
										PVC_RECURSE_AGGREGATES |
										PVC_RECURSE_PLACEHOLDERS);
#else
		targetVarList = pull_var_clause(targetExpr,
										PVC_RECURSE_AGGREGATES,
										PVC_RECURSE_PLACEHOLDERS);
#endif

		neededColumnList = list_union(neededColumnList, targetVarList);
	}

	/* then walk over all restriction clauses, and pull up any used columns */
	foreach(restrictInfoCell, restrictInfoList)
	{
		RestrictInfo *restrictInfo = (RestrictInfo *) lfirst(restrictInfoCell);
		Node *restrictClause = (Node *) restrictInfo->clause;
		List *clauseColumnList = NIL;

		/* recursively pull up any columns used in the restriction clause */
#if PG_VERSION_NUM >= 90600
		clauseColumnList = pull_var_clause(restrictClause,
										   PVC_RECURSE_AGGREGATES |
										   PVC_RECURSE_PLACEHOLDERS);
#else
		clauseColumnList = pull_var_clause(restrictClause,
										   PVC_RECURSE_AGGREGATES,
										   PVC_RECURSE_PLACEHOLDERS);
#endif

		neededColumnList = list_union(neededColumnList, clauseColumnList);
	}

	/* walk over all column definitions, and de-duplicate column list */
	for (columnIndex = 1; columnIndex <= columnCount; columnIndex++)
	{
		ListCell *neededColumnCell = NULL;
		Var *column = NULL;
		Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex - 1);

		if (attributeForm->attisdropped)
		{
			continue;
		}

		/* look for this column in the needed column list */
		foreach(neededColumnCell, neededColumnList)
		{
			Var *neededColumn = (Var *) lfirst(neededColumnCell);
			if (neededColumn->varattno == columnIndex)
			{
				column = neededColumn;
				break;
			}
			else if (neededColumn->varattno == wholeRow)
			{
				/* whole-row reference: synthesize a Var for this attribute */
				Index tableId = neededColumn->varno;

				column = makeVar(tableId, columnIndex, attributeForm->atttypid,
								 attributeForm->atttypmod, attributeForm->attcollation,
								 0);
				break;
			}
		}

		if (column != NULL)
		{
			columnList = lappend(columnList, column);
		}
	}

	heap_close(relation, AccessShareLock);

	return columnList;
}


/* CStoreExplainForeignScan produces extra output for the Explain command. */
static void
CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState)
{
	Oid foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation);
	CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId);

	ExplainPropertyText("CStore File", cstoreFdwOptions->filename, explainState);

	/* only report the file size when cost details are being shown */
	if (explainState->costs)
	{
		struct stat statBuffer;

		int statResult = stat(cstoreFdwOptions->filename, &statBuffer);
		if (statResult == 0)
		{
			/*
			 * NOTE(review): the (long) cast truncates sizes above 2GB on
			 * platforms with 32-bit long (e.g. Windows) — confirm acceptable.
			 */
			ExplainPropertyLong("CStore File Size", (long) statBuffer.st_size,
								explainState);
		}
	}
}


/* CStoreBeginForeignScan starts reading the underlying cstore file.
 */
static void
CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags)
{
	TableReadState *readState = NULL;
	Oid foreignTableId = InvalidOid;
	CStoreFdwOptions *cstoreFdwOptions = NULL;
	Relation currentRelation = scanState->ss.ss_currentRelation;
	TupleDesc tupleDescriptor = RelationGetDescr(currentRelation);
	List *columnList = NIL;
	ForeignScan *foreignScan = NULL;
	List *foreignPrivateList = NIL;
	List *whereClauseList = NIL;

	/* if Explain with no Analyze, do nothing */
	if (executorFlags & EXEC_FLAG_EXPLAIN_ONLY)
	{
		return;
	}

	foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation);
	cstoreFdwOptions = CStoreGetOptions(foreignTableId);

	foreignScan = (ForeignScan *) scanState->ss.ps.plan;
	foreignPrivateList = (List *) foreignScan->fdw_private;
	whereClauseList = foreignScan->scan.plan.qual;

	/* the column list was stashed into fdw_private by CStoreGetForeignPlan */
	columnList = (List *) linitial(foreignPrivateList);
	readState = CStoreBeginRead(cstoreFdwOptions->filename, tupleDescriptor,
								columnList, whereClauseList);

	scanState->fdw_state = (void *) readState;
}


/*
 * CStoreIterateForeignScan reads the next record from the cstore file, converts
 * it to a Postgres tuple, and stores the converted tuple into the ScanTupleSlot
 * as a virtual tuple. Returns an empty slot when the file is exhausted.
 */
static TupleTableSlot *
CStoreIterateForeignScan(ForeignScanState *scanState)
{
	TableReadState *readState = (TableReadState *) scanState->fdw_state;
	TupleTableSlot *tupleSlot = scanState->ss.ss_ScanTupleSlot;
	bool nextRowFound = false;

	TupleDesc tupleDescriptor = tupleSlot->tts_tupleDescriptor;
	Datum *columnValues = tupleSlot->tts_values;
	bool *columnNulls = tupleSlot->tts_isnull;
	uint32 columnCount = tupleDescriptor->natts;

	/* initialize all values for this row to null */
	memset(columnValues, 0, columnCount * sizeof(Datum));
	memset(columnNulls, true, columnCount * sizeof(bool));

	ExecClearTuple(tupleSlot);

	nextRowFound = CStoreReadNextRow(readState, columnValues, columnNulls);
	if (nextRowFound)
	{
		ExecStoreVirtualTuple(tupleSlot);
	}

	return tupleSlot;
}


/* CStoreEndForeignScan finishes scanning the foreign table. */
static void
CStoreEndForeignScan(ForeignScanState *scanState)
{
	/* fdw_state is NULL when the scan was EXPLAIN-only and never begun */
	TableReadState *readState = (TableReadState *) scanState->fdw_state;
	if (readState != NULL)
	{
		CStoreEndRead(readState);
	}
}


/* CStoreReScanForeignScan rescans the foreign table by restarting the read. */
static void
CStoreReScanForeignScan(ForeignScanState *scanState)
{
	CStoreEndForeignScan(scanState);
	CStoreBeginForeignScan(scanState, 0);
}


/*
 * CStoreAnalyzeForeignTable sets the total page count and the function pointer
 * used to acquire a random sample of rows from the foreign file.
 */
static bool
CStoreAnalyzeForeignTable(Relation relation,
						  AcquireSampleRowsFunc *acquireSampleRowsFunc,
						  BlockNumber *totalPageCount)
{
	Oid foreignTableId = RelationGetRelid(relation);
	CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId);
	struct stat statBuffer;

	/* unlike plan time, a missing file is an error during ANALYZE */
	int statResult = stat(cstoreFdwOptions->filename, &statBuffer);
	if (statResult < 0)
	{
		ereport(ERROR, (errcode_for_file_access(),
						errmsg("could not stat file \"%s\": %m",
							   cstoreFdwOptions->filename)));
	}

	(*totalPageCount) = PageCount(cstoreFdwOptions->filename);
	(*acquireSampleRowsFunc) = CStoreAcquireSampleRows;

	return true;
}


/*
 * CStoreAcquireSampleRows acquires a random sample of rows from the foreign
 * table. Selected rows are returned in the caller allocated sampleRows array,
 * which must have at least target row count entries. The actual number of rows
 * selected is returned as the function result. We also count the number of rows
 * in the collection and return it in total row count. We also always set dead
 * row count to zero.
 *
 * Note that the returned list of rows does not always follow their actual order
 * in the cstore file. Therefore, correlation estimates derived later could be
 * inaccurate, but that's OK. We currently don't use correlation estimates (the
 * planner only pays attention to correlation for index scans).
 */
static int
CStoreAcquireSampleRows(Relation relation, int logLevel,
						HeapTuple *sampleRows, int targetRowCount,
						double *totalRowCount, double *totalDeadRowCount)
{
	int sampleRowCount = 0;
	double rowCount = 0.0;
	double rowCountToSkip = -1; /* -1 means not set yet */
	double selectionState = 0;
	MemoryContext oldContext = CurrentMemoryContext;
	MemoryContext tupleContext = NULL;
	Datum *columnValues = NULL;
	bool *columnNulls = NULL;
	TupleTableSlot *scanTupleSlot = NULL;
	List *columnList = NIL;
	List *foreignPrivateList = NULL;
	ForeignScanState *scanState = NULL;
	ForeignScan *foreignScan = NULL;
	char *relationName = NULL;
	int executorFlags = 0;	/* no EXPLAIN-only flag: really start the scan */

	TupleDesc tupleDescriptor = RelationGetDescr(relation);
	uint32 columnCount = tupleDescriptor->natts;


	/* create list of columns of the relation (all non-dropped attributes) */
	uint32 columnIndex = 0;
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex);
		const Index tableId = 1;

		if (!attributeForm->attisdropped)
		{
			Var *column = makeVar(tableId, columnIndex + 1, attributeForm->atttypid,
								  attributeForm->atttypmod, attributeForm->attcollation, 0);
			columnList = lappend(columnList, column);
		}
	}

	/* setup a minimal foreign scan plan node, as CStoreBeginForeignScan expects */
	foreignPrivateList = list_make1(columnList);
	foreignScan = makeNode(ForeignScan);
	foreignScan->fdw_private = foreignPrivateList;

	/* set up tuple slot */
	columnValues = palloc0(columnCount * sizeof(Datum));
	columnNulls = palloc0(columnCount * sizeof(bool));
#if PG_VERSION_NUM >= 120000
	scanTupleSlot = MakeTupleTableSlot(NULL, &TTSOpsVirtual);
#elif PG_VERSION_NUM >= 110000
	scanTupleSlot = MakeTupleTableSlot(NULL);
#else
	scanTupleSlot = MakeTupleTableSlot();
#endif
	scanTupleSlot->tts_tupleDescriptor = tupleDescriptor;
	scanTupleSlot->tts_values = columnValues;
	scanTupleSlot->tts_isnull = columnNulls;

	/* setup scan state */
	scanState = makeNode(ForeignScanState);
	scanState->ss.ss_currentRelation = relation;
	scanState->ss.ps.plan = (Plan *) foreignScan;
	scanState->ss.ss_ScanTupleSlot = scanTupleSlot;

	/*
	 * Use per-tuple memory context to prevent leak of memory used to read and
	 * parse rows from the file.
	 */
	tupleContext = AllocSetContextCreate(CurrentMemoryContext,
										 "cstore_fdw temporary context",
										 ALLOCSET_DEFAULT_SIZES);

	CStoreBeginForeignScan(scanState, executorFlags);

	/* prepare for sampling rows */
	selectionState = anl_init_selection_state(targetRowCount);

	for (;;)
	{
		/* check for user-requested abort or sleep */
		vacuum_delay_point();

		/* reset per-row output arrays: all columns start out null */
		memset(columnValues, 0, columnCount * sizeof(Datum));
		memset(columnNulls, true, columnCount * sizeof(bool));

		MemoryContextReset(tupleContext);
		MemoryContextSwitchTo(tupleContext);

		/* read the next record */
		CStoreIterateForeignScan(scanState);

		MemoryContextSwitchTo(oldContext);

		/* if there are no more records to read, break */
		if (TTS_EMPTY(scanTupleSlot))
		{
			break;
		}

		/*
		 * The first targetRowCount sample rows are simply copied into the
		 * reservoir. Then we start replacing tuples in the sample until we
		 * reach the end of the relation. This algorithm is from Jeff Vitter's
		 * paper (see more info in commands/analyze.c).
		 */
		if (sampleRowCount < targetRowCount)
		{
			sampleRows[sampleRowCount] = heap_form_tuple(tupleDescriptor, columnValues,
														 columnNulls);
			sampleRowCount++;
		}
		else
		{
			/*
			 * t in Vitter's paper is the number of records already processed.
			 * If we need to compute a new S value, we must use the "not yet
			 * incremented" value of rowCount as t.
			 */
			if (rowCountToSkip < 0)
			{
				rowCountToSkip = anl_get_next_S(rowCount, targetRowCount,
												&selectionState);
			}

			if (rowCountToSkip <= 0)
			{
				/*
				 * Found a suitable tuple, so save it, replacing one old tuple
				 * at random.
				 */
				int rowIndex = (int) (targetRowCount * anl_random_fract());
				Assert(rowIndex >= 0);
				Assert(rowIndex < targetRowCount);

				heap_freetuple(sampleRows[rowIndex]);
				sampleRows[rowIndex] = heap_form_tuple(tupleDescriptor,
													   columnValues, columnNulls);
			}

			rowCountToSkip--;
		}

		rowCount++;
	}

	/* clean up */
	MemoryContextDelete(tupleContext);
	pfree(columnValues);
	pfree(columnNulls);

	CStoreEndForeignScan(scanState);

	/* emit some interesting relation info */
	relationName = RelationGetRelationName(relation);
	ereport(logLevel, (errmsg("\"%s\": file contains %.0f rows; %d rows in sample",
							  relationName, rowCount, sampleRowCount)));

	(*totalRowCount) = rowCount;
	(*totalDeadRowCount) = 0;

	return sampleRowCount;
}


/*
 * CStorePlanForeignModify checks if operation is supported. Only insert
 * command with subquery (ie insert into
select ...) is supported.
 * Other forms of insert, delete, and update commands are not supported. It
 * throws an error when the command is not supported.
 */
static List *
CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan,
						Index resultRelation, int subplanIndex)
{
	bool operationSupported = false;

	if (plan->operation == CMD_INSERT)
	{
		ListCell *tableCell = NULL;
		Query *query = NULL;

		/*
		 * Only insert operation with select subquery is supported. Other forms
		 * of insert, update, and delete operations are not supported.
		 */
		query = plannerInfo->parse;
		foreach(tableCell, query->rtable)
		{
			RangeTblEntry *tableEntry = lfirst(tableCell);

			if (tableEntry->rtekind == RTE_SUBQUERY &&
				tableEntry->subquery != NULL &&
				tableEntry->subquery->commandType == CMD_SELECT)
			{
				operationSupported = true;
				break;
			}
		}
	}

	if (!operationSupported)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("operation is not supported")));
	}

	return NIL;
}


/*
 * CStoreBeginForeignModify prepares cstore table for a modification.
 * Only insert is currently supported.
 */
static void
CStoreBeginForeignModify(ModifyTableState *modifyTableState,
						 ResultRelInfo *relationInfo, List *fdwPrivate,
						 int subplanIndex, int executorFlags)
{
	/* if Explain with no Analyze, do nothing */
	if (executorFlags & EXEC_FLAG_EXPLAIN_ONLY)
	{
		return;
	}

	/* CStorePlanForeignModify already rejected everything but INSERT */
	Assert (modifyTableState->operation == CMD_INSERT);

	CStoreBeginForeignInsert(modifyTableState, relationInfo);
}


/*
 * CStoreBeginForeignInsert prepares a cstore table for an insert, or rows
 * coming from a COPY.
 */
static void
CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo)
{
	Oid foreignTableOid = InvalidOid;
	CStoreFdwOptions *cstoreFdwOptions = NULL;
	TupleDesc tupleDescriptor = NULL;
	TableWriteState *writeState = NULL;
	Relation relation = NULL;

	foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc);

	/* lock is held until CStoreEndForeignInsert closes the relation */
	relation = heap_open(foreignTableOid, ShareUpdateExclusiveLock);
	cstoreFdwOptions = CStoreGetOptions(foreignTableOid);
	tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc);

	writeState = CStoreBeginWrite(cstoreFdwOptions->filename,
								  cstoreFdwOptions->compressionType,
								  cstoreFdwOptions->stripeRowCount,
								  cstoreFdwOptions->blockRowCount,
								  tupleDescriptor);

	writeState->relation = relation;
	relationInfo->ri_FdwState = (void *) writeState;
}


/*
 * CStoreExecForeignInsert inserts a single row to cstore table
 * and returns inserted row's data values.
 */
static TupleTableSlot *
CStoreExecForeignInsert(EState *executorState, ResultRelInfo *relationInfo,
						TupleTableSlot *tupleSlot, TupleTableSlot *planSlot)
{
	TableWriteState *writeState = (TableWriteState*) relationInfo->ri_FdwState;
	HeapTuple heapTuple;

	Assert(writeState != NULL);

	heapTuple = GetSlotHeapTuple(tupleSlot);

	if (HeapTupleHasExternal(heapTuple))
	{
		/* detoast any toasted attributes */
		HeapTuple newTuple = toast_flatten_tuple(heapTuple,
												 tupleSlot->tts_tupleDescriptor);

		ExecForceStoreHeapTuple(newTuple, tupleSlot, true);
	}

	slot_getallattrs(tupleSlot);

	CStoreWriteRow(writeState, tupleSlot->tts_values, tupleSlot->tts_isnull);

	return tupleSlot;
}


/*
 * CStoreEndForeignModify ends the current modification. Only insert is currently
 * supported.
 */
static void
CStoreEndForeignModify(EState *executorState, ResultRelInfo *relationInfo)
{
	CStoreEndForeignInsert(executorState, relationInfo);
}


/*
 * CStoreEndForeignInsert ends the current insert or COPY operation, flushing
 * pending data and releasing the lock taken in CStoreBeginForeignInsert.
 */
static void
CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo)
{
	TableWriteState *writeState = (TableWriteState*) relationInfo->ri_FdwState;

	/* writeState is NULL during Explain queries */
	if (writeState != NULL)
	{
		Relation relation = writeState->relation;

		CStoreEndWrite(writeState);
		heap_close(relation, ShareUpdateExclusiveLock);
	}
}


#if PG_VERSION_NUM >= 90600
/*
 * CStoreIsForeignScanParallelSafe always returns true to indicate that
 * reading from a cstore_fdw table in a parallel worker is safe. This
 * does not enable parallelism for queries on individual cstore_fdw
 * tables, but does allow parallel scans of cstore_fdw partitions.
 *
 * cstore_fdw is parallel-safe because all writes are immediately committed
 * to disk and then read from disk. There is no uncommitted state that needs
 * to be shared across processes.
 */
static bool
CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel,
								RangeTblEntry *rte)
{
	return true;
}
#endif
diff --git a/cstore_fdw.control b/cstore_fdw.control
new file mode 100644
index 000000000..a95b8509f
--- /dev/null
+++ b/cstore_fdw.control
@@ -0,0 +1,5 @@
+# cstore_fdw extension
+comment = 'foreign-data wrapper for flat cstore access'
+default_version = '1.7'
+module_pathname = '$libdir/cstore_fdw'
+relocatable = true
diff --git a/cstore_fdw.h b/cstore_fdw.h
new file mode 100644
index 000000000..2bc3e9c97
--- /dev/null
+++ b/cstore_fdw.h
@@ -0,0 +1,353 @@
/*-------------------------------------------------------------------------
 *
 * cstore_fdw.h
 *
 * Type and function declarations for CStore foreign data wrapper.
 *
 * Copyright (c) 2016, Citus Data, Inc.
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_FDW_H +#define CSTORE_FDW_H + +#include "access/tupdesc.h" +#include "fmgr.h" +#include "catalog/pg_am.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "lib/stringinfo.h" +#include "utils/rel.h" + + +/* Defines for valid option names */ +#define OPTION_NAME_FILENAME "filename" +#define OPTION_NAME_COMPRESSION_TYPE "compression" +#define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" +#define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" + +/* Default values for option parameters */ +#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE +#define DEFAULT_STRIPE_ROW_COUNT 150000 +#define DEFAULT_BLOCK_ROW_COUNT 10000 + +/* Limits for option parameters */ +#define STRIPE_ROW_COUNT_MINIMUM 1000 +#define STRIPE_ROW_COUNT_MAXIMUM 10000000 +#define BLOCK_ROW_COUNT_MINIMUM 1000 +#define BLOCK_ROW_COUNT_MAXIMUM 100000 + +/* String representations of compression types */ +#define COMPRESSION_STRING_NONE "none" +#define COMPRESSION_STRING_PG_LZ "pglz" +#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" + +/* CStore file signature */ +#define CSTORE_MAGIC_NUMBER "citus_cstore" +#define CSTORE_VERSION_MAJOR 1 +#define CSTORE_VERSION_MINOR 7 + +/* miscellaneous defines */ +#define CSTORE_FDW_NAME "cstore_fdw" +#define CSTORE_FOOTER_FILE_SUFFIX ".footer" +#define CSTORE_TEMP_FILE_SUFFIX ".tmp" +#define CSTORE_TUPLE_COST_MULTIPLIER 10 +#define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 +#define CSTORE_POSTSCRIPT_SIZE_MAX 256 + +/* table containing information about how to partition distributed tables */ +#define CITUS_EXTENSION_NAME "citus" +#define CITUS_PARTITION_TABLE_NAME "pg_dist_partition" + +/* human-readable names for addressing columns of the pg_dist_partition table */ +#define ATTR_NUM_PARTITION_RELATION_ID 1 +#define ATTR_NUM_PARTITION_TYPE 2 +#define ATTR_NUM_PARTITION_KEY 3 + + +/* + * CStoreValidOption keeps an option 
name and a context. When an option is passed
 * into cstore_fdw objects (server and foreign table), we compare this option's
 * name and context against those of valid options.
 */
typedef struct CStoreValidOption
{
	const char *optionName;		/* option name as given in CREATE FOREIGN TABLE */
	Oid optionContextId;		/* catalog OID of the object the option applies to */

} CStoreValidOption;


/* Array of options that are valid for cstore_fdw */
static const uint32 ValidOptionCount = 4;
static const CStoreValidOption ValidOptionArray[] =
{
	/* foreign table options */
	{ OPTION_NAME_FILENAME, ForeignTableRelationId },
	{ OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId },
	{ OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId },
	{ OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId }
};


/* Enumeration for cstore file's compression method */
typedef enum
{
	COMPRESSION_TYPE_INVALID = -1,
	COMPRESSION_NONE = 0,
	COMPRESSION_PG_LZ = 1,

	COMPRESSION_COUNT

} CompressionType;


/*
 * CStoreFdwOptions holds the option values to be used when reading or writing
 * a cstore file. To resolve these values, we first check foreign table's options,
 * and if not present, we then fall back to the default values specified above.
 */
typedef struct CStoreFdwOptions
{
	char *filename;						/* path of the cstore data file */
	CompressionType compressionType;	/* compression applied to value streams */
	uint64 stripeRowCount;				/* max rows per stripe */
	uint32 blockRowCount;				/* max rows per block within a stripe */

} CStoreFdwOptions;


/*
 * StripeMetadata represents information about a stripe. This information is
 * stored in the cstore file's footer.
 */
typedef struct StripeMetadata
{
	uint64 fileOffset;		/* byte offset of the stripe within the file */
	uint64 skipListLength;
	uint64 dataLength;
	uint64 footerLength;

} StripeMetadata;


/* TableFooter represents the footer of a cstore file. */
typedef struct TableFooter
{
	List *stripeMetadataList;	/* list of StripeMetadata, one per stripe */
	uint64 blockRowCount;

} TableFooter;


/* ColumnBlockSkipNode contains statistics for a ColumnBlockData. */
typedef struct ColumnBlockSkipNode
{
	/* statistics about values of a column block */
	bool hasMinMax;
	Datum minimumValue;
	Datum maximumValue;
	uint64 rowCount;

	/*
	 * Offsets and sizes of value and exists streams in the column data.
	 * These enable us to skip reading suppressed row blocks, and start reading
	 * a block without reading previous blocks.
	 */
	uint64 valueBlockOffset;
	uint64 valueLength;
	uint64 existsBlockOffset;
	uint64 existsLength;

	CompressionType valueCompressionType;

} ColumnBlockSkipNode;


/*
 * StripeSkipList can be used for skipping row blocks. It contains a column block
 * skip node for each block of each column. blockSkipNodeArray[column][block]
 * is the entry for the specified column block.
 */
typedef struct StripeSkipList
{
	ColumnBlockSkipNode **blockSkipNodeArray;
	uint32 columnCount;
	uint32 blockCount;

} StripeSkipList;


/*
 * ColumnBlockData represents a block of data in a column. valueArray stores
 * the values of data, and existsArray stores whether a value is present.
 * valueBuffer is used to store (uncompressed) serialized values
 * referenced by Datum's in valueArray. It is only used for by-reference Datum's.
 * There is a one-to-one correspondence between valueArray and existsArray.
 */
typedef struct ColumnBlockData
{
	bool *existsArray;
	Datum *valueArray;

	/* valueBuffer keeps actual data for type-by-reference datums from valueArray. */
	StringInfo valueBuffer;

} ColumnBlockData;


/*
 * ColumnBlockBuffers represents a block of serialized data in a column.
 * valueBuffer stores the serialized values of data, and existsBuffer stores
 * serialized value of presence information. valueCompressionType contains
 * compression type if valueBuffer is compressed. Finally rowCount has
 * the number of rows in this block.
 */
typedef struct ColumnBlockBuffers
{
	StringInfo existsBuffer;
	StringInfo valueBuffer;
	CompressionType valueCompressionType;

} ColumnBlockBuffers;


/*
 * ColumnBuffers represents data buffers for a column in a row stripe. Each
 * column is made of multiple column blocks.
 */
typedef struct ColumnBuffers
{
	ColumnBlockBuffers **blockBuffersArray;

} ColumnBuffers;


/* StripeBuffers represents data for a row stripe in a cstore file. */
typedef struct StripeBuffers
{
	uint32 columnCount;
	uint32 rowCount;
	ColumnBuffers **columnBuffersArray;

} StripeBuffers;


/*
 * StripeFooter represents a stripe's footer. In this footer, we keep three
 * arrays of sizes. The number of elements in each of the arrays is equal
 * to the number of columns.
 */
typedef struct StripeFooter
{
	uint32 columnCount;
	uint64 *skipListSizeArray;
	uint64 *existsSizeArray;
	uint64 *valueSizeArray;

} StripeFooter;


/* TableReadState represents state of a cstore file read operation. */
typedef struct TableReadState
{
	FILE *tableFile;
	TableFooter *tableFooter;
	TupleDesc tupleDescriptor;

	/*
	 * List of Var pointers for columns in the query. We use this both for
	 * getting vector of projected columns, and also when we want to build
	 * base constraint to find selected row blocks.
	 */
	List *projectedColumnList;

	List *whereClauseList;
	MemoryContext stripeReadContext;
	StripeBuffers *stripeBuffers;
	uint32 readStripeCount;
	uint64 stripeReadRowCount;
	ColumnBlockData **blockDataArray;
	int32 deserializedBlockIndex;

} TableReadState;


/* TableWriteState represents state of a cstore file write operation.
 */
typedef struct TableWriteState
{
	FILE *tableFile;
	TableFooter *tableFooter;
	StringInfo tableFooterFilename;
	CompressionType compressionType;
	TupleDesc tupleDescriptor;
	FmgrInfo **comparisonFunctionArray;		/* per-column comparators for min/max stats */
	uint64 currentFileOffset;
	Relation relation;

	MemoryContext stripeWriteContext;
	StripeBuffers *stripeBuffers;
	StripeSkipList *stripeSkipList;
	uint32 stripeMaxRowCount;
	ColumnBlockData **blockDataArray;

	/*
	 * compressionBuffer buffer is used as temporary storage during
	 * data value compression operation. It is kept here to minimize
	 * memory allocations. It lives in stripeWriteContext and gets
	 * deallocated when memory context is reset.
	 */
	StringInfo compressionBuffer;

} TableWriteState;

/* Function declarations for extension loading and unloading */
extern void _PG_init(void);
extern void _PG_fini(void);

/* event trigger function declarations */
extern Datum cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS);

/* Function declarations for utility UDFs */
extern Datum cstore_table_size(PG_FUNCTION_ARGS);
extern Datum cstore_clean_table_resources(PG_FUNCTION_ARGS);

/* Function declarations for foreign data wrapper */
extern Datum cstore_fdw_handler(PG_FUNCTION_ARGS);
extern Datum cstore_fdw_validator(PG_FUNCTION_ARGS);

/* Function declarations for writing to a cstore file */
extern TableWriteState * CStoreBeginWrite(const char *filename,
										  CompressionType compressionType,
										  uint64 stripeMaxRowCount,
										  uint32 blockRowCount,
										  TupleDesc tupleDescriptor);
extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues,
						   bool *columnNulls);
extern void CStoreEndWrite(TableWriteState * state);

/* Function declarations for reading from a cstore file */
extern TableReadState * CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor,
										List *projectedColumnList, List *qualConditions);
extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename);
extern bool
CStoreReadFinished(TableReadState *state); +extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreEndRead(TableReadState *state); + +/* Function declarations for common functions */ +extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, + int16 procedureId); +extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, + uint32 blockRowCount); +extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, + uint32 columnCount); +extern uint64 CStoreTableRowCount(const char *filename); +extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, + CompressionType compressionType); +extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); + + +#endif /* CSTORE_FDW_H */ diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c new file mode 100644 index 000000000..26402f897 --- /dev/null +++ b/cstore_metadata_serialization.c @@ -0,0 +1,581 @@ +/*------------------------------------------------------------------------- + * + * cstore_metadata_serialization.c + * + * This file contains function definitions for serializing/deserializing cstore + * metadata. + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "cstore_fdw.h" +#include "cstore_metadata_serialization.h" +#include "cstore.pb-c.h" +#include "access/tupmacs.h" + + +/* local functions forward declarations */ +static ProtobufCBinaryData DatumToProtobufBinary(Datum datum, bool typeByValue, + int typeLength); +static Datum ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, + bool typeByValue, int typeLength); + + +/* + * SerializePostScript serializes the given postscript and returns the result as + * a StringInfo. 
+ */ +StringInfo +SerializePostScript(uint64 tableFooterLength) +{ + StringInfo postscriptBuffer = NULL; + Protobuf__PostScript protobufPostScript = PROTOBUF__POST_SCRIPT__INIT; + uint8 *postscriptData = NULL; + uint32 postscriptSize = 0; + + protobufPostScript.has_tablefooterlength = true; + protobufPostScript.tablefooterlength = tableFooterLength; + protobufPostScript.has_versionmajor = true; + protobufPostScript.versionmajor = CSTORE_VERSION_MAJOR; + protobufPostScript.has_versionminor = true; + protobufPostScript.versionminor = CSTORE_VERSION_MINOR; + protobufPostScript.magicnumber = pstrdup(CSTORE_MAGIC_NUMBER); + + postscriptSize = protobuf__post_script__get_packed_size(&protobufPostScript); + postscriptData = palloc0(postscriptSize); + protobuf__post_script__pack(&protobufPostScript, postscriptData); + + postscriptBuffer = palloc0(sizeof(StringInfoData)); + postscriptBuffer->len = postscriptSize; + postscriptBuffer->maxlen = postscriptSize; + postscriptBuffer->data = (char *) postscriptData; + + return postscriptBuffer; +} + + +/* + * SerializeTableFooter serializes the given table footer and returns the result + * as a StringInfo. 
+ */ +StringInfo +SerializeTableFooter(TableFooter *tableFooter) +{ + StringInfo tableFooterBuffer = NULL; + Protobuf__TableFooter protobufTableFooter = PROTOBUF__TABLE_FOOTER__INIT; + Protobuf__StripeMetadata **stripeMetadataArray = NULL; + ListCell *stripeMetadataCell = NULL; + uint8 *tableFooterData = NULL; + uint32 tableFooterSize = 0; + uint32 stripeIndex = 0; + + List *stripeMetadataList = tableFooter->stripeMetadataList; + uint32 stripeCount = list_length(stripeMetadataList); + stripeMetadataArray = palloc0(stripeCount * sizeof(Protobuf__StripeMetadata *)); + + foreach(stripeMetadataCell, stripeMetadataList) + { + StripeMetadata *stripeMetadata = lfirst(stripeMetadataCell); + + Protobuf__StripeMetadata *protobufStripeMetadata = NULL; + protobufStripeMetadata = palloc0(sizeof(Protobuf__StripeMetadata)); + protobuf__stripe_metadata__init(protobufStripeMetadata); + protobufStripeMetadata->has_fileoffset = true; + protobufStripeMetadata->fileoffset = stripeMetadata->fileOffset; + protobufStripeMetadata->has_skiplistlength = true; + protobufStripeMetadata->skiplistlength = stripeMetadata->skipListLength; + protobufStripeMetadata->has_datalength = true; + protobufStripeMetadata->datalength = stripeMetadata->dataLength; + protobufStripeMetadata->has_footerlength = true; + protobufStripeMetadata->footerlength = stripeMetadata->footerLength; + + stripeMetadataArray[stripeIndex] = protobufStripeMetadata; + stripeIndex++; + } + + protobufTableFooter.n_stripemetadataarray = stripeCount; + protobufTableFooter.stripemetadataarray = stripeMetadataArray; + protobufTableFooter.has_blockrowcount = true; + protobufTableFooter.blockrowcount = tableFooter->blockRowCount; + + tableFooterSize = protobuf__table_footer__get_packed_size(&protobufTableFooter); + tableFooterData = palloc0(tableFooterSize); + protobuf__table_footer__pack(&protobufTableFooter, tableFooterData); + + tableFooterBuffer = palloc0(sizeof(StringInfoData)); + tableFooterBuffer->len = tableFooterSize; + 
tableFooterBuffer->maxlen = tableFooterSize; + tableFooterBuffer->data = (char *) tableFooterData; + + return tableFooterBuffer; +} + + +/* + * SerializeStripeFooter serializes given stripe footer and returns the result + * as a StringInfo. + */ +StringInfo +SerializeStripeFooter(StripeFooter *stripeFooter) +{ + StringInfo stripeFooterBuffer = NULL; + Protobuf__StripeFooter protobufStripeFooter = PROTOBUF__STRIPE_FOOTER__INIT; + uint8 *stripeFooterData = NULL; + uint32 stripeFooterSize = 0; + + protobufStripeFooter.n_skiplistsizearray = stripeFooter->columnCount; + protobufStripeFooter.skiplistsizearray = (uint64_t *) stripeFooter->skipListSizeArray; + protobufStripeFooter.n_existssizearray = stripeFooter->columnCount; + protobufStripeFooter.existssizearray = (uint64_t *) stripeFooter->existsSizeArray; + protobufStripeFooter.n_valuesizearray = stripeFooter->columnCount; + protobufStripeFooter.valuesizearray = (uint64_t *) stripeFooter->valueSizeArray; + + stripeFooterSize = protobuf__stripe_footer__get_packed_size(&protobufStripeFooter); + stripeFooterData = palloc0(stripeFooterSize); + protobuf__stripe_footer__pack(&protobufStripeFooter, stripeFooterData); + + stripeFooterBuffer = palloc0(sizeof(StringInfoData)); + stripeFooterBuffer->len = stripeFooterSize; + stripeFooterBuffer->maxlen = stripeFooterSize; + stripeFooterBuffer->data = (char *) stripeFooterData; + + return stripeFooterBuffer; +} + + +/* + * SerializeColumnSkipList serializes a column skip list, where the colum skip + * list includes all block skip nodes for that column. The function then returns + * the result as a string info. 
+ */ +StringInfo +SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, + bool typeByValue, int typeLength) +{ + StringInfo blockSkipListBuffer = NULL; + Protobuf__ColumnBlockSkipList protobufBlockSkipList = + PROTOBUF__COLUMN_BLOCK_SKIP_LIST__INIT; + Protobuf__ColumnBlockSkipNode **protobufBlockSkipNodeArray = NULL; + uint32 blockIndex = 0; + uint8 *blockSkipListData = NULL; + uint32 blockSkipListSize = 0; + + protobufBlockSkipNodeArray = palloc0(blockCount * + sizeof(Protobuf__ColumnBlockSkipNode *)); + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockSkipNode blockSkipNode = blockSkipNodeArray[blockIndex]; + Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL; + ProtobufCBinaryData binaryMinimumValue = {0, 0}; + ProtobufCBinaryData binaryMaximumValue = {0, 0}; + + if (blockSkipNode.hasMinMax) + { + binaryMinimumValue = DatumToProtobufBinary(blockSkipNode.minimumValue, + typeByValue, typeLength); + binaryMaximumValue = DatumToProtobufBinary(blockSkipNode.maximumValue, + typeByValue, typeLength); + } + + protobufBlockSkipNode = palloc0(sizeof(Protobuf__ColumnBlockSkipNode)); + protobuf__column_block_skip_node__init(protobufBlockSkipNode); + protobufBlockSkipNode->has_rowcount = true; + protobufBlockSkipNode->rowcount = blockSkipNode.rowCount; + protobufBlockSkipNode->has_minimumvalue = blockSkipNode.hasMinMax; + protobufBlockSkipNode->minimumvalue = binaryMinimumValue; + protobufBlockSkipNode->has_maximumvalue = blockSkipNode.hasMinMax; + protobufBlockSkipNode->maximumvalue = binaryMaximumValue; + protobufBlockSkipNode->has_valueblockoffset = true; + protobufBlockSkipNode->valueblockoffset = blockSkipNode.valueBlockOffset; + protobufBlockSkipNode->has_valuelength = true; + protobufBlockSkipNode->valuelength = blockSkipNode.valueLength; + protobufBlockSkipNode->has_existsblockoffset = true; + protobufBlockSkipNode->existsblockoffset = blockSkipNode.existsBlockOffset; + 
protobufBlockSkipNode->has_existslength = true; + protobufBlockSkipNode->existslength = blockSkipNode.existsLength; + protobufBlockSkipNode->has_valuecompressiontype = true; + protobufBlockSkipNode->valuecompressiontype = + (Protobuf__CompressionType) blockSkipNode.valueCompressionType; + + protobufBlockSkipNodeArray[blockIndex] = protobufBlockSkipNode; + } + + protobufBlockSkipList.n_blockskipnodearray = blockCount; + protobufBlockSkipList.blockskipnodearray = protobufBlockSkipNodeArray; + + blockSkipListSize = + protobuf__column_block_skip_list__get_packed_size(&protobufBlockSkipList); + blockSkipListData = palloc0(blockSkipListSize); + protobuf__column_block_skip_list__pack(&protobufBlockSkipList, blockSkipListData); + + blockSkipListBuffer = palloc0(sizeof(StringInfoData)); + blockSkipListBuffer->len = blockSkipListSize; + blockSkipListBuffer->maxlen = blockSkipListSize; + blockSkipListBuffer->data = (char *) blockSkipListData; + + return blockSkipListBuffer; +} + + +/* + * DeserializePostScript deserializes the given postscript buffer and returns + * the size of table footer in tableFooterLength pointer. 
+ */ +void +DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength) +{ + Protobuf__PostScript *protobufPostScript = NULL; + protobufPostScript = protobuf__post_script__unpack(NULL, buffer->len, + (uint8 *) buffer->data); + if (protobufPostScript == NULL) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid postscript buffer"))); + } + + if (protobufPostScript->versionmajor != CSTORE_VERSION_MAJOR || + protobufPostScript->versionminor > CSTORE_VERSION_MINOR) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid column store version number"))); + } + else if (strncmp(protobufPostScript->magicnumber, CSTORE_MAGIC_NUMBER, + NAMEDATALEN) != 0) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid magic number"))); + } + + (*tableFooterLength) = protobufPostScript->tablefooterlength; + + protobuf__post_script__free_unpacked(protobufPostScript, NULL); +} + + +/* + * DeserializeTableFooter deserializes the given buffer and returns the result as + * a TableFooter struct. 
+ */ +TableFooter * +DeserializeTableFooter(StringInfo buffer) +{ + TableFooter *tableFooter = NULL; + Protobuf__TableFooter *protobufTableFooter = NULL; + List *stripeMetadataList = NIL; + uint64 blockRowCount = 0; + uint32 stripeCount = 0; + uint32 stripeIndex = 0; + + protobufTableFooter = protobuf__table_footer__unpack(NULL, buffer->len, + (uint8 *) buffer->data); + if (protobufTableFooter == NULL) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid table footer buffer"))); + } + + if (!protobufTableFooter->has_blockrowcount) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("missing required table footer metadata fields"))); + } + else if (protobufTableFooter->blockrowcount < BLOCK_ROW_COUNT_MINIMUM || + protobufTableFooter->blockrowcount > BLOCK_ROW_COUNT_MAXIMUM) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid block row count"))); + } + blockRowCount = protobufTableFooter->blockrowcount; + + stripeCount = protobufTableFooter->n_stripemetadataarray; + for (stripeIndex = 0; stripeIndex < stripeCount; stripeIndex++) + { + StripeMetadata *stripeMetadata = NULL; + Protobuf__StripeMetadata *protobufStripeMetadata = NULL; + + protobufStripeMetadata = protobufTableFooter->stripemetadataarray[stripeIndex]; + if (!protobufStripeMetadata->has_fileoffset || + !protobufStripeMetadata->has_skiplistlength || + !protobufStripeMetadata->has_datalength || + !protobufStripeMetadata->has_footerlength) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("missing required stripe metadata fields"))); + } + + stripeMetadata = palloc0(sizeof(StripeMetadata)); + stripeMetadata->fileOffset = protobufStripeMetadata->fileoffset; + stripeMetadata->skipListLength = protobufStripeMetadata->skiplistlength; + stripeMetadata->dataLength = protobufStripeMetadata->datalength; + stripeMetadata->footerLength = protobufStripeMetadata->footerlength; + + stripeMetadataList = 
lappend(stripeMetadataList, stripeMetadata); + } + + protobuf__table_footer__free_unpacked(protobufTableFooter, NULL); + + tableFooter = palloc0(sizeof(TableFooter)); + tableFooter->stripeMetadataList = stripeMetadataList; + tableFooter->blockRowCount = blockRowCount; + + return tableFooter; +} + + +/* + * DeserializeStripeFooter deserializes the given buffer and returns the result + * as a StripeFooter struct. + */ +StripeFooter * +DeserializeStripeFooter(StringInfo buffer) +{ + StripeFooter *stripeFooter = NULL; + Protobuf__StripeFooter *protobufStripeFooter = NULL; + uint64 *skipListSizeArray = NULL; + uint64 *existsSizeArray = NULL; + uint64 *valueSizeArray = NULL; + uint64 sizeArrayLength = 0; + uint32 columnCount = 0; + + protobufStripeFooter = protobuf__stripe_footer__unpack(NULL, buffer->len, + (uint8 *) buffer->data); + if (protobufStripeFooter == NULL) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid stripe footer buffer"))); + } + + columnCount = protobufStripeFooter->n_skiplistsizearray; + if (protobufStripeFooter->n_existssizearray != columnCount || + protobufStripeFooter->n_valuesizearray != columnCount) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("stripe size array lengths don't match"))); + } + + sizeArrayLength = columnCount * sizeof(uint64); + + skipListSizeArray = palloc0(sizeArrayLength); + existsSizeArray = palloc0(sizeArrayLength); + valueSizeArray = palloc0(sizeArrayLength); + + memcpy(skipListSizeArray, protobufStripeFooter->skiplistsizearray, sizeArrayLength); + memcpy(existsSizeArray, protobufStripeFooter->existssizearray, sizeArrayLength); + memcpy(valueSizeArray, protobufStripeFooter->valuesizearray, sizeArrayLength); + + protobuf__stripe_footer__free_unpacked(protobufStripeFooter, NULL); + + stripeFooter = palloc0(sizeof(StripeFooter)); + stripeFooter->skipListSizeArray = skipListSizeArray; + stripeFooter->existsSizeArray = existsSizeArray; + stripeFooter->valueSizeArray 
= valueSizeArray; + stripeFooter->columnCount = columnCount; + + return stripeFooter; +} + + +/* + * DeserializeBlockCount deserializes the given column skip list buffer and + * returns the number of blocks in column skip list. + */ +uint32 +DeserializeBlockCount(StringInfo buffer) +{ + uint32 blockCount = 0; + Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; + + protobufBlockSkipList = + protobuf__column_block_skip_list__unpack(NULL, buffer->len, + (uint8 *) buffer->data); + if (protobufBlockSkipList == NULL) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid skip list buffer"))); + } + + blockCount = protobufBlockSkipList->n_blockskipnodearray; + + protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); + + return blockCount; +} + + +/* + * DeserializeRowCount deserializes the given column skip list buffer and + * returns the total number of rows in block skip list. + */ +uint32 +DeserializeRowCount(StringInfo buffer) +{ + uint32 rowCount = 0; + Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; + uint32 blockIndex = 0; + uint32 blockCount = 0; + + protobufBlockSkipList = + protobuf__column_block_skip_list__unpack(NULL, buffer->len, + (uint8 *) buffer->data); + if (protobufBlockSkipList == NULL) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid skip list buffer"))); + } + + blockCount = (uint32) protobufBlockSkipList->n_blockskipnodearray; + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = + protobufBlockSkipList->blockskipnodearray[blockIndex]; + rowCount += protobufBlockSkipNode->rowcount; + } + + protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); + + return rowCount; +} + + +/* + * DeserializeColumnSkipList deserializes the given buffer and returns the result as + * a ColumnBlockSkipNode array. 
If the number of unpacked block skip nodes are not + * equal to the given block count function errors out. + */ +ColumnBlockSkipNode * +DeserializeColumnSkipList(StringInfo buffer, bool typeByValue, int typeLength, + uint32 blockCount) +{ + ColumnBlockSkipNode *blockSkipNodeArray = NULL; + uint32 blockIndex = 0; + Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; + + protobufBlockSkipList = + protobuf__column_block_skip_list__unpack(NULL, buffer->len, + (uint8 *) buffer->data); + if (protobufBlockSkipList == NULL) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid skip list buffer"))); + } + + if (protobufBlockSkipList->n_blockskipnodearray != blockCount) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("block skip node count and block count don't match"))); + } + + blockSkipNodeArray = palloc0(blockCount * sizeof(ColumnBlockSkipNode)); + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL; + ColumnBlockSkipNode *blockSkipNode = NULL; + bool hasMinMax = false; + Datum minimumValue = 0; + Datum maximumValue = 0; + + protobufBlockSkipNode = protobufBlockSkipList->blockskipnodearray[blockIndex]; + if (!protobufBlockSkipNode->has_rowcount || + !protobufBlockSkipNode->has_existsblockoffset || + !protobufBlockSkipNode->has_valueblockoffset || + !protobufBlockSkipNode->has_existslength || + !protobufBlockSkipNode->has_valuelength || + !protobufBlockSkipNode->has_valuecompressiontype) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("missing required block skip node metadata"))); + } + + if (protobufBlockSkipNode->has_minimumvalue != + protobufBlockSkipNode->has_maximumvalue) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("has minimum and has maximum fields " + "don't match"))); + } + + hasMinMax = protobufBlockSkipNode->has_minimumvalue; + if (hasMinMax) + { + minimumValue = 
ProtobufBinaryToDatum(protobufBlockSkipNode->minimumvalue, + typeByValue, typeLength); + maximumValue = ProtobufBinaryToDatum(protobufBlockSkipNode->maximumvalue, + typeByValue, typeLength); + } + + blockSkipNode = &blockSkipNodeArray[blockIndex]; + blockSkipNode->rowCount = protobufBlockSkipNode->rowcount; + blockSkipNode->hasMinMax = hasMinMax; + blockSkipNode->minimumValue = minimumValue; + blockSkipNode->maximumValue = maximumValue; + blockSkipNode->existsBlockOffset = protobufBlockSkipNode->existsblockoffset; + blockSkipNode->valueBlockOffset = protobufBlockSkipNode->valueblockoffset; + blockSkipNode->existsLength = protobufBlockSkipNode->existslength; + blockSkipNode->valueLength = protobufBlockSkipNode->valuelength; + blockSkipNode->valueCompressionType = + (CompressionType) protobufBlockSkipNode->valuecompressiontype; + } + + protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); + + return blockSkipNodeArray; +} + + +/* Converts a datum to a ProtobufCBinaryData. */ +static ProtobufCBinaryData +DatumToProtobufBinary(Datum datum, bool datumTypeByValue, int datumTypeLength) +{ + ProtobufCBinaryData protobufBinary = {0, 0}; + + int datumLength = att_addlength_datum(0, datumTypeLength, datum); + char *datumBuffer = palloc0(datumLength); + + if (datumTypeLength > 0) + { + if (datumTypeByValue) + { + store_att_byval(datumBuffer, datum, datumTypeLength); + } + else + { + memcpy(datumBuffer, DatumGetPointer(datum), datumTypeLength); + } + } + else + { + memcpy(datumBuffer, DatumGetPointer(datum), datumLength); + } + + protobufBinary.data = (uint8 *) datumBuffer; + protobufBinary.len = datumLength; + + return protobufBinary; +} + + +/* Converts the given ProtobufCBinaryData to a Datum. 
 */
static Datum
ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, bool datumTypeByValue,
					  int datumTypeLength)
{
	Datum datum = 0;

	/*
	 * We copy the protobuf data so the result of this function lives even
	 * after the unpacked protobuf struct is freed.
	 */
	char *binaryDataCopy = palloc0(protobufBinary.len);
	memcpy(binaryDataCopy, protobufBinary.data, protobufBinary.len);

	datum = fetch_att(binaryDataCopy, datumTypeByValue, datumTypeLength);

	return datum;
}
diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h
new file mode 100644
index 000000000..421f8ddff
--- /dev/null
+++ b/cstore_metadata_serialization.h
@@ -0,0 +1,42 @@
/*-------------------------------------------------------------------------
 *
 * cstore_metadata_serialization.h
 *
 * Type and function declarations to serialize/deserialize cstore metadata.
 *
 * Copyright (c) 2016, Citus Data, Inc.
 *
 * $Id$
 *
 *-------------------------------------------------------------------------
 */

#ifndef CSTORE_SERIALIZATION_H
#define CSTORE_SERIALIZATION_H

#include "catalog/pg_attribute.h"
#include "nodes/pg_list.h"
#include "lib/stringinfo.h"
#include "cstore_fdw.h"


/* Function declarations for metadata serialization */
extern StringInfo SerializePostScript(uint64 tableFooterLength);
extern StringInfo SerializeTableFooter(TableFooter *tableFooter);
extern StringInfo SerializeStripeFooter(StripeFooter *stripeFooter);
extern StringInfo SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray,
										  uint32 blockCount, bool typeByValue,
										  int typeLength);

/* Function declarations for metadata deserialization */
extern void DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength);
extern TableFooter * DeserializeTableFooter(StringInfo buffer);
extern uint32 DeserializeBlockCount(StringInfo buffer);
extern uint32 DeserializeRowCount(StringInfo buffer);
extern StripeFooter * DeserializeStripeFooter(StringInfo buffer);
extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer,
													   bool typeByValue, int typeLength,
													   uint32 blockCount);


#endif /* CSTORE_SERIALIZATION_H */
diff --git a/cstore_reader.c b/cstore_reader.c
new file mode 100644
index 000000000..7e9c6bcfd
--- /dev/null
+++ b/cstore_reader.c
@@ -0,0 +1,1383 @@
/*-------------------------------------------------------------------------
 *
 * cstore_reader.c
 *
 * This file contains function definitions for reading cstore files. This
 * includes the logic for reading file level metadata, reading row stripes,
 * and skipping unrelated row blocks and columns.
 *
 * Copyright (c) 2016, Citus Data, Inc.
 *
 * $Id$
 *
 *-------------------------------------------------------------------------
 */


#include "postgres.h"
#include "cstore_fdw.h"
#include "cstore_metadata_serialization.h"
#include "cstore_version_compat.h"

#include "access/nbtree.h"
#include "access/skey.h"
#include "commands/defrem.h"
#include "nodes/makefuncs.h"
#if PG_VERSION_NUM >= 120000
#include "nodes/pathnodes.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#else
#include "optimizer/clauses.h"
#include "optimizer/predtest.h"
#include "optimizer/var.h"
#endif
#include "optimizer/restrictinfo.h"
#include "port.h"
#include "storage/fd.h"
#include "utils/memutils.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"


/* static function declarations */
static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile,
												 StripeMetadata *stripeMetadata,
												 TupleDesc tupleDescriptor,
												 List *projectedColumnList,
												 List *whereClauseList);
static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList,
							  uint64 blockIndex, uint64 blockRowIndex,
							  ColumnBlockData **blockDataArray,
							  Datum *columnValues, bool *columnNulls);
static ColumnBuffers * LoadColumnBuffers(FILE *tableFile,
										 ColumnBlockSkipNode *blockSkipNodeArray,
										 uint32 blockCount, uint64 existsFileOffset,
										 uint64 valueFileOffset,
										 Form_pg_attribute attributeForm);
static StripeFooter * LoadStripeFooter(FILE *tableFile, StripeMetadata *stripeMetadata,
									   uint32 columnCount);
static StripeSkipList * LoadStripeSkipList(FILE *tableFile,
										   StripeMetadata *stripeMetadata,
										   StripeFooter *stripeFooter,
										   uint32 columnCount,
										   bool *projectedColumnMask,
										   TupleDesc tupleDescriptor);
static bool * SelectedBlockMask(StripeSkipList *stripeSkipList,
								List *projectedColumnList, List *whereClauseList);
static List * BuildRestrictInfoList(List *whereClauseList);
static Node * BuildBaseConstraint(Var *variable);
static OpExpr * MakeOpExpression(Var *variable, int16 strategyNumber);
static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber);
static void UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue);
static StripeSkipList * SelectedBlockSkipList(StripeSkipList *stripeSkipList,
											  bool *projectedColumnMask,
											  bool *selectedBlockMask);
static uint32 StripeSkipListRowCount(StripeSkipList *stripeSkipList);
static bool * ProjectedColumnMask(uint32 columnCount, List *projectedColumnList);
static void DeserializeBoolArray(StringInfo boolArrayBuffer, bool *boolArray,
								 uint32 boolArrayLength);
static void DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray,
								  uint32 datumCount, bool datumTypeByValue,
								  int datumTypeLength, char datumTypeAlign,
								  Datum *datumArray);
static void DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex,
								 uint32 rowCount, ColumnBlockData **blockDataArray,
								 TupleDesc tupleDescriptor);
static Datum ColumnDefaultValue(TupleConstr *tupleConstraints,
								Form_pg_attribute attributeForm);
static int64 FILESize(FILE *file);
static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size);
static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray,
									   uint32 columnCount);
static uint64 StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata);


/*
 * CStoreBeginRead initializes a cstore read operation. This function returns a
 * read handle that's used during reading rows and finishing the read operation.
 */
TableReadState *
CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor,
				List *projectedColumnList, List *whereClauseList)
{
	TableReadState *readState = NULL;
	TableFooter *tableFooter = NULL;
	FILE *tableFile = NULL;
	MemoryContext stripeReadContext = NULL;
	uint32 columnCount = 0;
	bool *projectedColumnMask = NULL;
	ColumnBlockData **blockDataArray = NULL;

	/* the footer lives in a separate companion file next to the data file */
	StringInfo tableFooterFilename = makeStringInfo();
	appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX);

	tableFooter = CStoreReadFooter(tableFooterFilename);

	pfree(tableFooterFilename->data);
	pfree(tableFooterFilename);

	tableFile = AllocateFile(filename, PG_BINARY_R);
	if (tableFile == NULL)
	{
		ereport(ERROR, (errcode_for_file_access(),
						errmsg("could not open file \"%s\" for reading: %m",
							   filename)));
	}

	/*
	 * We allocate all stripe specific data in the stripeReadContext, and reset
	 * this memory context before loading a new stripe. This is to avoid memory
	 * leaks.
	 */
	stripeReadContext = AllocSetContextCreate(CurrentMemoryContext,
											  "Stripe Read Memory Context",
											  ALLOCSET_DEFAULT_SIZES);

	columnCount = tupleDescriptor->natts;
	projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList);
	blockDataArray = CreateEmptyBlockDataArray(columnCount, projectedColumnMask,
											   tableFooter->blockRowCount);

	readState = palloc0(sizeof(TableReadState));
	readState->tableFile = tableFile;
	readState->tableFooter = tableFooter;
	readState->projectedColumnList = projectedColumnList;
	readState->whereClauseList = whereClauseList;
	readState->stripeBuffers = NULL;
	readState->readStripeCount = 0;
	readState->stripeReadRowCount = 0;
	readState->tupleDescriptor = tupleDescriptor;
	readState->stripeReadContext = stripeReadContext;
	readState->blockDataArray = blockDataArray;
	readState->deserializedBlockIndex = -1;  /* no block deserialized yet */

	return readState;
}


/*
 * CStoreReadFooter reads the cstore file footer from the given file. First, the
 * function reads the last byte of the file as the postscript size. Then, the
 * function reads the postscript. Last, the function reads and deserializes the
 * footer.
 */
TableFooter *
CStoreReadFooter(StringInfo tableFooterFilename)
{
	TableFooter *tableFooter = NULL;
	FILE *tableFooterFile = NULL;
	uint64 footerOffset = 0;
	uint64 footerLength = 0;
	StringInfo postscriptBuffer = NULL;
	StringInfo postscriptSizeBuffer = NULL;
	uint64 postscriptSizeOffset = 0;
	uint8 postscriptSize = 0;
	uint64 footerFileSize = 0;
	uint64 postscriptOffset = 0;
	StringInfo footerBuffer = NULL;
	int freeResult = 0;

	tableFooterFile = AllocateFile(tableFooterFilename->data, PG_BINARY_R);
	if (tableFooterFile == NULL)
	{
		ereport(ERROR, (errcode_for_file_access(),
						errmsg("could not open file \"%s\" for reading: %m",
							   tableFooterFilename->data),
						errhint("Try copying in data to the table.")));
	}

	/* the file must at least contain the one-byte postscript size */
	footerFileSize = FILESize(tableFooterFile);
	if (footerFileSize < CSTORE_POSTSCRIPT_SIZE_LENGTH)
	{
		ereport(ERROR, (errmsg("invalid cstore file")));
	}

	/* read the postscript size from the very end of the file */
	postscriptSizeOffset = footerFileSize - CSTORE_POSTSCRIPT_SIZE_LENGTH;
	postscriptSizeBuffer = ReadFromFile(tableFooterFile, postscriptSizeOffset,
										CSTORE_POSTSCRIPT_SIZE_LENGTH);
	memcpy(&postscriptSize, postscriptSizeBuffer->data, CSTORE_POSTSCRIPT_SIZE_LENGTH);
	if (postscriptSize + CSTORE_POSTSCRIPT_SIZE_LENGTH > footerFileSize)
	{
		ereport(ERROR, (errmsg("invalid postscript size")));
	}

	/* the postscript sits immediately before its size byte */
	postscriptOffset = footerFileSize - (CSTORE_POSTSCRIPT_SIZE_LENGTH + postscriptSize);
	postscriptBuffer = ReadFromFile(tableFooterFile, postscriptOffset, postscriptSize);

	DeserializePostScript(postscriptBuffer, &footerLength);
	if (footerLength + postscriptSize + CSTORE_POSTSCRIPT_SIZE_LENGTH > footerFileSize)
	{
		ereport(ERROR, (errmsg("invalid footer size")));
	}

	/* the footer itself sits immediately before the postscript */
	footerOffset = postscriptOffset - footerLength;
	footerBuffer = ReadFromFile(tableFooterFile, footerOffset, footerLength);
	tableFooter = DeserializeTableFooter(footerBuffer);

	freeResult = FreeFile(tableFooterFile);
	if (freeResult != 0)
	{
		ereport(ERROR, (errcode_for_file_access(),
						errmsg("could not close file: %m")));
	}

	return tableFooter;
}


/*
 * CStoreReadNextRow tries to read a row from the cstore file. On success, it sets
 * column values and nulls, and returns true. If there are no more rows to read,
 * the function returns false.
 */
bool
CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNulls)
{
	uint32 blockIndex = 0;
	uint32 blockRowIndex = 0;
	TableFooter *tableFooter = readState->tableFooter;
	MemoryContext oldContext = NULL;

	/*
	 * If no stripes are loaded, load the next non-empty stripe. Note that when
	 * loading stripes, we skip over blocks whose contents can be filtered with
	 * the query's restriction qualifiers. So, even when a stripe is physically
	 * not empty, we may end up loading it as an empty stripe.
	 */
	while (readState->stripeBuffers == NULL)
	{
		StripeBuffers *stripeBuffers = NULL;
		StripeMetadata *stripeMetadata = NULL;
		List *stripeMetadataList = tableFooter->stripeMetadataList;
		uint32 stripeCount = list_length(stripeMetadataList);

		/* if we have read all stripes, return false */
		if (readState->readStripeCount == stripeCount)
		{
			return false;
		}

		/* reset the per-stripe context so the previous stripe's memory is freed */
		oldContext = MemoryContextSwitchTo(readState->stripeReadContext);
		MemoryContextReset(readState->stripeReadContext);

		stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount);
		stripeBuffers = LoadFilteredStripeBuffers(readState->tableFile, stripeMetadata,
												  readState->tupleDescriptor,
												  readState->projectedColumnList,
												  readState->whereClauseList);
		readState->readStripeCount++;

		MemoryContextSwitchTo(oldContext);

		if (stripeBuffers->rowCount != 0)
		{
			readState->stripeBuffers = stripeBuffers;
			readState->stripeReadRowCount = 0;
			readState->deserializedBlockIndex = -1;
			ResetUncompressedBlockData(readState->blockDataArray,
									   stripeBuffers->columnCount);
			break;
		}
	}

	/* locate the block and the row within it for the next row to return */
	blockIndex = readState->stripeReadRowCount / tableFooter->blockRowCount;
	blockRowIndex = readState->stripeReadRowCount % tableFooter->blockRowCount;

	/* deserialize the block lazily, only when we cross into a new block */
	if (blockIndex != readState->deserializedBlockIndex)
	{
		uint32 lastBlockIndex = 0;
		uint32 blockRowCount = 0;
		uint32 stripeRowCount = 0;

		/* the last block of a stripe may hold fewer than blockRowCount rows */
		stripeRowCount = readState->stripeBuffers->rowCount;
		lastBlockIndex = stripeRowCount / tableFooter->blockRowCount;
		if (blockIndex == lastBlockIndex)
		{
			blockRowCount = stripeRowCount % tableFooter->blockRowCount;
		}
		else
		{
			blockRowCount = tableFooter->blockRowCount;
		}

		oldContext = MemoryContextSwitchTo(readState->stripeReadContext);

		DeserializeBlockData(readState->stripeBuffers, blockIndex,
							 blockRowCount, readState->blockDataArray,
							 readState->tupleDescriptor);

		MemoryContextSwitchTo(oldContext);

		readState->deserializedBlockIndex = blockIndex;
	}

	ReadStripeNextRow(readState->stripeBuffers, readState->projectedColumnList,
					  blockIndex, blockRowIndex, readState->blockDataArray,
					  columnValues, columnNulls);

	/*
	 * If we finished reading the current stripe, set stripe data to NULL. That
	 * way, we will load a new stripe the next time this function gets called.
	 */
	readState->stripeReadRowCount++;
	if (readState->stripeReadRowCount == readState->stripeBuffers->rowCount)
	{
		readState->stripeBuffers = NULL;
	}

	return true;
}


/* Finishes a cstore read operation. */
void
CStoreEndRead(TableReadState *readState)
{
	int columnCount = readState->tupleDescriptor->natts;

	MemoryContextDelete(readState->stripeReadContext);
	FreeFile(readState->tableFile);
	list_free_deep(readState->tableFooter->stripeMetadataList);
	FreeColumnBlockDataArray(readState->blockDataArray, columnCount);
	pfree(readState->tableFooter);
	pfree(readState);
}


/*
 * CreateEmptyBlockDataArray creates data buffers to keep deserialized exist and
 * value arrays for requested columns in columnMask.
+ */ +ColumnBlockData ** +CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, uint32 blockRowCount) +{ + uint32 columnIndex = 0; + ColumnBlockData **blockDataArray = palloc0(columnCount * sizeof(ColumnBlockData*)); + + /* allocate block memory for deserialized data */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + if (columnMask[columnIndex]) + { + ColumnBlockData *blockData = palloc0(sizeof(ColumnBlockData)); + + blockData->existsArray = palloc0(blockRowCount * sizeof(bool)); + blockData->valueArray = palloc0(blockRowCount * sizeof(Datum)); + blockData->valueBuffer = NULL; + blockDataArray[columnIndex] = blockData; + } + } + + return blockDataArray; +} + + +/* + * FreeColumnBlockDataArray deallocates data buffers to keep deserialized exist and + * value arrays for requested columns in columnMask. + * ColumnBlockData->serializedValueBuffer lives in memory read/write context + * so it is deallocated automatically when the context is deleted. + */ +void +FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) +{ + uint32 columnIndex = 0; + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + if (blockData != NULL) + { + pfree(blockData->existsArray); + pfree(blockData->valueArray); + pfree(blockData); + } + } + + pfree(blockDataArray); +} + + +/* CStoreTableRowCount returns the exact row count of a table using skiplists */ +uint64 +CStoreTableRowCount(const char *filename) +{ + TableFooter *tableFooter = NULL; + FILE *tableFile; + ListCell *stripeMetadataCell = NULL; + uint64 totalRowCount = 0; + + StringInfo tableFooterFilename = makeStringInfo(); + + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + tableFooter = CStoreReadFooter(tableFooterFilename); + + pfree(tableFooterFilename->data); + pfree(tableFooterFilename); + + tableFile = AllocateFile(filename, PG_BINARY_R); + if (tableFile == 
NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", filename))); + } + + foreach(stripeMetadataCell, tableFooter->stripeMetadataList) + { + StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); + totalRowCount += StripeRowCount(tableFile, stripeMetadata); + } + + FreeFile(tableFile); + + return totalRowCount; +} + + +/* + * StripeRowCount reads serialized stripe footer, the first column's + * skip list, and returns number of rows for given stripe. + */ +static uint64 +StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata) +{ + uint64 rowCount = 0; + StripeFooter *stripeFooter = NULL; + StringInfo footerBuffer = NULL; + StringInfo firstColumnSkipListBuffer = NULL; + uint64 footerOffset = 0; + + footerOffset += stripeMetadata->fileOffset; + footerOffset += stripeMetadata->skipListLength; + footerOffset += stripeMetadata->dataLength; + + footerBuffer = ReadFromFile(tableFile, footerOffset, stripeMetadata->footerLength); + stripeFooter = DeserializeStripeFooter(footerBuffer); + + firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, + stripeFooter->skipListSizeArray[0]); + rowCount = DeserializeRowCount(firstColumnSkipListBuffer); + + return rowCount; +} + + +/* + * LoadFilteredStripeBuffers reads serialized stripe data from the given file. + * The function skips over blocks whose rows are refuted by restriction qualifiers, + * and only loads columns that are projected in the query. 
+ */ +static StripeBuffers * +LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, + TupleDesc tupleDescriptor, List *projectedColumnList, + List *whereClauseList) +{ + StripeBuffers *stripeBuffers = NULL; + ColumnBuffers **columnBuffersArray = NULL; + uint64 currentColumnFileOffset = 0; + uint32 columnIndex = 0; + uint32 columnCount = tupleDescriptor->natts; + + StripeFooter *stripeFooter = LoadStripeFooter(tableFile, stripeMetadata, + columnCount); + bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); + + StripeSkipList *stripeSkipList = LoadStripeSkipList(tableFile, stripeMetadata, + stripeFooter, columnCount, + projectedColumnMask, + tupleDescriptor); + + bool *selectedBlockMask = SelectedBlockMask(stripeSkipList, projectedColumnList, + whereClauseList); + + StripeSkipList *selectedBlockSkipList = + SelectedBlockSkipList(stripeSkipList, projectedColumnMask, + selectedBlockMask); + + /* load column data for projected columns */ + columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); + currentColumnFileOffset = stripeMetadata->fileOffset + stripeMetadata->skipListLength; + + for (columnIndex = 0; columnIndex < stripeFooter->columnCount; columnIndex++) + { + uint64 existsSize = stripeFooter->existsSizeArray[columnIndex]; + uint64 valueSize = stripeFooter->valueSizeArray[columnIndex]; + uint64 existsFileOffset = currentColumnFileOffset; + uint64 valueFileOffset = currentColumnFileOffset + existsSize; + + if (projectedColumnMask[columnIndex]) + { + ColumnBlockSkipNode *blockSkipNode = + selectedBlockSkipList->blockSkipNodeArray[columnIndex]; + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + uint32 blockCount = selectedBlockSkipList->blockCount; + + ColumnBuffers *columnBuffers = LoadColumnBuffers(tableFile, blockSkipNode, + blockCount, + existsFileOffset, + valueFileOffset, + attributeForm); + + columnBuffersArray[columnIndex] = columnBuffers; + } + + 
currentColumnFileOffset += existsSize; + currentColumnFileOffset += valueSize; + } + + stripeBuffers = palloc0(sizeof(StripeBuffers)); + stripeBuffers->columnCount = columnCount; + stripeBuffers->rowCount = StripeSkipListRowCount(selectedBlockSkipList); + stripeBuffers->columnBuffersArray = columnBuffersArray; + + return stripeBuffers; +} + + +/* + * ReadStripeNextRow reads the next row from the given stripe, finds the projected + * column values within this row, and accordingly sets the column values and nulls. + * Note that this function sets the values for all non-projected columns to null. + */ +static void +ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, + uint64 blockIndex, uint64 blockRowIndex, + ColumnBlockData **blockDataArray, Datum *columnValues, + bool *columnNulls) +{ + ListCell *projectedColumnCell = NULL; + + /* set all columns to null by default */ + memset(columnNulls, 1, stripeBuffers->columnCount * sizeof(bool)); + + foreach(projectedColumnCell, projectedColumnList) + { + Var *projectedColumn = lfirst(projectedColumnCell); + uint32 projectedColumnIndex = projectedColumn->varattno - 1; + ColumnBlockData *blockData = blockDataArray[projectedColumnIndex]; + + if (blockData->existsArray[blockRowIndex]) + { + columnValues[projectedColumnIndex] = blockData->valueArray[blockRowIndex]; + columnNulls[projectedColumnIndex] = false; + } + } +} + + +/* + * LoadColumnBuffers reads serialized column data from the given file. These + * column data are laid out as sequential blocks in the file; and block positions + * and lengths are retrieved from the column block skip node array. 
+ */ +static ColumnBuffers * +LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, + uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, + Form_pg_attribute attributeForm) +{ + ColumnBuffers *columnBuffers = NULL; + uint32 blockIndex = 0; + ColumnBlockBuffers **blockBuffersArray = + palloc0(blockCount * sizeof(ColumnBlockBuffers *)); + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + blockBuffersArray[blockIndex] = palloc0(sizeof(ColumnBlockBuffers)); + } + + /* + * We first read the "exists" blocks. We don't read "values" array here, + * because "exists" blocks are stored sequentially on disk, and we want to + * minimize disk seeks. + */ + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + uint64 existsOffset = existsFileOffset + blockSkipNode->existsBlockOffset; + StringInfo rawExistsBuffer = ReadFromFile(tableFile, existsOffset, + blockSkipNode->existsLength); + + blockBuffersArray[blockIndex]->existsBuffer = rawExistsBuffer; + } + + /* then read "values" blocks, which are also stored sequentially on disk */ + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + CompressionType compressionType = blockSkipNode->valueCompressionType; + uint64 valueOffset = valueFileOffset + blockSkipNode->valueBlockOffset; + StringInfo rawValueBuffer = ReadFromFile(tableFile, valueOffset, + blockSkipNode->valueLength); + + blockBuffersArray[blockIndex]->valueBuffer = rawValueBuffer; + blockBuffersArray[blockIndex]->valueCompressionType = compressionType; + } + + columnBuffers = palloc0(sizeof(ColumnBuffers)); + columnBuffers->blockBuffersArray = blockBuffersArray; + + return columnBuffers; +} + + +/* Reads and returns the given stripe's footer. 
*/ +static StripeFooter * +LoadStripeFooter(FILE *tableFile, StripeMetadata *stripeMetadata, + uint32 columnCount) +{ + StripeFooter *stripeFooter = NULL; + StringInfo footerBuffer = NULL; + uint64 footerOffset = 0; + + footerOffset += stripeMetadata->fileOffset; + footerOffset += stripeMetadata->skipListLength; + footerOffset += stripeMetadata->dataLength; + + footerBuffer = ReadFromFile(tableFile, footerOffset, stripeMetadata->footerLength); + stripeFooter = DeserializeStripeFooter(footerBuffer); + if (stripeFooter->columnCount > columnCount) + { + ereport(ERROR, (errmsg("stripe footer column count and table column count " + "don't match"))); + } + + return stripeFooter; +} + + +/* Reads the skip list for the given stripe. */ +static StripeSkipList * +LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, + StripeFooter *stripeFooter, uint32 columnCount, + bool *projectedColumnMask, + TupleDesc tupleDescriptor) +{ + StripeSkipList *stripeSkipList = NULL; + ColumnBlockSkipNode **blockSkipNodeArray = NULL; + StringInfo firstColumnSkipListBuffer = NULL; + uint64 currentColumnSkipListFileOffset = 0; + uint32 columnIndex = 0; + uint32 stripeBlockCount = 0; + uint32 stripeColumnCount = stripeFooter->columnCount; + + /* deserialize block count */ + firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, + stripeFooter->skipListSizeArray[0]); + stripeBlockCount = DeserializeBlockCount(firstColumnSkipListBuffer); + + /* deserialize column skip lists */ + blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + currentColumnSkipListFileOffset = stripeMetadata->fileOffset; + + for (columnIndex = 0; columnIndex < stripeColumnCount; columnIndex++) + { + uint64 columnSkipListSize = stripeFooter->skipListSizeArray[columnIndex]; + bool firstColumn = columnIndex == 0; + + /* + * Only selected columns' column skip lists are read. However, the first + * column's skip list is read regardless of being selected. 
It is used by + * StripeSkipListRowCount later. + */ + if (projectedColumnMask[columnIndex] || firstColumn) + { + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + + StringInfo columnSkipListBuffer = + ReadFromFile(tableFile, currentColumnSkipListFileOffset, + columnSkipListSize); + ColumnBlockSkipNode *columnSkipList = + DeserializeColumnSkipList(columnSkipListBuffer, attributeForm->attbyval, + attributeForm->attlen, stripeBlockCount); + blockSkipNodeArray[columnIndex] = columnSkipList; + } + + currentColumnSkipListFileOffset += columnSkipListSize; + } + + /* table contains additional columns added after this stripe is created */ + for (columnIndex = stripeColumnCount; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode *columnSkipList = NULL; + uint32 blockIndex = 0; + bool firstColumn = columnIndex == 0; + + /* no need to create ColumnBlockSkipList if the column is not selected */ + if (!projectedColumnMask[columnIndex] && !firstColumn) + { + blockSkipNodeArray[columnIndex] = NULL; + continue; + } + + /* create empty ColumnBlockSkipNode for missing columns*/ + columnSkipList = palloc0(stripeBlockCount * sizeof(ColumnBlockSkipNode)); + + for (blockIndex = 0; blockIndex < stripeBlockCount; blockIndex++) + { + columnSkipList[blockIndex].rowCount = 0; + columnSkipList[blockIndex].hasMinMax = false; + columnSkipList[blockIndex].minimumValue = 0; + columnSkipList[blockIndex].maximumValue = 0; + columnSkipList[blockIndex].existsBlockOffset = 0; + columnSkipList[blockIndex].valueBlockOffset = 0; + columnSkipList[blockIndex].existsLength = 0; + columnSkipList[blockIndex].valueLength = 0; + columnSkipList[blockIndex].valueCompressionType = COMPRESSION_NONE; + } + blockSkipNodeArray[columnIndex] = columnSkipList; + } + + stripeSkipList = palloc0(sizeof(StripeSkipList)); + stripeSkipList->blockSkipNodeArray = blockSkipNodeArray; + stripeSkipList->columnCount = columnCount; + stripeSkipList->blockCount = stripeBlockCount; + + 
return stripeSkipList; +} + + +/* + * SelectedBlockMask walks over each column's blocks and checks if a block can + * be filtered without reading its data. The filtering happens when all rows in + * the block can be refuted by the given qualifier conditions. + */ +static bool * +SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, + List *whereClauseList) +{ + bool *selectedBlockMask = NULL; + ListCell *columnCell = NULL; + uint32 blockIndex = 0; + List *restrictInfoList = BuildRestrictInfoList(whereClauseList); + + selectedBlockMask = palloc0(stripeSkipList->blockCount * sizeof(bool)); + memset(selectedBlockMask, true, stripeSkipList->blockCount * sizeof(bool)); + + foreach(columnCell, projectedColumnList) + { + Var *column = lfirst(columnCell); + uint32 columnIndex = column->varattno - 1; + FmgrInfo *comparisonFunction = NULL; + Node *baseConstraint = NULL; + + /* if this column's data type doesn't have a comparator, skip it */ + comparisonFunction = GetFunctionInfoOrNull(column->vartype, BTREE_AM_OID, + BTORDER_PROC); + if (comparisonFunction == NULL) + { + continue; + } + + baseConstraint = BuildBaseConstraint(column); + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + bool predicateRefuted = false; + List *constraintList = NIL; + ColumnBlockSkipNode *blockSkipNodeArray = + stripeSkipList->blockSkipNodeArray[columnIndex]; + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + + /* + * A column block with comparable data type can miss min/max values + * if all values in the block are NULL. 
+ */ + if (!blockSkipNode->hasMinMax) + { + continue; + } + + UpdateConstraint(baseConstraint, blockSkipNode->minimumValue, + blockSkipNode->maximumValue); + + constraintList = list_make1(baseConstraint); +#if (PG_VERSION_NUM >= 100000) + predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList, false); +#else + predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList); +#endif + if (predicateRefuted) + { + selectedBlockMask[blockIndex] = false; + } + } + } + + return selectedBlockMask; +} + + +/* + * GetFunctionInfoOrNull first resolves the operator for the given data type, + * access method, and support procedure. The function then uses the resolved + * operator's identifier to fill in a function manager object, and returns + * this object. This function is based on a similar function from CitusDB's code. + */ +FmgrInfo * +GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, int16 procedureId) +{ + FmgrInfo *functionInfo = NULL; + Oid operatorClassId = InvalidOid; + Oid operatorFamilyId = InvalidOid; + Oid operatorId = InvalidOid; + + /* get default operator class from pg_opclass for datum type */ + operatorClassId = GetDefaultOpClass(typeId, accessMethodId); + if (operatorClassId == InvalidOid) + { + return NULL; + } + + operatorFamilyId = get_opclass_family(operatorClassId); + if (operatorFamilyId == InvalidOid) + { + return NULL; + } + + operatorId = get_opfamily_proc(operatorFamilyId, typeId, typeId, procedureId); + if (operatorId != InvalidOid) + { + functionInfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo)); + + /* fill in the FmgrInfo struct using the operatorId */ + fmgr_info(operatorId, functionInfo); + } + + return functionInfo; +} + + +/* + * BuildRestrictInfoList builds restrict info list using the selection criteria, + * and then return this list. The function is copied from CitusDB's shard pruning + * logic. 
+ */ +static List * +BuildRestrictInfoList(List *whereClauseList) +{ + List *restrictInfoList = NIL; + + ListCell *qualCell = NULL; + foreach(qualCell, whereClauseList) + { + RestrictInfo *restrictInfo = NULL; + Node *qualNode = (Node *) lfirst(qualCell); + + restrictInfo = make_simple_restrictinfo((Expr *) qualNode); + restrictInfoList = lappend(restrictInfoList, restrictInfo); + } + + return restrictInfoList; +} + + +/* + * BuildBaseConstraint builds and returns a base constraint. This constraint + * implements an expression in the form of (var <= max && var >= min), where + * min and max values represent a block's min and max values. These block + * values are filled in after the constraint is built. This function is based + * on a similar function from CitusDB's shard pruning logic. + */ +static Node * +BuildBaseConstraint(Var *variable) +{ + Node *baseConstraint = NULL; + OpExpr *lessThanExpr = NULL; + OpExpr *greaterThanExpr = NULL; + + lessThanExpr = MakeOpExpression(variable, BTLessEqualStrategyNumber); + greaterThanExpr = MakeOpExpression(variable, BTGreaterEqualStrategyNumber); + + baseConstraint = make_and_qual((Node *) lessThanExpr, (Node *) greaterThanExpr); + + return baseConstraint; +} + + +/* + * MakeOpExpression builds an operator expression node. This operator expression + * implements the operator clause as defined by the variable and the strategy + * number. The function is copied from CitusDB's shard pruning logic. 
+ */ +static OpExpr * +MakeOpExpression(Var *variable, int16 strategyNumber) +{ + Oid typeId = variable->vartype; + Oid typeModId = variable->vartypmod; + Oid collationId = variable->varcollid; + + Oid accessMethodId = BTREE_AM_OID; + Oid operatorId = InvalidOid; + Const *constantValue = NULL; + OpExpr *expression = NULL; + + /* Load the operator from system catalogs */ + operatorId = GetOperatorByType(typeId, accessMethodId, strategyNumber); + + constantValue = makeNullConst(typeId, typeModId, collationId); + + /* Now make the expression with the given variable and a null constant */ + expression = (OpExpr *) make_opclause(operatorId, + InvalidOid, /* no result type yet */ + false, /* no return set */ + (Expr *) variable, + (Expr *) constantValue, + InvalidOid, collationId); + + /* Set implementing function id and result type */ + expression->opfuncid = get_opcode(operatorId); + expression->opresulttype = get_func_rettype(expression->opfuncid); + + return expression; +} + + +/* + * GetOperatorByType returns operator Oid for the given type, access method, + * and strategy number. Note that this function incorrectly errors out when + * the given type doesn't have its own operator but can use another compatible + * type's default operator. The function is copied from CitusDB's shard pruning + * logic. + */ +static Oid +GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber) +{ + /* Get default operator class from pg_opclass */ + Oid operatorClassId = GetDefaultOpClass(typeId, accessMethodId); + + Oid operatorFamily = get_opclass_family(operatorClassId); + + Oid operatorId = get_opfamily_member(operatorFamily, typeId, typeId, strategyNumber); + + return operatorId; +} + + +/* + * UpdateConstraint updates the base constraint with the given min/max values. + * The function is copied from CitusDB's shard pruning logic. 
+ */ +static void +UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue) +{ + BoolExpr *andExpr = (BoolExpr *) baseConstraint; + Node *lessThanExpr = (Node *) linitial(andExpr->args); + Node *greaterThanExpr = (Node *) lsecond(andExpr->args); + + Node *minNode = get_rightop((Expr *) greaterThanExpr); + Node *maxNode = get_rightop((Expr *) lessThanExpr); + Const *minConstant = NULL; + Const *maxConstant = NULL; + + Assert(IsA(minNode, Const)); + Assert(IsA(maxNode, Const)); + + minConstant = (Const *) minNode; + maxConstant = (Const *) maxNode; + + minConstant->constvalue = minValue; + maxConstant->constvalue = maxValue; + + minConstant->constisnull = false; + maxConstant->constisnull = false; + + minConstant->constbyval = true; + maxConstant->constbyval = true; +} + + +/* + * SelectedBlockSkipList constructs a new StripeSkipList in which the + * non-selected blocks are removed from the given stripeSkipList. + */ +static StripeSkipList * +SelectedBlockSkipList(StripeSkipList *stripeSkipList, bool *projectedColumnMask, + bool *selectedBlockMask) +{ + StripeSkipList *SelectedBlockSkipList = NULL; + ColumnBlockSkipNode **selectedBlockSkipNodeArray = NULL; + uint32 selectedBlockCount = 0; + uint32 blockIndex = 0; + uint32 columnIndex = 0; + uint32 columnCount = stripeSkipList->columnCount; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + if (selectedBlockMask[blockIndex]) + { + selectedBlockCount++; + } + } + + selectedBlockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + uint32 selectedBlockIndex = 0; + bool firstColumn = columnIndex == 0; + + /* first column's block skip node is always read */ + if (!projectedColumnMask[columnIndex] && !firstColumn) + { + selectedBlockSkipNodeArray[columnIndex] = NULL; + continue; + } + + Assert(stripeSkipList->blockSkipNodeArray[columnIndex] != NULL); + + 
selectedBlockSkipNodeArray[columnIndex] = palloc0(selectedBlockCount * + sizeof(ColumnBlockSkipNode)); + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + if (selectedBlockMask[blockIndex]) + { + selectedBlockSkipNodeArray[columnIndex][selectedBlockIndex] = + stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; + selectedBlockIndex++; + } + } + } + + SelectedBlockSkipList = palloc0(sizeof(StripeSkipList)); + SelectedBlockSkipList->blockSkipNodeArray = selectedBlockSkipNodeArray; + SelectedBlockSkipList->blockCount = selectedBlockCount; + SelectedBlockSkipList->columnCount = stripeSkipList->columnCount; + + return SelectedBlockSkipList; +} + + +/* + * StripeSkipListRowCount counts the number of rows in the given stripeSkipList. + * To do this, the function finds the first column, and sums up row counts across + * all blocks for that column. + */ +static uint32 +StripeSkipListRowCount(StripeSkipList *stripeSkipList) +{ + uint32 stripeSkipListRowCount = 0; + uint32 blockIndex = 0; + ColumnBlockSkipNode *firstColumnSkipNodeArray = + stripeSkipList->blockSkipNodeArray[0]; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + uint32 blockRowCount = firstColumnSkipNodeArray[blockIndex].rowCount; + stripeSkipListRowCount += blockRowCount; + } + + return stripeSkipListRowCount; +} + + +/* + * ProjectedColumnMask returns a boolean array in which the projected columns + * from the projected column list are marked as true. 
+ */ +static bool * +ProjectedColumnMask(uint32 columnCount, List *projectedColumnList) +{ + bool *projectedColumnMask = palloc0(columnCount * sizeof(bool)); + ListCell *columnCell = NULL; + + foreach(columnCell, projectedColumnList) + { + Var *column = (Var *) lfirst(columnCell); + uint32 columnIndex = column->varattno - 1; + projectedColumnMask[columnIndex] = true; + } + + return projectedColumnMask; +} + + +/* + * DeserializeBoolArray reads an array of bits from the given buffer and stores + * it in provided bool array. + */ +static void +DeserializeBoolArray(StringInfo boolArrayBuffer, bool *boolArray, + uint32 boolArrayLength) +{ + uint32 boolArrayIndex = 0; + + uint32 maximumBoolCount = boolArrayBuffer->len * 8; + if (boolArrayLength > maximumBoolCount) + { + ereport(ERROR, (errmsg("insufficient data for reading boolean array"))); + } + + for (boolArrayIndex = 0; boolArrayIndex < boolArrayLength; boolArrayIndex++) + { + uint32 byteIndex = boolArrayIndex / 8; + uint32 bitIndex = boolArrayIndex % 8; + uint8 bitmask = (1 << bitIndex); + + uint8 shiftedBit = (boolArrayBuffer->data[byteIndex] & bitmask); + if (shiftedBit == 0) + { + boolArray[boolArrayIndex] = false; + } + else + { + boolArray[boolArrayIndex] = true; + } + } +} + + +/* + * DeserializeDatumArray reads an array of datums from the given buffer and stores + * them in provided datumArray. If a value is marked as false in the exists array, + * the function assumes that the datum isn't in the buffer, and simply skips it. 
+ */ +static void +DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCount, + bool datumTypeByValue, int datumTypeLength, + char datumTypeAlign, Datum *datumArray) +{ + uint32 datumIndex = 0; + uint32 currentDatumDataOffset = 0; + + for (datumIndex = 0; datumIndex < datumCount; datumIndex++) + { + char *currentDatumDataPointer = NULL; + + if (!existsArray[datumIndex]) + { + continue; + } + + currentDatumDataPointer = datumBuffer->data + currentDatumDataOffset; + + datumArray[datumIndex] = fetch_att(currentDatumDataPointer, datumTypeByValue, + datumTypeLength); + currentDatumDataOffset = att_addlength_datum(currentDatumDataOffset, + datumTypeLength, + currentDatumDataPointer); + currentDatumDataOffset = att_align_nominal(currentDatumDataOffset, + datumTypeAlign); + + if (currentDatumDataOffset > datumBuffer->len) + { + ereport(ERROR, (errmsg("insufficient data left in datum buffer"))); + } + } +} + + +/* + * DeserializeBlockData deserializes requested data block for all columns and + * stores in blockDataArray. It uncompresses serialized data if necessary. The + * function also deallocates data buffers used for previous block, and compressed + * data buffers for the current block which will not be needed again. If a column + * data is not present serialized buffer, then default value (or null) is used + * to fill value array. 
+ */ +static void +DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, + uint32 rowCount, + ColumnBlockData **blockDataArray, TupleDesc tupleDescriptor) +{ + int columnIndex = 0; + for (columnIndex = 0; columnIndex < stripeBuffers->columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + bool columnAdded = false; + + if ((columnBuffers == NULL) && (blockData != NULL)) + { + columnAdded = true; + } + + if (columnBuffers != NULL) + { + ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; + StringInfo valueBuffer = NULL; + + /* free previous block's data buffers */ + pfree(blockData->valueBuffer->data); + pfree(blockData->valueBuffer); + + /* decompress and deserialize current block's data */ + valueBuffer = DecompressBuffer(blockBuffers->valueBuffer, + blockBuffers->valueCompressionType); + + if (blockBuffers->valueCompressionType != COMPRESSION_NONE) + { + /* compressed data is not needed anymore */ + pfree(blockBuffers->valueBuffer->data); + pfree(blockBuffers->valueBuffer); + } + + DeserializeBoolArray(blockBuffers->existsBuffer, blockData->existsArray, + rowCount); + DeserializeDatumArray(valueBuffer, blockData->existsArray, + rowCount, attributeForm->attbyval, + attributeForm->attlen, attributeForm->attalign, + blockData->valueArray); + + /* store current block's data buffer to be freed at next block read */ + blockData->valueBuffer = valueBuffer; + } + else if (columnAdded) + { + /* + * This is a column that was added after creation of this stripe. + * So we use either the default value or NULL. 
+ */ + if (attributeForm->atthasdef) + { + int rowIndex = 0; + + Datum defaultValue = ColumnDefaultValue(tupleDescriptor->constr, + attributeForm); + + for (rowIndex = 0; rowIndex < rowCount; rowIndex++) + { + blockData->existsArray[rowIndex] = true; + blockData->valueArray[rowIndex] = defaultValue; + } + } + else + { + memset(blockData->existsArray, false, rowCount); + } + + } + } +} + + +/* + * ColumnDefaultValue returns default value for given column. Only const values + * are supported. The function errors on any other default value expressions. + */ +static Datum +ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm) +{ + Datum defaultValue = 0; + Node *defaultValueNode = NULL; + int defValIndex = 0; + + for (defValIndex = 0; defValIndex < tupleConstraints->num_defval; defValIndex++) + { + AttrDefault defaultValue = tupleConstraints->defval[defValIndex]; + if (defaultValue.adnum == attributeForm->attnum) + { + defaultValueNode = stringToNode(defaultValue.adbin); + break; + } + } + + Assert(defaultValueNode != NULL); + + /* try reducing the default value node to a const node */ + defaultValueNode = eval_const_expressions(NULL, defaultValueNode); + if (IsA(defaultValueNode, Const)) + { + Const *constNode = (Const *) defaultValueNode; + defaultValue = constNode->constvalue; + } + else + { + const char *columnName = NameStr(attributeForm->attname); + ereport(ERROR, (errmsg("unsupported default value for column \"%s\"", columnName), + errhint("Expression is either mutable or " + "does not evaluate to constant value"))); + } + + return defaultValue; +} + + +/* Returns the size of the given file handle. 
*/ +static int64 +FILESize(FILE *file) +{ + int64 fileSize = 0; + int fseekResult = 0; + + errno = 0; + fseekResult = fseeko(file, 0, SEEK_END); + if (fseekResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not seek in file: %m"))); + } + + fileSize = ftello(file); + if (fileSize == -1) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not get position in file: %m"))); + } + + return fileSize; +} + + +/* Reads the given segment from the given file. */ +static StringInfo +ReadFromFile(FILE *file, uint64 offset, uint32 size) +{ + int fseekResult = 0; + int freadResult = 0; + int fileError = 0; + + StringInfo resultBuffer = makeStringInfo(); + enlargeStringInfo(resultBuffer, size); + resultBuffer->len = size; + + if (size == 0) + { + return resultBuffer; + } + + errno = 0; + fseekResult = fseeko(file, offset, SEEK_SET); + if (fseekResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not seek in file: %m"))); + } + + freadResult = fread(resultBuffer->data, size, 1, file); + if (freadResult != 1) + { + ereport(ERROR, (errmsg("could not read enough data from file"))); + } + + fileError = ferror(file); + if (fileError != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not read file: %m"))); + } + + return resultBuffer; +} + + + + +/* + * ResetUncompressedBlockData iterates over deserialized column block data + * and sets valueBuffer field to empty buffer. This field is allocated in stripe + * memory context and becomes invalid once memory context is reset. 
+ */ +static void +ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount) +{ + uint32 columnIndex = 0; + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + if (blockData != NULL) + { + blockData->valueBuffer = makeStringInfo(); + } + } +} diff --git a/cstore_version_compat.h b/cstore_version_compat.h new file mode 100644 index 000000000..a7f961fcd --- /dev/null +++ b/cstore_version_compat.h @@ -0,0 +1,58 @@ +/*------------------------------------------------------------------------- + * + * cstore_version_compat.h + * + * Compatibility macros for writing code agnostic to PostgreSQL versions + * + * Copyright (c) 2018, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_COMPAT_H +#define CSTORE_COMPAT_H + +#if PG_VERSION_NUM < 100000 + +/* Accessor for the i'th attribute of tupdesc. */ +#define TupleDescAttr(tupdesc, i) ((tupdesc)->attrs[(i)]) + +#endif + +#if PG_VERSION_NUM < 110000 +#define ALLOCSET_DEFAULT_SIZES ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE +#define ACLCHECK_OBJECT_TABLE ACL_KIND_CLASS +#else +#define ACLCHECK_OBJECT_TABLE OBJECT_TABLE + +#define ExplainPropertyLong(qlabel, value, es) \ + ExplainPropertyInteger(qlabel, NULL, value, es) +#endif + +#define PREVIOUS_UTILITY (PreviousProcessUtilityHook != NULL \ + ? 
PreviousProcessUtilityHook : standard_ProcessUtility) +#if PG_VERSION_NUM >= 100000 +#define CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, \ + destReceiver, completionTag) \ + PREVIOUS_UTILITY(plannedStatement, queryString, context, paramListInfo, \ + queryEnvironment, destReceiver, completionTag) +#else +#define CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, \ + destReceiver, completionTag) \ + PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, \ + completionTag) +#endif + +#if PG_VERSION_NUM < 120000 +#define TTS_EMPTY(slot) ((slot)->tts_isempty) +#define ExecForceStoreHeapTuple(tuple, slot, shouldFree) \ + ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); +#define HeapScanDesc TableScanDesc +#define table_beginscan heap_beginscan +#define table_endscan heap_endscan + +#endif + +#endif /* CSTORE_COMPAT_H */ diff --git a/cstore_writer.c b/cstore_writer.c new file mode 100644 index 000000000..b69064215 --- /dev/null +++ b/cstore_writer.c @@ -0,0 +1,1017 @@ +/*------------------------------------------------------------------------- + * + * cstore_writer.c + * + * This file contains function definitions for writing cstore files. This + * includes the logic for writing file level metadata, writing row stripes, + * and calculating block skip nodes. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "cstore_fdw.h" +#include "cstore_metadata_serialization.h" +#include "cstore_version_compat.h" + +#include +#include "access/nbtree.h" +#include "catalog/pg_collation.h" +#include "commands/defrem.h" +#if PG_VERSION_NUM >= 120000 +#include "optimizer/optimizer.h" +#else +#include "optimizer/var.h" +#endif +#include "port.h" +#include "storage/fd.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + + +static void CStoreWriteFooter(StringInfo footerFileName, TableFooter *tableFooter); +static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, + uint32 blockRowCount, + uint32 columnCount); +static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, + uint32 blockRowCount, + uint32 columnCount); +static StripeMetadata FlushStripe(TableWriteState *writeState); +static StringInfo * CreateSkipListBufferArray(StripeSkipList *stripeSkipList, + TupleDesc tupleDescriptor); +static StripeFooter * CreateStripeFooter(StripeSkipList *stripeSkipList, + StringInfo *skipListBufferArray); +static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); +static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, + bool datumTypeByValue, int datumTypeLength, + char datumTypeAlign); +static void SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, + uint32 rowCount); +static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, + Datum columnValue, bool columnTypeByValue, + int columnTypeLength, Oid columnCollation, + FmgrInfo *comparisonFunction); +static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); +static void AppendStripeMetadata(TableFooter *tableFooter, + StripeMetadata stripeMetadata); +static void WriteToFile(FILE *file, void *data, uint32 dataLength); +static void SyncAndCloseFile(FILE *file); 
+static StringInfo CopyStringInfo(StringInfo sourceString); + + +/* + * CStoreBeginWrite initializes a cstore data load operation and returns a table + * handle. This handle should be used for adding the row values and finishing the + * data load operation. If the cstore footer file already exists, we read the + * footer and then seek to right after the last stripe where the new stripes + * will be added. + */ +TableWriteState * +CStoreBeginWrite(const char *filename, CompressionType compressionType, + uint64 stripeMaxRowCount, uint32 blockRowCount, + TupleDesc tupleDescriptor) +{ + TableWriteState *writeState = NULL; + FILE *tableFile = NULL; + StringInfo tableFooterFilename = NULL; + TableFooter *tableFooter = NULL; + FmgrInfo **comparisonFunctionArray = NULL; + MemoryContext stripeWriteContext = NULL; + uint64 currentFileOffset = 0; + uint32 columnCount = 0; + uint32 columnIndex = 0; + struct stat statBuffer; + int statResult = 0; + bool *columnMaskArray = NULL; + ColumnBlockData **blockData = NULL; + + tableFooterFilename = makeStringInfo(); + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + statResult = stat(tableFooterFilename->data, &statBuffer); + if (statResult < 0) + { + tableFile = AllocateFile(filename, "w"); + if (tableFile == NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + filename))); + } + + tableFooter = palloc0(sizeof(TableFooter)); + tableFooter->blockRowCount = blockRowCount; + tableFooter->stripeMetadataList = NIL; + } + else + { + tableFile = AllocateFile(filename, "r+"); + if (tableFile == NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + filename))); + } + + tableFooter = CStoreReadFooter(tableFooterFilename); + } + + /* + * If stripeMetadataList is not empty, jump to the position right after + * the last position. 
+ */ + if (tableFooter->stripeMetadataList != NIL) + { + StripeMetadata *lastStripe = NULL; + uint64 lastStripeSize = 0; + int fseekResult = 0; + + lastStripe = llast(tableFooter->stripeMetadataList); + lastStripeSize += lastStripe->skipListLength; + lastStripeSize += lastStripe->dataLength; + lastStripeSize += lastStripe->footerLength; + + currentFileOffset = lastStripe->fileOffset + lastStripeSize; + + errno = 0; + fseekResult = fseeko(tableFile, currentFileOffset, SEEK_SET); + if (fseekResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not seek in file \"%s\": %m", filename))); + } + } + + /* get comparison function pointers for each of the columns */ + columnCount = tupleDescriptor->natts; + comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + FmgrInfo *comparisonFunction = NULL; + FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + + if (!attributeForm->attisdropped) + { + Oid typeId = attributeForm->atttypid; + + comparisonFunction = GetFunctionInfoOrNull(typeId, BTREE_AM_OID, BTORDER_PROC); + } + + comparisonFunctionArray[columnIndex] = comparisonFunction; + } + + /* + * We allocate all stripe specific data in the stripeWriteContext, and + * reset this memory context once we have flushed the stripe to the file. + * This is to avoid memory leaks. 
+ */ + stripeWriteContext = AllocSetContextCreate(CurrentMemoryContext, + "Stripe Write Memory Context", + ALLOCSET_DEFAULT_SIZES); + + columnMaskArray = palloc(columnCount * sizeof(bool)); + memset(columnMaskArray, true, columnCount); + + blockData = CreateEmptyBlockDataArray(columnCount, columnMaskArray, blockRowCount); + + writeState = palloc0(sizeof(TableWriteState)); + writeState->tableFile = tableFile; + writeState->tableFooterFilename = tableFooterFilename; + writeState->tableFooter = tableFooter; + writeState->compressionType = compressionType; + writeState->stripeMaxRowCount = stripeMaxRowCount; + writeState->tupleDescriptor = tupleDescriptor; + writeState->currentFileOffset = currentFileOffset; + writeState->comparisonFunctionArray = comparisonFunctionArray; + writeState->stripeBuffers = NULL; + writeState->stripeSkipList = NULL; + writeState->stripeWriteContext = stripeWriteContext; + writeState->blockDataArray = blockData; + writeState->compressionBuffer = NULL; + + return writeState; +} + + +/* + * CStoreWriteRow adds a row to the cstore file. If the stripe is not initialized, + * we create structures to hold stripe data and skip list. Then, we serialize and + * append data to serialized value buffer for each of the columns and update + * corresponding skip nodes. Then, whole block data is compressed at every + * rowBlockCount insertion. Then, if row count exceeds stripeMaxRowCount, we flush + * the stripe, and add its metadata to the table footer. 
+ */ +void +CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNulls) +{ + uint32 columnIndex = 0; + uint32 blockIndex = 0; + uint32 blockRowIndex = 0; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + StripeSkipList *stripeSkipList = writeState->stripeSkipList; + uint32 columnCount = writeState->tupleDescriptor->natts; + TableFooter *tableFooter = writeState->tableFooter; + const uint32 blockRowCount = tableFooter->blockRowCount; + ColumnBlockData **blockDataArray = writeState->blockDataArray; + MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); + + if (stripeBuffers == NULL) + { + stripeBuffers = CreateEmptyStripeBuffers(writeState->stripeMaxRowCount, + blockRowCount, columnCount); + stripeSkipList = CreateEmptyStripeSkipList(writeState->stripeMaxRowCount, + blockRowCount, columnCount); + writeState->stripeBuffers = stripeBuffers; + writeState->stripeSkipList = stripeSkipList; + writeState->compressionBuffer = makeStringInfo(); + + /* + * serializedValueBuffer lives in stripe write memory context so it needs to be + * initialized when the stripe is created. 
+ */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + blockData->valueBuffer = makeStringInfo(); + } + } + + blockIndex = stripeBuffers->rowCount / blockRowCount; + blockRowIndex = stripeBuffers->rowCount % blockRowCount; + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + ColumnBlockSkipNode **blockSkipNodeArray = stripeSkipList->blockSkipNodeArray; + ColumnBlockSkipNode *blockSkipNode = + &blockSkipNodeArray[columnIndex][blockIndex]; + + if (columnNulls[columnIndex]) + { + blockData->existsArray[blockRowIndex] = false; + } + else + { + FmgrInfo *comparisonFunction = + writeState->comparisonFunctionArray[columnIndex]; + Form_pg_attribute attributeForm = + TupleDescAttr(writeState->tupleDescriptor, columnIndex); + bool columnTypeByValue = attributeForm->attbyval; + int columnTypeLength = attributeForm->attlen; + Oid columnCollation = attributeForm->attcollation; + char columnTypeAlign = attributeForm->attalign; + + blockData->existsArray[blockRowIndex] = true; + + SerializeSingleDatum(blockData->valueBuffer, columnValues[columnIndex], + columnTypeByValue, columnTypeLength, columnTypeAlign); + + UpdateBlockSkipNodeMinMax(blockSkipNode, columnValues[columnIndex], + columnTypeByValue, columnTypeLength, + columnCollation, comparisonFunction); + } + + blockSkipNode->rowCount++; + } + + stripeSkipList->blockCount = blockIndex + 1; + + /* last row of the block is inserted serialize the block */ + if (blockRowIndex == blockRowCount - 1) + { + SerializeBlockData(writeState, blockIndex, blockRowCount); + } + + stripeBuffers->rowCount++; + if (stripeBuffers->rowCount >= writeState->stripeMaxRowCount) + { + StripeMetadata stripeMetadata = FlushStripe(writeState); + MemoryContextReset(writeState->stripeWriteContext); + + /* set stripe data and skip list to NULL so they are recreated next time */ + 
writeState->stripeBuffers = NULL; + writeState->stripeSkipList = NULL; + + /* + * Append stripeMetadata in old context so next MemoryContextReset + * doesn't free it. + */ + MemoryContextSwitchTo(oldContext); + AppendStripeMetadata(tableFooter, stripeMetadata); + } + else + { + MemoryContextSwitchTo(oldContext); + } +} + + +/* + * CStoreEndWrite finishes a cstore data load operation. If we have an unflushed + * stripe, we flush it. Then, we sync and close the cstore data file. Last, we + * flush the footer to a temporary file, and atomically rename this temporary + * file to the original footer file. + */ +void +CStoreEndWrite(TableWriteState *writeState) +{ + StringInfo tableFooterFilename = NULL; + StringInfo tempTableFooterFileName = NULL; + int renameResult = 0; + int columnCount = writeState->tupleDescriptor->natts; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + + if (stripeBuffers != NULL) + { + MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); + + StripeMetadata stripeMetadata = FlushStripe(writeState); + MemoryContextReset(writeState->stripeWriteContext); + + MemoryContextSwitchTo(oldContext); + AppendStripeMetadata(writeState->tableFooter, stripeMetadata); + } + + SyncAndCloseFile(writeState->tableFile); + + tableFooterFilename = writeState->tableFooterFilename; + tempTableFooterFileName = makeStringInfo(); + appendStringInfo(tempTableFooterFileName, "%s%s", tableFooterFilename->data, + CSTORE_TEMP_FILE_SUFFIX); + + CStoreWriteFooter(tempTableFooterFileName, writeState->tableFooter); + + renameResult = rename(tempTableFooterFileName->data, tableFooterFilename->data); + if (renameResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + tempTableFooterFileName->data, + tableFooterFilename->data))); + } + + pfree(tempTableFooterFileName->data); + pfree(tempTableFooterFileName); + + MemoryContextDelete(writeState->stripeWriteContext); + 
list_free_deep(writeState->tableFooter->stripeMetadataList); + pfree(writeState->tableFooter); + pfree(writeState->tableFooterFilename->data); + pfree(writeState->tableFooterFilename); + pfree(writeState->comparisonFunctionArray); + FreeColumnBlockDataArray(writeState->blockDataArray, columnCount); + pfree(writeState); +} + + +/* + * CStoreWriteFooter writes the given footer to given file. First, the function + * serializes and writes the footer to the file. Then, the function serializes + * and writes the postscript. Then, the function writes the postscript size as + * the last byte of the file. Last, the function syncs and closes the footer file. + */ +static void +CStoreWriteFooter(StringInfo tableFooterFilename, TableFooter *tableFooter) +{ + FILE *tableFooterFile = NULL; + StringInfo tableFooterBuffer = NULL; + StringInfo postscriptBuffer = NULL; + uint8 postscriptSize = 0; + + tableFooterFile = AllocateFile(tableFooterFilename->data, PG_BINARY_W); + if (tableFooterFile == NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + tableFooterFilename->data))); + } + + /* write the footer */ + tableFooterBuffer = SerializeTableFooter(tableFooter); + WriteToFile(tableFooterFile, tableFooterBuffer->data, tableFooterBuffer->len); + + /* write the postscript */ + postscriptBuffer = SerializePostScript(tableFooterBuffer->len); + WriteToFile(tableFooterFile, postscriptBuffer->data, postscriptBuffer->len); + + /* write the 1-byte postscript size */ + Assert(postscriptBuffer->len < CSTORE_POSTSCRIPT_SIZE_MAX); + postscriptSize = postscriptBuffer->len; + WriteToFile(tableFooterFile, &postscriptSize, CSTORE_POSTSCRIPT_SIZE_LENGTH); + + SyncAndCloseFile(tableFooterFile); + + pfree(tableFooterBuffer->data); + pfree(tableFooterBuffer); + pfree(postscriptBuffer->data); + pfree(postscriptBuffer); +} + + +/* + * CreateEmptyStripeBuffers allocates an empty StripeBuffers structure with the given + * column count. 
+ */ +static StripeBuffers * +CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, uint32 blockRowCount, + uint32 columnCount) +{ + StripeBuffers *stripeBuffers = NULL; + uint32 columnIndex = 0; + uint32 maxBlockCount = (stripeMaxRowCount / blockRowCount) + 1; + ColumnBuffers **columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + uint32 blockIndex = 0; + ColumnBlockBuffers **blockBuffersArray = + palloc0(maxBlockCount * sizeof(ColumnBlockBuffers *)); + + for (blockIndex = 0; blockIndex < maxBlockCount; blockIndex++) + { + blockBuffersArray[blockIndex] = palloc0(sizeof(ColumnBlockBuffers)); + blockBuffersArray[blockIndex]->existsBuffer = NULL; + blockBuffersArray[blockIndex]->valueBuffer = NULL; + blockBuffersArray[blockIndex]->valueCompressionType = COMPRESSION_NONE; + } + + columnBuffersArray[columnIndex] = palloc0(sizeof(ColumnBuffers)); + columnBuffersArray[columnIndex]->blockBuffersArray = blockBuffersArray; + } + + stripeBuffers = palloc0(sizeof(StripeBuffers)); + stripeBuffers->columnBuffersArray = columnBuffersArray; + stripeBuffers->columnCount = columnCount; + stripeBuffers->rowCount = 0; + + return stripeBuffers; +} + + +/* + * CreateEmptyStripeSkipList allocates an empty StripeSkipList structure with + * the given column count. This structure has enough blocks to hold statistics + * for stripeMaxRowCount rows. 
+ */ +static StripeSkipList * +CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, + uint32 columnCount) +{ + StripeSkipList *stripeSkipList = NULL; + uint32 columnIndex = 0; + uint32 maxBlockCount = (stripeMaxRowCount / blockRowCount) + 1; + + ColumnBlockSkipNode **blockSkipNodeArray = + palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + blockSkipNodeArray[columnIndex] = + palloc0(maxBlockCount * sizeof(ColumnBlockSkipNode)); + } + + stripeSkipList = palloc0(sizeof(StripeSkipList)); + stripeSkipList->columnCount = columnCount; + stripeSkipList->blockCount = 0; + stripeSkipList->blockSkipNodeArray = blockSkipNodeArray; + + return stripeSkipList; +} + + +/* + * FlushStripe flushes current stripe data into the file. The function first ensures + * the last data block for each column is properly serialized and compressed. Then, + * the function creates the skip list and footer buffers. Finally, the function + * flushes the skip list, data, and footer buffers to the file. 
+ */ +static StripeMetadata +FlushStripe(TableWriteState *writeState) +{ + StripeMetadata stripeMetadata = {0, 0, 0, 0}; + uint64 skipListLength = 0; + uint64 dataLength = 0; + StringInfo *skipListBufferArray = NULL; + StripeFooter *stripeFooter = NULL; + StringInfo stripeFooterBuffer = NULL; + uint32 columnIndex = 0; + uint32 blockIndex = 0; + TableFooter *tableFooter = writeState->tableFooter; + FILE *tableFile = writeState->tableFile; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + StripeSkipList *stripeSkipList = writeState->stripeSkipList; + ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray; + TupleDesc tupleDescriptor = writeState->tupleDescriptor; + uint32 columnCount = tupleDescriptor->natts; + uint32 blockCount = stripeSkipList->blockCount; + uint32 blockRowCount = tableFooter->blockRowCount; + uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; + uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; + + /* + * check if the last block needs serialization , the last block was not serialized + * if it was not full yet, e.g. 
(rowCount > 0) + */ + if (lastBlockRowCount > 0) + { + SerializeBlockData(writeState, lastBlockIndex, lastBlockRowCount); + } + + /* update buffer sizes and positions in stripe skip list */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; + uint64 currentExistsBlockOffset = 0; + uint64 currentValueBlockOffset = 0; + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + uint64 existsBufferSize = blockBuffers->existsBuffer->len; + uint64 valueBufferSize = blockBuffers->valueBuffer->len; + CompressionType valueCompressionType = blockBuffers->valueCompressionType; + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + + blockSkipNode->existsBlockOffset = currentExistsBlockOffset; + blockSkipNode->existsLength = existsBufferSize; + blockSkipNode->valueBlockOffset = currentValueBlockOffset; + blockSkipNode->valueLength = valueBufferSize; + blockSkipNode->valueCompressionType = valueCompressionType; + + currentExistsBlockOffset += existsBufferSize; + currentValueBlockOffset += valueBufferSize; + } + } + + /* create skip list and footer buffers */ + skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor); + stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray); + stripeFooterBuffer = SerializeStripeFooter(stripeFooter); + + /* + * Each stripe has three sections: + * (1) Skip list, which contains statistics for each column block, and can + * be used to skip reading row blocks that are refuted by WHERE clause list, + * (2) Data section, in which we store data for each column continuously. + * We store data for each for each column in blocks. For each block, we + * store two buffers: "exists" buffer, and "value" buffer. 
"exists" buffer + * tells which values are not NULL. "value" buffer contains values for + * present values. For each column, we first store all "exists" buffers, + * and then all "value" buffers. + * (3) Stripe footer, which contains the skip list buffer size, exists buffer + * size, and value buffer size for each of the columns. + * + * We start by flushing the skip list buffers. + */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + StringInfo skipListBuffer = skipListBufferArray[columnIndex]; + WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len); + } + + /* then, we flush the data buffers */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + uint32 blockIndex = 0; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + StringInfo existsBuffer = blockBuffers->existsBuffer; + + WriteToFile(tableFile, existsBuffer->data, existsBuffer->len); + } + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + StringInfo valueBuffer = blockBuffers->valueBuffer; + + WriteToFile(tableFile, valueBuffer->data, valueBuffer->len); + } + } + + /* finally, we flush the footer buffer */ + WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len); + + /* set stripe metadata */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + skipListLength += stripeFooter->skipListSizeArray[columnIndex]; + dataLength += stripeFooter->existsSizeArray[columnIndex]; + dataLength += stripeFooter->valueSizeArray[columnIndex]; + } + + stripeMetadata.fileOffset = writeState->currentFileOffset; + stripeMetadata.skipListLength = skipListLength; + stripeMetadata.dataLength = dataLength; + 
stripeMetadata.footerLength = stripeFooterBuffer->len; + + /* advance current file offset */ + writeState->currentFileOffset += skipListLength; + writeState->currentFileOffset += dataLength; + writeState->currentFileOffset += stripeFooterBuffer->len; + + return stripeMetadata; +} + + +/* + * CreateSkipListBufferArray serializes the skip list for each column of the + * given stripe and returns the result as an array. + */ +static StringInfo * +CreateSkipListBufferArray(StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor) +{ + StringInfo *skipListBufferArray = NULL; + uint32 columnIndex = 0; + uint32 columnCount = stripeSkipList->columnCount; + + skipListBufferArray = palloc0(columnCount * sizeof(StringInfo)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + StringInfo skipListBuffer = NULL; + ColumnBlockSkipNode *blockSkipNodeArray = + stripeSkipList->blockSkipNodeArray[columnIndex]; + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + + skipListBuffer = SerializeColumnSkipList(blockSkipNodeArray, + stripeSkipList->blockCount, + attributeForm->attbyval, + attributeForm->attlen); + + skipListBufferArray[columnIndex] = skipListBuffer; + } + + return skipListBufferArray; +} + + +/* Creates and returns the footer for given stripe. 
*/ +static StripeFooter * +CreateStripeFooter(StripeSkipList *stripeSkipList, StringInfo *skipListBufferArray) +{ + StripeFooter *stripeFooter = NULL; + uint32 columnIndex = 0; + uint32 columnCount = stripeSkipList->columnCount; + uint64 *skipListSizeArray = palloc0(columnCount * sizeof(uint64)); + uint64 *existsSizeArray = palloc0(columnCount * sizeof(uint64)); + uint64 *valueSizeArray = palloc0(columnCount * sizeof(uint64)); + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode *blockSkipNodeArray = + stripeSkipList->blockSkipNodeArray[columnIndex]; + uint32 blockIndex = 0; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + existsSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].existsLength; + valueSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].valueLength; + } + skipListSizeArray[columnIndex] = skipListBufferArray[columnIndex]->len; + } + + stripeFooter = palloc0(sizeof(StripeFooter)); + stripeFooter->columnCount = columnCount; + stripeFooter->skipListSizeArray = skipListSizeArray; + stripeFooter->existsSizeArray = existsSizeArray; + stripeFooter->valueSizeArray = valueSizeArray; + + return stripeFooter; +} + + +/* + * SerializeBoolArray serializes the given boolean array and returns the result + * as a StringInfo. This function packs every 8 boolean values into one byte. 
+ */ +static StringInfo +SerializeBoolArray(bool *boolArray, uint32 boolArrayLength) +{ + StringInfo boolArrayBuffer = NULL; + uint32 boolArrayIndex = 0; + uint32 byteCount = (boolArrayLength + 7) / 8; + + boolArrayBuffer = makeStringInfo(); + enlargeStringInfo(boolArrayBuffer, byteCount); + boolArrayBuffer->len = byteCount; + memset(boolArrayBuffer->data, 0, byteCount); + + for (boolArrayIndex = 0; boolArrayIndex < boolArrayLength; boolArrayIndex++) + { + if (boolArray[boolArrayIndex]) + { + uint32 byteIndex = boolArrayIndex / 8; + uint32 bitIndex = boolArrayIndex % 8; + boolArrayBuffer->data[byteIndex] |= (1 << bitIndex); + } + } + + return boolArrayBuffer; +} + + +/* + * SerializeSingleDatum serializes the given datum value and appends it to the + * provided string info buffer. + */ +static void +SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, + int datumTypeLength, char datumTypeAlign) +{ + uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum); + uint32 datumLengthAligned = att_align_nominal(datumLength, datumTypeAlign); + char *currentDatumDataPointer = NULL; + + enlargeStringInfo(datumBuffer, datumLengthAligned); + + currentDatumDataPointer = datumBuffer->data + datumBuffer->len; + memset(currentDatumDataPointer, 0, datumLengthAligned); + + if (datumTypeLength > 0) + { + if (datumTypeByValue) + { + store_att_byval(currentDatumDataPointer, datum, datumTypeLength); + } + else + { + memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumTypeLength); + } + } + else + { + Assert(!datumTypeByValue); + memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumLength); + } + + datumBuffer->len += datumLengthAligned; +} + + +/* + * SerializeBlockData serializes and compresses block data at given block index with given + * compression type for every column. 
+ */ +static void +SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCount) +{ + uint32 columnIndex = 0; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + ColumnBlockData **blockDataArray = writeState->blockDataArray; + CompressionType requestedCompressionType = writeState->compressionType; + const uint32 columnCount = stripeBuffers->columnCount; + StringInfo compressionBuffer = writeState->compressionBuffer; + + /* serialize exist values, data values are already serialized */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; + ColumnBlockData *blockData = blockDataArray[columnIndex]; + + blockBuffers->existsBuffer = SerializeBoolArray(blockData->existsArray, rowCount); + } + + /* + * check and compress value buffers, if a value buffer is not compressable + * then keep it as uncompressed, store compression information. + */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; + ColumnBlockData *blockData = blockDataArray[columnIndex]; + StringInfo serializedValueBuffer = NULL; + CompressionType actualCompressionType = COMPRESSION_NONE; + bool compressed = false; + + serializedValueBuffer = blockData->valueBuffer; + + /* the only other supported compression type is pg_lz for now */ + Assert(requestedCompressionType == COMPRESSION_NONE || + requestedCompressionType == COMPRESSION_PG_LZ); + + /* + * if serializedValueBuffer is be compressed, update serializedValueBuffer + * with compressed data and store compression type. 
+ */ + compressed = CompressBuffer(serializedValueBuffer, compressionBuffer, + requestedCompressionType); + if (compressed) + { + serializedValueBuffer = compressionBuffer; + actualCompressionType = COMPRESSION_PG_LZ; + } + + /* store (compressed) value buffer */ + blockBuffers->valueCompressionType = actualCompressionType; + blockBuffers->valueBuffer = CopyStringInfo(serializedValueBuffer); + + /* valueBuffer needs to be reset for next block's data */ + resetStringInfo(blockData->valueBuffer); + } +} + + +/* + * UpdateBlockSkipNodeMinMax takes the given column value, and checks if this + * value falls outside the range of minimum/maximum values of the given column + * block skip node. If it does, the function updates the column block skip node + * accordingly. + */ +static void +UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, Datum columnValue, + bool columnTypeByValue, int columnTypeLength, + Oid columnCollation, FmgrInfo *comparisonFunction) +{ + bool hasMinMax = blockSkipNode->hasMinMax; + Datum previousMinimum = blockSkipNode->minimumValue; + Datum previousMaximum = blockSkipNode->maximumValue; + Datum currentMinimum = 0; + Datum currentMaximum = 0; + + /* if type doesn't have a comparison function, skip min/max values */ + if (comparisonFunction == NULL) + { + return; + } + + if (!hasMinMax) + { + currentMinimum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + currentMaximum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + } + else + { + Datum minimumComparisonDatum = FunctionCall2Coll(comparisonFunction, + columnCollation, columnValue, + previousMinimum); + Datum maximumComparisonDatum = FunctionCall2Coll(comparisonFunction, + columnCollation, columnValue, + previousMaximum); + int minimumComparison = DatumGetInt32(minimumComparisonDatum); + int maximumComparison = DatumGetInt32(maximumComparisonDatum); + + if (minimumComparison < 0) + { + currentMinimum = DatumCopy(columnValue, columnTypeByValue, 
columnTypeLength); + } + else + { + currentMinimum = previousMinimum; + } + + if (maximumComparison > 0) + { + currentMaximum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + } + else + { + currentMaximum = previousMaximum; + } + } + + blockSkipNode->hasMinMax = true; + blockSkipNode->minimumValue = currentMinimum; + blockSkipNode->maximumValue = currentMaximum; +} + + +/* Creates a copy of the given datum. */ +static Datum +DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) +{ + Datum datumCopy = 0; + + if (datumTypeByValue) + { + datumCopy = datum; + } + else + { + uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum); + char *datumData = palloc0(datumLength); + memcpy(datumData, DatumGetPointer(datum), datumLength); + + datumCopy = PointerGetDatum(datumData); + } + + return datumCopy; +} + + +/* + * AppendStripeMetadata adds a copy of given stripeMetadata to the given + * table footer's stripeMetadataList. + */ +static void +AppendStripeMetadata(TableFooter *tableFooter, StripeMetadata stripeMetadata) +{ + StripeMetadata *stripeMetadataCopy = palloc0(sizeof(StripeMetadata)); + memcpy(stripeMetadataCopy, &stripeMetadata, sizeof(StripeMetadata)); + + tableFooter->stripeMetadataList = lappend(tableFooter->stripeMetadataList, + stripeMetadataCopy); +} + + +/* Writes the given data to the given file pointer and checks for errors. */ +static void +WriteToFile(FILE *file, void *data, uint32 dataLength) +{ + int writeResult = 0; + int errorResult = 0; + + if (dataLength == 0) + { + return; + } + + errno = 0; + writeResult = fwrite(data, dataLength, 1, file); + if (writeResult != 1) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not write file: %m"))); + } + + errorResult = ferror(file); + if (errorResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("error in file: %m"))); + } +} + + +/* Flushes, syncs, and closes the given file pointer and checks for errors. 
*/ +static void +SyncAndCloseFile(FILE *file) +{ + int flushResult = 0; + int syncResult = 0; + int errorResult = 0; + int freeResult = 0; + + errno = 0; + flushResult = fflush(file); + if (flushResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not flush file: %m"))); + } + + syncResult = pg_fsync(fileno(file)); + if (syncResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not sync file: %m"))); + } + + errorResult = ferror(file); + if (errorResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("error in file: %m"))); + } + + freeResult = FreeFile(file); + if (freeResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not close file: %m"))); + } +} + + +/* + * CopyStringInfo creates a deep copy of given source string allocating only needed + * amount of memory. + */ +static StringInfo +CopyStringInfo(StringInfo sourceString) +{ + StringInfo targetString = palloc0(sizeof(StringInfoData)); + + if (sourceString->len > 0) + { + targetString->data = palloc0(sourceString->len); + targetString->len = sourceString->len; + targetString->maxlen = sourceString->len; + memcpy(targetString->data, sourceString->data, sourceString->len); + } + + return targetString; +} diff --git a/data/array_types.csv b/data/array_types.csv new file mode 100644 index 000000000..f20e2d2d0 --- /dev/null +++ b/data/array_types.csv @@ -0,0 +1,3 @@ +"{1,2,3}","{1,2,3}","{a,b,c}" +{},{},{} +"{-2147483648,2147483647}","{-9223372036854775808,9223372036854775807}","{""""}" diff --git a/data/block_filtering.csv b/data/block_filtering.csv new file mode 100644 index 000000000..9812045fd --- /dev/null +++ b/data/block_filtering.csv @@ -0,0 +1,10000 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 
+70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 
+476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 +702 +703 +704 +705 +706 +707 +708 +709 +710 +711 +712 +713 +714 +715 +716 +717 +718 +719 +720 +721 +722 +723 +724 +725 +726 +727 +728 +729 +730 +731 +732 +733 +734 +735 +736 +737 +738 +739 +740 +741 +742 +743 +744 +745 +746 +747 +748 +749 +750 +751 +752 +753 +754 +755 +756 +757 +758 +759 +760 +761 +762 +763 +764 +765 +766 +767 +768 +769 +770 +771 +772 +773 +774 +775 +776 +777 +778 +779 +780 +781 +782 +783 +784 +785 +786 +787 +788 +789 +790 +791 +792 +793 +794 +795 +796 +797 +798 +799 +800 +801 +802 +803 +804 +805 +806 +807 +808 +809 +810 +811 +812 +813 +814 +815 +816 +817 +818 +819 +820 +821 +822 +823 +824 +825 +826 +827 +828 +829 +830 +831 +832 +833 +834 +835 +836 +837 +838 +839 +840 +841 +842 +843 +844 +845 +846 +847 +848 +849 +850 +851 +852 +853 +854 +855 +856 +857 +858 +859 +860 +861 +862 +863 +864 +865 +866 +867 +868 +869 +870 +871 +872 +873 +874 +875 
+876 +877 +878 +879 +880 +881 +882 +883 +884 +885 +886 +887 +888 +889 +890 +891 +892 +893 +894 +895 +896 +897 +898 +899 +900 +901 +902 +903 +904 +905 +906 +907 +908 +909 +910 +911 +912 +913 +914 +915 +916 +917 +918 +919 +920 +921 +922 +923 +924 +925 +926 +927 +928 +929 +930 +931 +932 +933 +934 +935 +936 +937 +938 +939 +940 +941 +942 +943 +944 +945 +946 +947 +948 +949 +950 +951 +952 +953 +954 +955 +956 +957 +958 +959 +960 +961 +962 +963 +964 +965 +966 +967 +968 +969 +970 +971 +972 +973 +974 +975 +976 +977 +978 +979 +980 +981 +982 +983 +984 +985 +986 +987 +988 +989 +990 +991 +992 +993 +994 +995 +996 +997 +998 +999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 +1007 +1008 +1009 +1010 +1011 +1012 +1013 +1014 +1015 +1016 +1017 +1018 +1019 +1020 +1021 +1022 +1023 +1024 +1025 +1026 +1027 +1028 +1029 +1030 +1031 +1032 +1033 +1034 +1035 +1036 +1037 +1038 +1039 +1040 +1041 +1042 +1043 +1044 +1045 +1046 +1047 +1048 +1049 +1050 +1051 +1052 +1053 +1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 +1075 +1076 +1077 +1078 +1079 +1080 +1081 +1082 +1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 +1185 +1186 +1187 +1188 +1189 +1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 +1214 +1215 +1216 +1217 +1218 +1219 +1220 +1221 +1222 +1223 +1224 +1225 +1226 +1227 +1228 +1229 
+1230 +1231 +1232 +1233 +1234 +1235 +1236 +1237 +1238 +1239 +1240 +1241 +1242 +1243 +1244 +1245 +1246 +1247 +1248 +1249 +1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 +1271 +1272 +1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 +1282 +1283 +1284 +1285 +1286 +1287 +1288 +1289 +1290 +1291 +1292 +1293 +1294 +1295 +1296 +1297 +1298 +1299 +1300 +1301 +1302 +1303 +1304 +1305 +1306 +1307 +1308 +1309 +1310 +1311 +1312 +1313 +1314 +1315 +1316 +1317 +1318 +1319 +1320 +1321 +1322 +1323 +1324 +1325 +1326 +1327 +1328 +1329 +1330 +1331 +1332 +1333 +1334 +1335 +1336 +1337 +1338 +1339 +1340 +1341 +1342 +1343 +1344 +1345 +1346 +1347 +1348 +1349 +1350 +1351 +1352 +1353 +1354 +1355 +1356 +1357 +1358 +1359 +1360 +1361 +1362 +1363 +1364 +1365 +1366 +1367 +1368 +1369 +1370 +1371 +1372 +1373 +1374 +1375 +1376 +1377 +1378 +1379 +1380 +1381 +1382 +1383 +1384 +1385 +1386 +1387 +1388 +1389 +1390 +1391 +1392 +1393 +1394 +1395 +1396 +1397 +1398 +1399 +1400 +1401 +1402 +1403 +1404 +1405 +1406 +1407 +1408 +1409 +1410 +1411 +1412 +1413 +1414 +1415 +1416 +1417 +1418 +1419 +1420 +1421 +1422 +1423 +1424 +1425 +1426 +1427 +1428 +1429 +1430 +1431 +1432 +1433 +1434 +1435 +1436 +1437 +1438 +1439 +1440 +1441 +1442 +1443 +1444 +1445 +1446 +1447 +1448 +1449 +1450 +1451 +1452 +1453 +1454 +1455 +1456 +1457 +1458 +1459 +1460 +1461 +1462 +1463 +1464 +1465 +1466 +1467 +1468 +1469 +1470 +1471 +1472 +1473 +1474 +1475 +1476 +1477 +1478 +1479 +1480 +1481 +1482 +1483 +1484 +1485 +1486 +1487 +1488 +1489 +1490 +1491 +1492 +1493 +1494 +1495 +1496 +1497 +1498 +1499 +1500 +1501 +1502 +1503 +1504 +1505 +1506 +1507 +1508 +1509 +1510 +1511 +1512 +1513 +1514 +1515 +1516 +1517 +1518 +1519 +1520 +1521 +1522 +1523 +1524 +1525 +1526 +1527 +1528 +1529 +1530 +1531 +1532 +1533 +1534 +1535 +1536 +1537 +1538 +1539 +1540 +1541 +1542 +1543 +1544 +1545 +1546 +1547 +1548 +1549 +1550 +1551 +1552 +1553 +1554 +1555 +1556 +1557 +1558 +1559 +1560 +1561 +1562 
+1563 +1564 +1565 +1566 +1567 +1568 +1569 +1570 +1571 +1572 +1573 +1574 +1575 +1576 +1577 +1578 +1579 +1580 +1581 +1582 +1583 +1584 +1585 +1586 +1587 +1588 +1589 +1590 +1591 +1592 +1593 +1594 +1595 +1596 +1597 +1598 +1599 +1600 +1601 +1602 +1603 +1604 +1605 +1606 +1607 +1608 +1609 +1610 +1611 +1612 +1613 +1614 +1615 +1616 +1617 +1618 +1619 +1620 +1621 +1622 +1623 +1624 +1625 +1626 +1627 +1628 +1629 +1630 +1631 +1632 +1633 +1634 +1635 +1636 +1637 +1638 +1639 +1640 +1641 +1642 +1643 +1644 +1645 +1646 +1647 +1648 +1649 +1650 +1651 +1652 +1653 +1654 +1655 +1656 +1657 +1658 +1659 +1660 +1661 +1662 +1663 +1664 +1665 +1666 +1667 +1668 +1669 +1670 +1671 +1672 +1673 +1674 +1675 +1676 +1677 +1678 +1679 +1680 +1681 +1682 +1683 +1684 +1685 +1686 +1687 +1688 +1689 +1690 +1691 +1692 +1693 +1694 +1695 +1696 +1697 +1698 +1699 +1700 +1701 +1702 +1703 +1704 +1705 +1706 +1707 +1708 +1709 +1710 +1711 +1712 +1713 +1714 +1715 +1716 +1717 +1718 +1719 +1720 +1721 +1722 +1723 +1724 +1725 +1726 +1727 +1728 +1729 +1730 +1731 +1732 +1733 +1734 +1735 +1736 +1737 +1738 +1739 +1740 +1741 +1742 +1743 +1744 +1745 +1746 +1747 +1748 +1749 +1750 +1751 +1752 +1753 +1754 +1755 +1756 +1757 +1758 +1759 +1760 +1761 +1762 +1763 +1764 +1765 +1766 +1767 +1768 +1769 +1770 +1771 +1772 +1773 +1774 +1775 +1776 +1777 +1778 +1779 +1780 +1781 +1782 +1783 +1784 +1785 +1786 +1787 +1788 +1789 +1790 +1791 +1792 +1793 +1794 +1795 +1796 +1797 +1798 +1799 +1800 +1801 +1802 +1803 +1804 +1805 +1806 +1807 +1808 +1809 +1810 +1811 +1812 +1813 +1814 +1815 +1816 +1817 +1818 +1819 +1820 +1821 +1822 +1823 +1824 +1825 +1826 +1827 +1828 +1829 +1830 +1831 +1832 +1833 +1834 +1835 +1836 +1837 +1838 +1839 +1840 +1841 +1842 +1843 +1844 +1845 +1846 +1847 +1848 +1849 +1850 +1851 +1852 +1853 +1854 +1855 +1856 +1857 +1858 +1859 +1860 +1861 +1862 +1863 +1864 +1865 +1866 +1867 +1868 +1869 +1870 +1871 +1872 +1873 +1874 +1875 +1876 +1877 +1878 +1879 +1880 +1881 +1882 +1883 +1884 +1885 +1886 +1887 +1888 +1889 +1890 +1891 +1892 +1893 +1894 +1895 
+1896 +1897 +1898 +1899 +1900 +1901 +1902 +1903 +1904 +1905 +1906 +1907 +1908 +1909 +1910 +1911 +1912 +1913 +1914 +1915 +1916 +1917 +1918 +1919 +1920 +1921 +1922 +1923 +1924 +1925 +1926 +1927 +1928 +1929 +1930 +1931 +1932 +1933 +1934 +1935 +1936 +1937 +1938 +1939 +1940 +1941 +1942 +1943 +1944 +1945 +1946 +1947 +1948 +1949 +1950 +1951 +1952 +1953 +1954 +1955 +1956 +1957 +1958 +1959 +1960 +1961 +1962 +1963 +1964 +1965 +1966 +1967 +1968 +1969 +1970 +1971 +1972 +1973 +1974 +1975 +1976 +1977 +1978 +1979 +1980 +1981 +1982 +1983 +1984 +1985 +1986 +1987 +1988 +1989 +1990 +1991 +1992 +1993 +1994 +1995 +1996 +1997 +1998 +1999 +2000 +2001 +2002 +2003 +2004 +2005 +2006 +2007 +2008 +2009 +2010 +2011 +2012 +2013 +2014 +2015 +2016 +2017 +2018 +2019 +2020 +2021 +2022 +2023 +2024 +2025 +2026 +2027 +2028 +2029 +2030 +2031 +2032 +2033 +2034 +2035 +2036 +2037 +2038 +2039 +2040 +2041 +2042 +2043 +2044 +2045 +2046 +2047 +2048 +2049 +2050 +2051 +2052 +2053 +2054 +2055 +2056 +2057 +2058 +2059 +2060 +2061 +2062 +2063 +2064 +2065 +2066 +2067 +2068 +2069 +2070 +2071 +2072 +2073 +2074 +2075 +2076 +2077 +2078 +2079 +2080 +2081 +2082 +2083 +2084 +2085 +2086 +2087 +2088 +2089 +2090 +2091 +2092 +2093 +2094 +2095 +2096 +2097 +2098 +2099 +2100 +2101 +2102 +2103 +2104 +2105 +2106 +2107 +2108 +2109 +2110 +2111 +2112 +2113 +2114 +2115 +2116 +2117 +2118 +2119 +2120 +2121 +2122 +2123 +2124 +2125 +2126 +2127 +2128 +2129 +2130 +2131 +2132 +2133 +2134 +2135 +2136 +2137 +2138 +2139 +2140 +2141 +2142 +2143 +2144 +2145 +2146 +2147 +2148 +2149 +2150 +2151 +2152 +2153 +2154 +2155 +2156 +2157 +2158 +2159 +2160 +2161 +2162 +2163 +2164 +2165 +2166 +2167 +2168 +2169 +2170 +2171 +2172 +2173 +2174 +2175 +2176 +2177 +2178 +2179 +2180 +2181 +2182 +2183 +2184 +2185 +2186 +2187 +2188 +2189 +2190 +2191 +2192 +2193 +2194 +2195 +2196 +2197 +2198 +2199 +2200 +2201 +2202 +2203 +2204 +2205 +2206 +2207 +2208 +2209 +2210 +2211 +2212 +2213 +2214 +2215 +2216 +2217 +2218 +2219 +2220 +2221 +2222 +2223 +2224 +2225 +2226 +2227 +2228 
+2229 +2230 +2231 +2232 +2233 +2234 +2235 +2236 +2237 +2238 +2239 +2240 +2241 +2242 +2243 +2244 +2245 +2246 +2247 +2248 +2249 +2250 +2251 +2252 +2253 +2254 +2255 +2256 +2257 +2258 +2259 +2260 +2261 +2262 +2263 +2264 +2265 +2266 +2267 +2268 +2269 +2270 +2271 +2272 +2273 +2274 +2275 +2276 +2277 +2278 +2279 +2280 +2281 +2282 +2283 +2284 +2285 +2286 +2287 +2288 +2289 +2290 +2291 +2292 +2293 +2294 +2295 +2296 +2297 +2298 +2299 +2300 +2301 +2302 +2303 +2304 +2305 +2306 +2307 +2308 +2309 +2310 +2311 +2312 +2313 +2314 +2315 +2316 +2317 +2318 +2319 +2320 +2321 +2322 +2323 +2324 +2325 +2326 +2327 +2328 +2329 +2330 +2331 +2332 +2333 +2334 +2335 +2336 +2337 +2338 +2339 +2340 +2341 +2342 +2343 +2344 +2345 +2346 +2347 +2348 +2349 +2350 +2351 +2352 +2353 +2354 +2355 +2356 +2357 +2358 +2359 +2360 +2361 +2362 +2363 +2364 +2365 +2366 +2367 +2368 +2369 +2370 +2371 +2372 +2373 +2374 +2375 +2376 +2377 +2378 +2379 +2380 +2381 +2382 +2383 +2384 +2385 +2386 +2387 +2388 +2389 +2390 +2391 +2392 +2393 +2394 +2395 +2396 +2397 +2398 +2399 +2400 +2401 +2402 +2403 +2404 +2405 +2406 +2407 +2408 +2409 +2410 +2411 +2412 +2413 +2414 +2415 +2416 +2417 +2418 +2419 +2420 +2421 +2422 +2423 +2424 +2425 +2426 +2427 +2428 +2429 +2430 +2431 +2432 +2433 +2434 +2435 +2436 +2437 +2438 +2439 +2440 +2441 +2442 +2443 +2444 +2445 +2446 +2447 +2448 +2449 +2450 +2451 +2452 +2453 +2454 +2455 +2456 +2457 +2458 +2459 +2460 +2461 +2462 +2463 +2464 +2465 +2466 +2467 +2468 +2469 +2470 +2471 +2472 +2473 +2474 +2475 +2476 +2477 +2478 +2479 +2480 +2481 +2482 +2483 +2484 +2485 +2486 +2487 +2488 +2489 +2490 +2491 +2492 +2493 +2494 +2495 +2496 +2497 +2498 +2499 +2500 +2501 +2502 +2503 +2504 +2505 +2506 +2507 +2508 +2509 +2510 +2511 +2512 +2513 +2514 +2515 +2516 +2517 +2518 +2519 +2520 +2521 +2522 +2523 +2524 +2525 +2526 +2527 +2528 +2529 +2530 +2531 +2532 +2533 +2534 +2535 +2536 +2537 +2538 +2539 +2540 +2541 +2542 +2543 +2544 +2545 +2546 +2547 +2548 +2549 +2550 +2551 +2552 +2553 +2554 +2555 +2556 +2557 +2558 +2559 +2560 +2561 
+2562 +2563 +2564 +2565 +2566 +2567 +2568 +2569 +2570 +2571 +2572 +2573 +2574 +2575 +2576 +2577 +2578 +2579 +2580 +2581 +2582 +2583 +2584 +2585 +2586 +2587 +2588 +2589 +2590 +2591 +2592 +2593 +2594 +2595 +2596 +2597 +2598 +2599 +2600 +2601 +2602 +2603 +2604 +2605 +2606 +2607 +2608 +2609 +2610 +2611 +2612 +2613 +2614 +2615 +2616 +2617 +2618 +2619 +2620 +2621 +2622 +2623 +2624 +2625 +2626 +2627 +2628 +2629 +2630 +2631 +2632 +2633 +2634 +2635 +2636 +2637 +2638 +2639 +2640 +2641 +2642 +2643 +2644 +2645 +2646 +2647 +2648 +2649 +2650 +2651 +2652 +2653 +2654 +2655 +2656 +2657 +2658 +2659 +2660 +2661 +2662 +2663 +2664 +2665 +2666 +2667 +2668 +2669 +2670 +2671 +2672 +2673 +2674 +2675 +2676 +2677 +2678 +2679 +2680 +2681 +2682 +2683 +2684 +2685 +2686 +2687 +2688 +2689 +2690 +2691 +2692 +2693 +2694 +2695 +2696 +2697 +2698 +2699 +2700 +2701 +2702 +2703 +2704 +2705 +2706 +2707 +2708 +2709 +2710 +2711 +2712 +2713 +2714 +2715 +2716 +2717 +2718 +2719 +2720 +2721 +2722 +2723 +2724 +2725 +2726 +2727 +2728 +2729 +2730 +2731 +2732 +2733 +2734 +2735 +2736 +2737 +2738 +2739 +2740 +2741 +2742 +2743 +2744 +2745 +2746 +2747 +2748 +2749 +2750 +2751 +2752 +2753 +2754 +2755 +2756 +2757 +2758 +2759 +2760 +2761 +2762 +2763 +2764 +2765 +2766 +2767 +2768 +2769 +2770 +2771 +2772 +2773 +2774 +2775 +2776 +2777 +2778 +2779 +2780 +2781 +2782 +2783 +2784 +2785 +2786 +2787 +2788 +2789 +2790 +2791 +2792 +2793 +2794 +2795 +2796 +2797 +2798 +2799 +2800 +2801 +2802 +2803 +2804 +2805 +2806 +2807 +2808 +2809 +2810 +2811 +2812 +2813 +2814 +2815 +2816 +2817 +2818 +2819 +2820 +2821 +2822 +2823 +2824 +2825 +2826 +2827 +2828 +2829 +2830 +2831 +2832 +2833 +2834 +2835 +2836 +2837 +2838 +2839 +2840 +2841 +2842 +2843 +2844 +2845 +2846 +2847 +2848 +2849 +2850 +2851 +2852 +2853 +2854 +2855 +2856 +2857 +2858 +2859 +2860 +2861 +2862 +2863 +2864 +2865 +2866 +2867 +2868 +2869 +2870 +2871 +2872 +2873 +2874 +2875 +2876 +2877 +2878 +2879 +2880 +2881 +2882 +2883 +2884 +2885 +2886 +2887 +2888 +2889 +2890 +2891 +2892 +2893 +2894 
+2895 +2896 +2897 +2898 +2899 +2900 +2901 +2902 +2903 +2904 +2905 +2906 +2907 +2908 +2909 +2910 +2911 +2912 +2913 +2914 +2915 +2916 +2917 +2918 +2919 +2920 +2921 +2922 +2923 +2924 +2925 +2926 +2927 +2928 +2929 +2930 +2931 +2932 +2933 +2934 +2935 +2936 +2937 +2938 +2939 +2940 +2941 +2942 +2943 +2944 +2945 +2946 +2947 +2948 +2949 +2950 +2951 +2952 +2953 +2954 +2955 +2956 +2957 +2958 +2959 +2960 +2961 +2962 +2963 +2964 +2965 +2966 +2967 +2968 +2969 +2970 +2971 +2972 +2973 +2974 +2975 +2976 +2977 +2978 +2979 +2980 +2981 +2982 +2983 +2984 +2985 +2986 +2987 +2988 +2989 +2990 +2991 +2992 +2993 +2994 +2995 +2996 +2997 +2998 +2999 +3000 +3001 +3002 +3003 +3004 +3005 +3006 +3007 +3008 +3009 +3010 +3011 +3012 +3013 +3014 +3015 +3016 +3017 +3018 +3019 +3020 +3021 +3022 +3023 +3024 +3025 +3026 +3027 +3028 +3029 +3030 +3031 +3032 +3033 +3034 +3035 +3036 +3037 +3038 +3039 +3040 +3041 +3042 +3043 +3044 +3045 +3046 +3047 +3048 +3049 +3050 +3051 +3052 +3053 +3054 +3055 +3056 +3057 +3058 +3059 +3060 +3061 +3062 +3063 +3064 +3065 +3066 +3067 +3068 +3069 +3070 +3071 +3072 +3073 +3074 +3075 +3076 +3077 +3078 +3079 +3080 +3081 +3082 +3083 +3084 +3085 +3086 +3087 +3088 +3089 +3090 +3091 +3092 +3093 +3094 +3095 +3096 +3097 +3098 +3099 +3100 +3101 +3102 +3103 +3104 +3105 +3106 +3107 +3108 +3109 +3110 +3111 +3112 +3113 +3114 +3115 +3116 +3117 +3118 +3119 +3120 +3121 +3122 +3123 +3124 +3125 +3126 +3127 +3128 +3129 +3130 +3131 +3132 +3133 +3134 +3135 +3136 +3137 +3138 +3139 +3140 +3141 +3142 +3143 +3144 +3145 +3146 +3147 +3148 +3149 +3150 +3151 +3152 +3153 +3154 +3155 +3156 +3157 +3158 +3159 +3160 +3161 +3162 +3163 +3164 +3165 +3166 +3167 +3168 +3169 +3170 +3171 +3172 +3173 +3174 +3175 +3176 +3177 +3178 +3179 +3180 +3181 +3182 +3183 +3184 +3185 +3186 +3187 +3188 +3189 +3190 +3191 +3192 +3193 +3194 +3195 +3196 +3197 +3198 +3199 +3200 +3201 +3202 +3203 +3204 +3205 +3206 +3207 +3208 +3209 +3210 +3211 +3212 +3213 +3214 +3215 +3216 +3217 +3218 +3219 +3220 +3221 +3222 +3223 +3224 +3225 +3226 +3227 
+3228 +3229 +3230 +3231 +3232 +3233 +3234 +3235 +3236 +3237 +3238 +3239 +3240 +3241 +3242 +3243 +3244 +3245 +3246 +3247 +3248 +3249 +3250 +3251 +3252 +3253 +3254 +3255 +3256 +3257 +3258 +3259 +3260 +3261 +3262 +3263 +3264 +3265 +3266 +3267 +3268 +3269 +3270 +3271 +3272 +3273 +3274 +3275 +3276 +3277 +3278 +3279 +3280 +3281 +3282 +3283 +3284 +3285 +3286 +3287 +3288 +3289 +3290 +3291 +3292 +3293 +3294 +3295 +3296 +3297 +3298 +3299 +3300 +3301 +3302 +3303 +3304 +3305 +3306 +3307 +3308 +3309 +3310 +3311 +3312 +3313 +3314 +3315 +3316 +3317 +3318 +3319 +3320 +3321 +3322 +3323 +3324 +3325 +3326 +3327 +3328 +3329 +3330 +3331 +3332 +3333 +3334 +3335 +3336 +3337 +3338 +3339 +3340 +3341 +3342 +3343 +3344 +3345 +3346 +3347 +3348 +3349 +3350 +3351 +3352 +3353 +3354 +3355 +3356 +3357 +3358 +3359 +3360 +3361 +3362 +3363 +3364 +3365 +3366 +3367 +3368 +3369 +3370 +3371 +3372 +3373 +3374 +3375 +3376 +3377 +3378 +3379 +3380 +3381 +3382 +3383 +3384 +3385 +3386 +3387 +3388 +3389 +3390 +3391 +3392 +3393 +3394 +3395 +3396 +3397 +3398 +3399 +3400 +3401 +3402 +3403 +3404 +3405 +3406 +3407 +3408 +3409 +3410 +3411 +3412 +3413 +3414 +3415 +3416 +3417 +3418 +3419 +3420 +3421 +3422 +3423 +3424 +3425 +3426 +3427 +3428 +3429 +3430 +3431 +3432 +3433 +3434 +3435 +3436 +3437 +3438 +3439 +3440 +3441 +3442 +3443 +3444 +3445 +3446 +3447 +3448 +3449 +3450 +3451 +3452 +3453 +3454 +3455 +3456 +3457 +3458 +3459 +3460 +3461 +3462 +3463 +3464 +3465 +3466 +3467 +3468 +3469 +3470 +3471 +3472 +3473 +3474 +3475 +3476 +3477 +3478 +3479 +3480 +3481 +3482 +3483 +3484 +3485 +3486 +3487 +3488 +3489 +3490 +3491 +3492 +3493 +3494 +3495 +3496 +3497 +3498 +3499 +3500 +3501 +3502 +3503 +3504 +3505 +3506 +3507 +3508 +3509 +3510 +3511 +3512 +3513 +3514 +3515 +3516 +3517 +3518 +3519 +3520 +3521 +3522 +3523 +3524 +3525 +3526 +3527 +3528 +3529 +3530 +3531 +3532 +3533 +3534 +3535 +3536 +3537 +3538 +3539 +3540 +3541 +3542 +3543 +3544 +3545 +3546 +3547 +3548 +3549 +3550 +3551 +3552 +3553 +3554 +3555 +3556 +3557 +3558 +3559 +3560 
+3561 +3562 +3563 +3564 +3565 +3566 +3567 +3568 +3569 +3570 +3571 +3572 +3573 +3574 +3575 +3576 +3577 +3578 +3579 +3580 +3581 +3582 +3583 +3584 +3585 +3586 +3587 +3588 +3589 +3590 +3591 +3592 +3593 +3594 +3595 +3596 +3597 +3598 +3599 +3600 +3601 +3602 +3603 +3604 +3605 +3606 +3607 +3608 +3609 +3610 +3611 +3612 +3613 +3614 +3615 +3616 +3617 +3618 +3619 +3620 +3621 +3622 +3623 +3624 +3625 +3626 +3627 +3628 +3629 +3630 +3631 +3632 +3633 +3634 +3635 +3636 +3637 +3638 +3639 +3640 +3641 +3642 +3643 +3644 +3645 +3646 +3647 +3648 +3649 +3650 +3651 +3652 +3653 +3654 +3655 +3656 +3657 +3658 +3659 +3660 +3661 +3662 +3663 +3664 +3665 +3666 +3667 +3668 +3669 +3670 +3671 +3672 +3673 +3674 +3675 +3676 +3677 +3678 +3679 +3680 +3681 +3682 +3683 +3684 +3685 +3686 +3687 +3688 +3689 +3690 +3691 +3692 +3693 +3694 +3695 +3696 +3697 +3698 +3699 +3700 +3701 +3702 +3703 +3704 +3705 +3706 +3707 +3708 +3709 +3710 +3711 +3712 +3713 +3714 +3715 +3716 +3717 +3718 +3719 +3720 +3721 +3722 +3723 +3724 +3725 +3726 +3727 +3728 +3729 +3730 +3731 +3732 +3733 +3734 +3735 +3736 +3737 +3738 +3739 +3740 +3741 +3742 +3743 +3744 +3745 +3746 +3747 +3748 +3749 +3750 +3751 +3752 +3753 +3754 +3755 +3756 +3757 +3758 +3759 +3760 +3761 +3762 +3763 +3764 +3765 +3766 +3767 +3768 +3769 +3770 +3771 +3772 +3773 +3774 +3775 +3776 +3777 +3778 +3779 +3780 +3781 +3782 +3783 +3784 +3785 +3786 +3787 +3788 +3789 +3790 +3791 +3792 +3793 +3794 +3795 +3796 +3797 +3798 +3799 +3800 +3801 +3802 +3803 +3804 +3805 +3806 +3807 +3808 +3809 +3810 +3811 +3812 +3813 +3814 +3815 +3816 +3817 +3818 +3819 +3820 +3821 +3822 +3823 +3824 +3825 +3826 +3827 +3828 +3829 +3830 +3831 +3832 +3833 +3834 +3835 +3836 +3837 +3838 +3839 +3840 +3841 +3842 +3843 +3844 +3845 +3846 +3847 +3848 +3849 +3850 +3851 +3852 +3853 +3854 +3855 +3856 +3857 +3858 +3859 +3860 +3861 +3862 +3863 +3864 +3865 +3866 +3867 +3868 +3869 +3870 +3871 +3872 +3873 +3874 +3875 +3876 +3877 +3878 +3879 +3880 +3881 +3882 +3883 +3884 +3885 +3886 +3887 +3888 +3889 +3890 +3891 +3892 +3893 
+3894 +3895 +3896 +3897 +3898 +3899 +3900 +3901 +3902 +3903 +3904 +3905 +3906 +3907 +3908 +3909 +3910 +3911 +3912 +3913 +3914 +3915 +3916 +3917 +3918 +3919 +3920 +3921 +3922 +3923 +3924 +3925 +3926 +3927 +3928 +3929 +3930 +3931 +3932 +3933 +3934 +3935 +3936 +3937 +3938 +3939 +3940 +3941 +3942 +3943 +3944 +3945 +3946 +3947 +3948 +3949 +3950 +3951 +3952 +3953 +3954 +3955 +3956 +3957 +3958 +3959 +3960 +3961 +3962 +3963 +3964 +3965 +3966 +3967 +3968 +3969 +3970 +3971 +3972 +3973 +3974 +3975 +3976 +3977 +3978 +3979 +3980 +3981 +3982 +3983 +3984 +3985 +3986 +3987 +3988 +3989 +3990 +3991 +3992 +3993 +3994 +3995 +3996 +3997 +3998 +3999 +4000 +4001 +4002 +4003 +4004 +4005 +4006 +4007 +4008 +4009 +4010 +4011 +4012 +4013 +4014 +4015 +4016 +4017 +4018 +4019 +4020 +4021 +4022 +4023 +4024 +4025 +4026 +4027 +4028 +4029 +4030 +4031 +4032 +4033 +4034 +4035 +4036 +4037 +4038 +4039 +4040 +4041 +4042 +4043 +4044 +4045 +4046 +4047 +4048 +4049 +4050 +4051 +4052 +4053 +4054 +4055 +4056 +4057 +4058 +4059 +4060 +4061 +4062 +4063 +4064 +4065 +4066 +4067 +4068 +4069 +4070 +4071 +4072 +4073 +4074 +4075 +4076 +4077 +4078 +4079 +4080 +4081 +4082 +4083 +4084 +4085 +4086 +4087 +4088 +4089 +4090 +4091 +4092 +4093 +4094 +4095 +4096 +4097 +4098 +4099 +4100 +4101 +4102 +4103 +4104 +4105 +4106 +4107 +4108 +4109 +4110 +4111 +4112 +4113 +4114 +4115 +4116 +4117 +4118 +4119 +4120 +4121 +4122 +4123 +4124 +4125 +4126 +4127 +4128 +4129 +4130 +4131 +4132 +4133 +4134 +4135 +4136 +4137 +4138 +4139 +4140 +4141 +4142 +4143 +4144 +4145 +4146 +4147 +4148 +4149 +4150 +4151 +4152 +4153 +4154 +4155 +4156 +4157 +4158 +4159 +4160 +4161 +4162 +4163 +4164 +4165 +4166 +4167 +4168 +4169 +4170 +4171 +4172 +4173 +4174 +4175 +4176 +4177 +4178 +4179 +4180 +4181 +4182 +4183 +4184 +4185 +4186 +4187 +4188 +4189 +4190 +4191 +4192 +4193 +4194 +4195 +4196 +4197 +4198 +4199 +4200 +4201 +4202 +4203 +4204 +4205 +4206 +4207 +4208 +4209 +4210 +4211 +4212 +4213 +4214 +4215 +4216 +4217 +4218 +4219 +4220 +4221 +4222 +4223 +4224 +4225 +4226 
+4227 +4228 +4229 +4230 +4231 +4232 +4233 +4234 +4235 +4236 +4237 +4238 +4239 +4240 +4241 +4242 +4243 +4244 +4245 +4246 +4247 +4248 +4249 +4250 +4251 +4252 +4253 +4254 +4255 +4256 +4257 +4258 +4259 +4260 +4261 +4262 +4263 +4264 +4265 +4266 +4267 +4268 +4269 +4270 +4271 +4272 +4273 +4274 +4275 +4276 +4277 +4278 +4279 +4280 +4281 +4282 +4283 +4284 +4285 +4286 +4287 +4288 +4289 +4290 +4291 +4292 +4293 +4294 +4295 +4296 +4297 +4298 +4299 +4300 +4301 +4302 +4303 +4304 +4305 +4306 +4307 +4308 +4309 +4310 +4311 +4312 +4313 +4314 +4315 +4316 +4317 +4318 +4319 +4320 +4321 +4322 +4323 +4324 +4325 +4326 +4327 +4328 +4329 +4330 +4331 +4332 +4333 +4334 +4335 +4336 +4337 +4338 +4339 +4340 +4341 +4342 +4343 +4344 +4345 +4346 +4347 +4348 +4349 +4350 +4351 +4352 +4353 +4354 +4355 +4356 +4357 +4358 +4359 +4360 +4361 +4362 +4363 +4364 +4365 +4366 +4367 +4368 +4369 +4370 +4371 +4372 +4373 +4374 +4375 +4376 +4377 +4378 +4379 +4380 +4381 +4382 +4383 +4384 +4385 +4386 +4387 +4388 +4389 +4390 +4391 +4392 +4393 +4394 +4395 +4396 +4397 +4398 +4399 +4400 +4401 +4402 +4403 +4404 +4405 +4406 +4407 +4408 +4409 +4410 +4411 +4412 +4413 +4414 +4415 +4416 +4417 +4418 +4419 +4420 +4421 +4422 +4423 +4424 +4425 +4426 +4427 +4428 +4429 +4430 +4431 +4432 +4433 +4434 +4435 +4436 +4437 +4438 +4439 +4440 +4441 +4442 +4443 +4444 +4445 +4446 +4447 +4448 +4449 +4450 +4451 +4452 +4453 +4454 +4455 +4456 +4457 +4458 +4459 +4460 +4461 +4462 +4463 +4464 +4465 +4466 +4467 +4468 +4469 +4470 +4471 +4472 +4473 +4474 +4475 +4476 +4477 +4478 +4479 +4480 +4481 +4482 +4483 +4484 +4485 +4486 +4487 +4488 +4489 +4490 +4491 +4492 +4493 +4494 +4495 +4496 +4497 +4498 +4499 +4500 +4501 +4502 +4503 +4504 +4505 +4506 +4507 +4508 +4509 +4510 +4511 +4512 +4513 +4514 +4515 +4516 +4517 +4518 +4519 +4520 +4521 +4522 +4523 +4524 +4525 +4526 +4527 +4528 +4529 +4530 +4531 +4532 +4533 +4534 +4535 +4536 +4537 +4538 +4539 +4540 +4541 +4542 +4543 +4544 +4545 +4546 +4547 +4548 +4549 +4550 +4551 +4552 +4553 +4554 +4555 +4556 +4557 +4558 +4559 
+4560 +4561 +4562 +4563 +4564 +4565 +4566 +4567 +4568 +4569 +4570 +4571 +4572 +4573 +4574 +4575 +4576 +4577 +4578 +4579 +4580 +4581 +4582 +4583 +4584 +4585 +4586 +4587 +4588 +4589 +4590 +4591 +4592 +4593 +4594 +4595 +4596 +4597 +4598 +4599 +4600 +4601 +4602 +4603 +4604 +4605 +4606 +4607 +4608 +4609 +4610 +4611 +4612 +4613 +4614 +4615 +4616 +4617 +4618 +4619 +4620 +4621 +4622 +4623 +4624 +4625 +4626 +4627 +4628 +4629 +4630 +4631 +4632 +4633 +4634 +4635 +4636 +4637 +4638 +4639 +4640 +4641 +4642 +4643 +4644 +4645 +4646 +4647 +4648 +4649 +4650 +4651 +4652 +4653 +4654 +4655 +4656 +4657 +4658 +4659 +4660 +4661 +4662 +4663 +4664 +4665 +4666 +4667 +4668 +4669 +4670 +4671 +4672 +4673 +4674 +4675 +4676 +4677 +4678 +4679 +4680 +4681 +4682 +4683 +4684 +4685 +4686 +4687 +4688 +4689 +4690 +4691 +4692 +4693 +4694 +4695 +4696 +4697 +4698 +4699 +4700 +4701 +4702 +4703 +4704 +4705 +4706 +4707 +4708 +4709 +4710 +4711 +4712 +4713 +4714 +4715 +4716 +4717 +4718 +4719 +4720 +4721 +4722 +4723 +4724 +4725 +4726 +4727 +4728 +4729 +4730 +4731 +4732 +4733 +4734 +4735 +4736 +4737 +4738 +4739 +4740 +4741 +4742 +4743 +4744 +4745 +4746 +4747 +4748 +4749 +4750 +4751 +4752 +4753 +4754 +4755 +4756 +4757 +4758 +4759 +4760 +4761 +4762 +4763 +4764 +4765 +4766 +4767 +4768 +4769 +4770 +4771 +4772 +4773 +4774 +4775 +4776 +4777 +4778 +4779 +4780 +4781 +4782 +4783 +4784 +4785 +4786 +4787 +4788 +4789 +4790 +4791 +4792 +4793 +4794 +4795 +4796 +4797 +4798 +4799 +4800 +4801 +4802 +4803 +4804 +4805 +4806 +4807 +4808 +4809 +4810 +4811 +4812 +4813 +4814 +4815 +4816 +4817 +4818 +4819 +4820 +4821 +4822 +4823 +4824 +4825 +4826 +4827 +4828 +4829 +4830 +4831 +4832 +4833 +4834 +4835 +4836 +4837 +4838 +4839 +4840 +4841 +4842 +4843 +4844 +4845 +4846 +4847 +4848 +4849 +4850 +4851 +4852 +4853 +4854 +4855 +4856 +4857 +4858 +4859 +4860 +4861 +4862 +4863 +4864 +4865 +4866 +4867 +4868 +4869 +4870 +4871 +4872 +4873 +4874 +4875 +4876 +4877 +4878 +4879 +4880 +4881 +4882 +4883 +4884 +4885 +4886 +4887 +4888 +4889 +4890 +4891 +4892 
+4893 +4894 +4895 +4896 +4897 +4898 +4899 +4900 +4901 +4902 +4903 +4904 +4905 +4906 +4907 +4908 +4909 +4910 +4911 +4912 +4913 +4914 +4915 +4916 +4917 +4918 +4919 +4920 +4921 +4922 +4923 +4924 +4925 +4926 +4927 +4928 +4929 +4930 +4931 +4932 +4933 +4934 +4935 +4936 +4937 +4938 +4939 +4940 +4941 +4942 +4943 +4944 +4945 +4946 +4947 +4948 +4949 +4950 +4951 +4952 +4953 +4954 +4955 +4956 +4957 +4958 +4959 +4960 +4961 +4962 +4963 +4964 +4965 +4966 +4967 +4968 +4969 +4970 +4971 +4972 +4973 +4974 +4975 +4976 +4977 +4978 +4979 +4980 +4981 +4982 +4983 +4984 +4985 +4986 +4987 +4988 +4989 +4990 +4991 +4992 +4993 +4994 +4995 +4996 +4997 +4998 +4999 +5000 +5001 +5002 +5003 +5004 +5005 +5006 +5007 +5008 +5009 +5010 +5011 +5012 +5013 +5014 +5015 +5016 +5017 +5018 +5019 +5020 +5021 +5022 +5023 +5024 +5025 +5026 +5027 +5028 +5029 +5030 +5031 +5032 +5033 +5034 +5035 +5036 +5037 +5038 +5039 +5040 +5041 +5042 +5043 +5044 +5045 +5046 +5047 +5048 +5049 +5050 +5051 +5052 +5053 +5054 +5055 +5056 +5057 +5058 +5059 +5060 +5061 +5062 +5063 +5064 +5065 +5066 +5067 +5068 +5069 +5070 +5071 +5072 +5073 +5074 +5075 +5076 +5077 +5078 +5079 +5080 +5081 +5082 +5083 +5084 +5085 +5086 +5087 +5088 +5089 +5090 +5091 +5092 +5093 +5094 +5095 +5096 +5097 +5098 +5099 +5100 +5101 +5102 +5103 +5104 +5105 +5106 +5107 +5108 +5109 +5110 +5111 +5112 +5113 +5114 +5115 +5116 +5117 +5118 +5119 +5120 +5121 +5122 +5123 +5124 +5125 +5126 +5127 +5128 +5129 +5130 +5131 +5132 +5133 +5134 +5135 +5136 +5137 +5138 +5139 +5140 +5141 +5142 +5143 +5144 +5145 +5146 +5147 +5148 +5149 +5150 +5151 +5152 +5153 +5154 +5155 +5156 +5157 +5158 +5159 +5160 +5161 +5162 +5163 +5164 +5165 +5166 +5167 +5168 +5169 +5170 +5171 +5172 +5173 +5174 +5175 +5176 +5177 +5178 +5179 +5180 +5181 +5182 +5183 +5184 +5185 +5186 +5187 +5188 +5189 +5190 +5191 +5192 +5193 +5194 +5195 +5196 +5197 +5198 +5199 +5200 +5201 +5202 +5203 +5204 +5205 +5206 +5207 +5208 +5209 +5210 +5211 +5212 +5213 +5214 +5215 +5216 +5217 +5218 +5219 +5220 +5221 +5222 +5223 +5224 +5225 
+5226 +5227 +5228 +5229 +5230 +5231 +5232 +5233 +5234 +5235 +5236 +5237 +5238 +5239 +5240 +5241 +5242 +5243 +5244 +5245 +5246 +5247 +5248 +5249 +5250 +5251 +5252 +5253 +5254 +5255 +5256 +5257 +5258 +5259 +5260 +5261 +5262 +5263 +5264 +5265 +5266 +5267 +5268 +5269 +5270 +5271 +5272 +5273 +5274 +5275 +5276 +5277 +5278 +5279 +5280 +5281 +5282 +5283 +5284 +5285 +5286 +5287 +5288 +5289 +5290 +5291 +5292 +5293 +5294 +5295 +5296 +5297 +5298 +5299 +5300 +5301 +5302 +5303 +5304 +5305 +5306 +5307 +5308 +5309 +5310 +5311 +5312 +5313 +5314 +5315 +5316 +5317 +5318 +5319 +5320 +5321 +5322 +5323 +5324 +5325 +5326 +5327 +5328 +5329 +5330 +5331 +5332 +5333 +5334 +5335 +5336 +5337 +5338 +5339 +5340 +5341 +5342 +5343 +5344 +5345 +5346 +5347 +5348 +5349 +5350 +5351 +5352 +5353 +5354 +5355 +5356 +5357 +5358 +5359 +5360 +5361 +5362 +5363 +5364 +5365 +5366 +5367 +5368 +5369 +5370 +5371 +5372 +5373 +5374 +5375 +5376 +5377 +5378 +5379 +5380 +5381 +5382 +5383 +5384 +5385 +5386 +5387 +5388 +5389 +5390 +5391 +5392 +5393 +5394 +5395 +5396 +5397 +5398 +5399 +5400 +5401 +5402 +5403 +5404 +5405 +5406 +5407 +5408 +5409 +5410 +5411 +5412 +5413 +5414 +5415 +5416 +5417 +5418 +5419 +5420 +5421 +5422 +5423 +5424 +5425 +5426 +5427 +5428 +5429 +5430 +5431 +5432 +5433 +5434 +5435 +5436 +5437 +5438 +5439 +5440 +5441 +5442 +5443 +5444 +5445 +5446 +5447 +5448 +5449 +5450 +5451 +5452 +5453 +5454 +5455 +5456 +5457 +5458 +5459 +5460 +5461 +5462 +5463 +5464 +5465 +5466 +5467 +5468 +5469 +5470 +5471 +5472 +5473 +5474 +5475 +5476 +5477 +5478 +5479 +5480 +5481 +5482 +5483 +5484 +5485 +5486 +5487 +5488 +5489 +5490 +5491 +5492 +5493 +5494 +5495 +5496 +5497 +5498 +5499 +5500 +5501 +5502 +5503 +5504 +5505 +5506 +5507 +5508 +5509 +5510 +5511 +5512 +5513 +5514 +5515 +5516 +5517 +5518 +5519 +5520 +5521 +5522 +5523 +5524 +5525 +5526 +5527 +5528 +5529 +5530 +5531 +5532 +5533 +5534 +5535 +5536 +5537 +5538 +5539 +5540 +5541 +5542 +5543 +5544 +5545 +5546 +5547 +5548 +5549 +5550 +5551 +5552 +5553 +5554 +5555 +5556 +5557 +5558 
+5559 +5560 +5561 +5562 +5563 +5564 +5565 +5566 +5567 +5568 +5569 +5570 +5571 +5572 +5573 +5574 +5575 +5576 +5577 +5578 +5579 +5580 +5581 +5582 +5583 +5584 +5585 +5586 +5587 +5588 +5589 +5590 +5591 +5592 +5593 +5594 +5595 +5596 +5597 +5598 +5599 +5600 +5601 +5602 +5603 +5604 +5605 +5606 +5607 +5608 +5609 +5610 +5611 +5612 +5613 +5614 +5615 +5616 +5617 +5618 +5619 +5620 +5621 +5622 +5623 +5624 +5625 +5626 +5627 +5628 +5629 +5630 +5631 +5632 +5633 +5634 +5635 +5636 +5637 +5638 +5639 +5640 +5641 +5642 +5643 +5644 +5645 +5646 +5647 +5648 +5649 +5650 +5651 +5652 +5653 +5654 +5655 +5656 +5657 +5658 +5659 +5660 +5661 +5662 +5663 +5664 +5665 +5666 +5667 +5668 +5669 +5670 +5671 +5672 +5673 +5674 +5675 +5676 +5677 +5678 +5679 +5680 +5681 +5682 +5683 +5684 +5685 +5686 +5687 +5688 +5689 +5690 +5691 +5692 +5693 +5694 +5695 +5696 +5697 +5698 +5699 +5700 +5701 +5702 +5703 +5704 +5705 +5706 +5707 +5708 +5709 +5710 +5711 +5712 +5713 +5714 +5715 +5716 +5717 +5718 +5719 +5720 +5721 +5722 +5723 +5724 +5725 +5726 +5727 +5728 +5729 +5730 +5731 +5732 +5733 +5734 +5735 +5736 +5737 +5738 +5739 +5740 +5741 +5742 +5743 +5744 +5745 +5746 +5747 +5748 +5749 +5750 +5751 +5752 +5753 +5754 +5755 +5756 +5757 +5758 +5759 +5760 +5761 +5762 +5763 +5764 +5765 +5766 +5767 +5768 +5769 +5770 +5771 +5772 +5773 +5774 +5775 +5776 +5777 +5778 +5779 +5780 +5781 +5782 +5783 +5784 +5785 +5786 +5787 +5788 +5789 +5790 +5791 +5792 +5793 +5794 +5795 +5796 +5797 +5798 +5799 +5800 +5801 +5802 +5803 +5804 +5805 +5806 +5807 +5808 +5809 +5810 +5811 +5812 +5813 +5814 +5815 +5816 +5817 +5818 +5819 +5820 +5821 +5822 +5823 +5824 +5825 +5826 +5827 +5828 +5829 +5830 +5831 +5832 +5833 +5834 +5835 +5836 +5837 +5838 +5839 +5840 +5841 +5842 +5843 +5844 +5845 +5846 +5847 +5848 +5849 +5850 +5851 +5852 +5853 +5854 +5855 +5856 +5857 +5858 +5859 +5860 +5861 +5862 +5863 +5864 +5865 +5866 +5867 +5868 +5869 +5870 +5871 +5872 +5873 +5874 +5875 +5876 +5877 +5878 +5879 +5880 +5881 +5882 +5883 +5884 +5885 +5886 +5887 +5888 +5889 +5890 +5891 
+5892 +5893 +5894 +5895 +5896 +5897 +5898 +5899 +5900 +5901 +5902 +5903 +5904 +5905 +5906 +5907 +5908 +5909 +5910 +5911 +5912 +5913 +5914 +5915 +5916 +5917 +5918 +5919 +5920 +5921 +5922 +5923 +5924 +5925 +5926 +5927 +5928 +5929 +5930 +5931 +5932 +5933 +5934 +5935 +5936 +5937 +5938 +5939 +5940 +5941 +5942 +5943 +5944 +5945 +5946 +5947 +5948 +5949 +5950 +5951 +5952 +5953 +5954 +5955 +5956 +5957 +5958 +5959 +5960 +5961 +5962 +5963 +5964 +5965 +5966 +5967 +5968 +5969 +5970 +5971 +5972 +5973 +5974 +5975 +5976 +5977 +5978 +5979 +5980 +5981 +5982 +5983 +5984 +5985 +5986 +5987 +5988 +5989 +5990 +5991 +5992 +5993 +5994 +5995 +5996 +5997 +5998 +5999 +6000 +6001 +6002 +6003 +6004 +6005 +6006 +6007 +6008 +6009 +6010 +6011 +6012 +6013 +6014 +6015 +6016 +6017 +6018 +6019 +6020 +6021 +6022 +6023 +6024 +6025 +6026 +6027 +6028 +6029 +6030 +6031 +6032 +6033 +6034 +6035 +6036 +6037 +6038 +6039 +6040 +6041 +6042 +6043 +6044 +6045 +6046 +6047 +6048 +6049 +6050 +6051 +6052 +6053 +6054 +6055 +6056 +6057 +6058 +6059 +6060 +6061 +6062 +6063 +6064 +6065 +6066 +6067 +6068 +6069 +6070 +6071 +6072 +6073 +6074 +6075 +6076 +6077 +6078 +6079 +6080 +6081 +6082 +6083 +6084 +6085 +6086 +6087 +6088 +6089 +6090 +6091 +6092 +6093 +6094 +6095 +6096 +6097 +6098 +6099 +6100 +6101 +6102 +6103 +6104 +6105 +6106 +6107 +6108 +6109 +6110 +6111 +6112 +6113 +6114 +6115 +6116 +6117 +6118 +6119 +6120 +6121 +6122 +6123 +6124 +6125 +6126 +6127 +6128 +6129 +6130 +6131 +6132 +6133 +6134 +6135 +6136 +6137 +6138 +6139 +6140 +6141 +6142 +6143 +6144 +6145 +6146 +6147 +6148 +6149 +6150 +6151 +6152 +6153 +6154 +6155 +6156 +6157 +6158 +6159 +6160 +6161 +6162 +6163 +6164 +6165 +6166 +6167 +6168 +6169 +6170 +6171 +6172 +6173 +6174 +6175 +6176 +6177 +6178 +6179 +6180 +6181 +6182 +6183 +6184 +6185 +6186 +6187 +6188 +6189 +6190 +6191 +6192 +6193 +6194 +6195 +6196 +6197 +6198 +6199 +6200 +6201 +6202 +6203 +6204 +6205 +6206 +6207 +6208 +6209 +6210 +6211 +6212 +6213 +6214 +6215 +6216 +6217 +6218 +6219 +6220 +6221 +6222 +6223 +6224 
+6225 +6226 +6227 +6228 +6229 +6230 +6231 +6232 +6233 +6234 +6235 +6236 +6237 +6238 +6239 +6240 +6241 +6242 +6243 +6244 +6245 +6246 +6247 +6248 +6249 +6250 +6251 +6252 +6253 +6254 +6255 +6256 +6257 +6258 +6259 +6260 +6261 +6262 +6263 +6264 +6265 +6266 +6267 +6268 +6269 +6270 +6271 +6272 +6273 +6274 +6275 +6276 +6277 +6278 +6279 +6280 +6281 +6282 +6283 +6284 +6285 +6286 +6287 +6288 +6289 +6290 +6291 +6292 +6293 +6294 +6295 +6296 +6297 +6298 +6299 +6300 +6301 +6302 +6303 +6304 +6305 +6306 +6307 +6308 +6309 +6310 +6311 +6312 +6313 +6314 +6315 +6316 +6317 +6318 +6319 +6320 +6321 +6322 +6323 +6324 +6325 +6326 +6327 +6328 +6329 +6330 +6331 +6332 +6333 +6334 +6335 +6336 +6337 +6338 +6339 +6340 +6341 +6342 +6343 +6344 +6345 +6346 +6347 +6348 +6349 +6350 +6351 +6352 +6353 +6354 +6355 +6356 +6357 +6358 +6359 +6360 +6361 +6362 +6363 +6364 +6365 +6366 +6367 +6368 +6369 +6370 +6371 +6372 +6373 +6374 +6375 +6376 +6377 +6378 +6379 +6380 +6381 +6382 +6383 +6384 +6385 +6386 +6387 +6388 +6389 +6390 +6391 +6392 +6393 +6394 +6395 +6396 +6397 +6398 +6399 +6400 +6401 +6402 +6403 +6404 +6405 +6406 +6407 +6408 +6409 +6410 +6411 +6412 +6413 +6414 +6415 +6416 +6417 +6418 +6419 +6420 +6421 +6422 +6423 +6424 +6425 +6426 +6427 +6428 +6429 +6430 +6431 +6432 +6433 +6434 +6435 +6436 +6437 +6438 +6439 +6440 +6441 +6442 +6443 +6444 +6445 +6446 +6447 +6448 +6449 +6450 +6451 +6452 +6453 +6454 +6455 +6456 +6457 +6458 +6459 +6460 +6461 +6462 +6463 +6464 +6465 +6466 +6467 +6468 +6469 +6470 +6471 +6472 +6473 +6474 +6475 +6476 +6477 +6478 +6479 +6480 +6481 +6482 +6483 +6484 +6485 +6486 +6487 +6488 +6489 +6490 +6491 +6492 +6493 +6494 +6495 +6496 +6497 +6498 +6499 +6500 +6501 +6502 +6503 +6504 +6505 +6506 +6507 +6508 +6509 +6510 +6511 +6512 +6513 +6514 +6515 +6516 +6517 +6518 +6519 +6520 +6521 +6522 +6523 +6524 +6525 +6526 +6527 +6528 +6529 +6530 +6531 +6532 +6533 +6534 +6535 +6536 +6537 +6538 +6539 +6540 +6541 +6542 +6543 +6544 +6545 +6546 +6547 +6548 +6549 +6550 +6551 +6552 +6553 +6554 +6555 +6556 +6557 
+6558 +6559 +6560 +6561 +6562 +6563 +6564 +6565 +6566 +6567 +6568 +6569 +6570 +6571 +6572 +6573 +6574 +6575 +6576 +6577 +6578 +6579 +6580 +6581 +6582 +6583 +6584 +6585 +6586 +6587 +6588 +6589 +6590 +6591 +6592 +6593 +6594 +6595 +6596 +6597 +6598 +6599 +6600 +6601 +6602 +6603 +6604 +6605 +6606 +6607 +6608 +6609 +6610 +6611 +6612 +6613 +6614 +6615 +6616 +6617 +6618 +6619 +6620 +6621 +6622 +6623 +6624 +6625 +6626 +6627 +6628 +6629 +6630 +6631 +6632 +6633 +6634 +6635 +6636 +6637 +6638 +6639 +6640 +6641 +6642 +6643 +6644 +6645 +6646 +6647 +6648 +6649 +6650 +6651 +6652 +6653 +6654 +6655 +6656 +6657 +6658 +6659 +6660 +6661 +6662 +6663 +6664 +6665 +6666 +6667 +6668 +6669 +6670 +6671 +6672 +6673 +6674 +6675 +6676 +6677 +6678 +6679 +6680 +6681 +6682 +6683 +6684 +6685 +6686 +6687 +6688 +6689 +6690 +6691 +6692 +6693 +6694 +6695 +6696 +6697 +6698 +6699 +6700 +6701 +6702 +6703 +6704 +6705 +6706 +6707 +6708 +6709 +6710 +6711 +6712 +6713 +6714 +6715 +6716 +6717 +6718 +6719 +6720 +6721 +6722 +6723 +6724 +6725 +6726 +6727 +6728 +6729 +6730 +6731 +6732 +6733 +6734 +6735 +6736 +6737 +6738 +6739 +6740 +6741 +6742 +6743 +6744 +6745 +6746 +6747 +6748 +6749 +6750 +6751 +6752 +6753 +6754 +6755 +6756 +6757 +6758 +6759 +6760 +6761 +6762 +6763 +6764 +6765 +6766 +6767 +6768 +6769 +6770 +6771 +6772 +6773 +6774 +6775 +6776 +6777 +6778 +6779 +6780 +6781 +6782 +6783 +6784 +6785 +6786 +6787 +6788 +6789 +6790 +6791 +6792 +6793 +6794 +6795 +6796 +6797 +6798 +6799 +6800 +6801 +6802 +6803 +6804 +6805 +6806 +6807 +6808 +6809 +6810 +6811 +6812 +6813 +6814 +6815 +6816 +6817 +6818 +6819 +6820 +6821 +6822 +6823 +6824 +6825 +6826 +6827 +6828 +6829 +6830 +6831 +6832 +6833 +6834 +6835 +6836 +6837 +6838 +6839 +6840 +6841 +6842 +6843 +6844 +6845 +6846 +6847 +6848 +6849 +6850 +6851 +6852 +6853 +6854 +6855 +6856 +6857 +6858 +6859 +6860 +6861 +6862 +6863 +6864 +6865 +6866 +6867 +6868 +6869 +6870 +6871 +6872 +6873 +6874 +6875 +6876 +6877 +6878 +6879 +6880 +6881 +6882 +6883 +6884 +6885 +6886 +6887 +6888 +6889 +6890 
+6891 +6892 +6893 +6894 +6895 +6896 +6897 +6898 +6899 +6900 +6901 +6902 +6903 +6904 +6905 +6906 +6907 +6908 +6909 +6910 +6911 +6912 +6913 +6914 +6915 +6916 +6917 +6918 +6919 +6920 +6921 +6922 +6923 +6924 +6925 +6926 +6927 +6928 +6929 +6930 +6931 +6932 +6933 +6934 +6935 +6936 +6937 +6938 +6939 +6940 +6941 +6942 +6943 +6944 +6945 +6946 +6947 +6948 +6949 +6950 +6951 +6952 +6953 +6954 +6955 +6956 +6957 +6958 +6959 +6960 +6961 +6962 +6963 +6964 +6965 +6966 +6967 +6968 +6969 +6970 +6971 +6972 +6973 +6974 +6975 +6976 +6977 +6978 +6979 +6980 +6981 +6982 +6983 +6984 +6985 +6986 +6987 +6988 +6989 +6990 +6991 +6992 +6993 +6994 +6995 +6996 +6997 +6998 +6999 +7000 +7001 +7002 +7003 +7004 +7005 +7006 +7007 +7008 +7009 +7010 +7011 +7012 +7013 +7014 +7015 +7016 +7017 +7018 +7019 +7020 +7021 +7022 +7023 +7024 +7025 +7026 +7027 +7028 +7029 +7030 +7031 +7032 +7033 +7034 +7035 +7036 +7037 +7038 +7039 +7040 +7041 +7042 +7043 +7044 +7045 +7046 +7047 +7048 +7049 +7050 +7051 +7052 +7053 +7054 +7055 +7056 +7057 +7058 +7059 +7060 +7061 +7062 +7063 +7064 +7065 +7066 +7067 +7068 +7069 +7070 +7071 +7072 +7073 +7074 +7075 +7076 +7077 +7078 +7079 +7080 +7081 +7082 +7083 +7084 +7085 +7086 +7087 +7088 +7089 +7090 +7091 +7092 +7093 +7094 +7095 +7096 +7097 +7098 +7099 +7100 +7101 +7102 +7103 +7104 +7105 +7106 +7107 +7108 +7109 +7110 +7111 +7112 +7113 +7114 +7115 +7116 +7117 +7118 +7119 +7120 +7121 +7122 +7123 +7124 +7125 +7126 +7127 +7128 +7129 +7130 +7131 +7132 +7133 +7134 +7135 +7136 +7137 +7138 +7139 +7140 +7141 +7142 +7143 +7144 +7145 +7146 +7147 +7148 +7149 +7150 +7151 +7152 +7153 +7154 +7155 +7156 +7157 +7158 +7159 +7160 +7161 +7162 +7163 +7164 +7165 +7166 +7167 +7168 +7169 +7170 +7171 +7172 +7173 +7174 +7175 +7176 +7177 +7178 +7179 +7180 +7181 +7182 +7183 +7184 +7185 +7186 +7187 +7188 +7189 +7190 +7191 +7192 +7193 +7194 +7195 +7196 +7197 +7198 +7199 +7200 +7201 +7202 +7203 +7204 +7205 +7206 +7207 +7208 +7209 +7210 +7211 +7212 +7213 +7214 +7215 +7216 +7217 +7218 +7219 +7220 +7221 +7222 +7223 
+7224 +7225 +7226 +7227 +7228 +7229 +7230 +7231 +7232 +7233 +7234 +7235 +7236 +7237 +7238 +7239 +7240 +7241 +7242 +7243 +7244 +7245 +7246 +7247 +7248 +7249 +7250 +7251 +7252 +7253 +7254 +7255 +7256 +7257 +7258 +7259 +7260 +7261 +7262 +7263 +7264 +7265 +7266 +7267 +7268 +7269 +7270 +7271 +7272 +7273 +7274 +7275 +7276 +7277 +7278 +7279 +7280 +7281 +7282 +7283 +7284 +7285 +7286 +7287 +7288 +7289 +7290 +7291 +7292 +7293 +7294 +7295 +7296 +7297 +7298 +7299 +7300 +7301 +7302 +7303 +7304 +7305 +7306 +7307 +7308 +7309 +7310 +7311 +7312 +7313 +7314 +7315 +7316 +7317 +7318 +7319 +7320 +7321 +7322 +7323 +7324 +7325 +7326 +7327 +7328 +7329 +7330 +7331 +7332 +7333 +7334 +7335 +7336 +7337 +7338 +7339 +7340 +7341 +7342 +7343 +7344 +7345 +7346 +7347 +7348 +7349 +7350 +7351 +7352 +7353 +7354 +7355 +7356 +7357 +7358 +7359 +7360 +7361 +7362 +7363 +7364 +7365 +7366 +7367 +7368 +7369 +7370 +7371 +7372 +7373 +7374 +7375 +7376 +7377 +7378 +7379 +7380 +7381 +7382 +7383 +7384 +7385 +7386 +7387 +7388 +7389 +7390 +7391 +7392 +7393 +7394 +7395 +7396 +7397 +7398 +7399 +7400 +7401 +7402 +7403 +7404 +7405 +7406 +7407 +7408 +7409 +7410 +7411 +7412 +7413 +7414 +7415 +7416 +7417 +7418 +7419 +7420 +7421 +7422 +7423 +7424 +7425 +7426 +7427 +7428 +7429 +7430 +7431 +7432 +7433 +7434 +7435 +7436 +7437 +7438 +7439 +7440 +7441 +7442 +7443 +7444 +7445 +7446 +7447 +7448 +7449 +7450 +7451 +7452 +7453 +7454 +7455 +7456 +7457 +7458 +7459 +7460 +7461 +7462 +7463 +7464 +7465 +7466 +7467 +7468 +7469 +7470 +7471 +7472 +7473 +7474 +7475 +7476 +7477 +7478 +7479 +7480 +7481 +7482 +7483 +7484 +7485 +7486 +7487 +7488 +7489 +7490 +7491 +7492 +7493 +7494 +7495 +7496 +7497 +7498 +7499 +7500 +7501 +7502 +7503 +7504 +7505 +7506 +7507 +7508 +7509 +7510 +7511 +7512 +7513 +7514 +7515 +7516 +7517 +7518 +7519 +7520 +7521 +7522 +7523 +7524 +7525 +7526 +7527 +7528 +7529 +7530 +7531 +7532 +7533 +7534 +7535 +7536 +7537 +7538 +7539 +7540 +7541 +7542 +7543 +7544 +7545 +7546 +7547 +7548 +7549 +7550 +7551 +7552 +7553 +7554 +7555 +7556 
+7557 +7558 +7559 +7560 +7561 +7562 +7563 +7564 +7565 +7566 +7567 +7568 +7569 +7570 +7571 +7572 +7573 +7574 +7575 +7576 +7577 +7578 +7579 +7580 +7581 +7582 +7583 +7584 +7585 +7586 +7587 +7588 +7589 +7590 +7591 +7592 +7593 +7594 +7595 +7596 +7597 +7598 +7599 +7600 +7601 +7602 +7603 +7604 +7605 +7606 +7607 +7608 +7609 +7610 +7611 +7612 +7613 +7614 +7615 +7616 +7617 +7618 +7619 +7620 +7621 +7622 +7623 +7624 +7625 +7626 +7627 +7628 +7629 +7630 +7631 +7632 +7633 +7634 +7635 +7636 +7637 +7638 +7639 +7640 +7641 +7642 +7643 +7644 +7645 +7646 +7647 +7648 +7649 +7650 +7651 +7652 +7653 +7654 +7655 +7656 +7657 +7658 +7659 +7660 +7661 +7662 +7663 +7664 +7665 +7666 +7667 +7668 +7669 +7670 +7671 +7672 +7673 +7674 +7675 +7676 +7677 +7678 +7679 +7680 +7681 +7682 +7683 +7684 +7685 +7686 +7687 +7688 +7689 +7690 +7691 +7692 +7693 +7694 +7695 +7696 +7697 +7698 +7699 +7700 +7701 +7702 +7703 +7704 +7705 +7706 +7707 +7708 +7709 +7710 +7711 +7712 +7713 +7714 +7715 +7716 +7717 +7718 +7719 +7720 +7721 +7722 +7723 +7724 +7725 +7726 +7727 +7728 +7729 +7730 +7731 +7732 +7733 +7734 +7735 +7736 +7737 +7738 +7739 +7740 +7741 +7742 +7743 +7744 +7745 +7746 +7747 +7748 +7749 +7750 +7751 +7752 +7753 +7754 +7755 +7756 +7757 +7758 +7759 +7760 +7761 +7762 +7763 +7764 +7765 +7766 +7767 +7768 +7769 +7770 +7771 +7772 +7773 +7774 +7775 +7776 +7777 +7778 +7779 +7780 +7781 +7782 +7783 +7784 +7785 +7786 +7787 +7788 +7789 +7790 +7791 +7792 +7793 +7794 +7795 +7796 +7797 +7798 +7799 +7800 +7801 +7802 +7803 +7804 +7805 +7806 +7807 +7808 +7809 +7810 +7811 +7812 +7813 +7814 +7815 +7816 +7817 +7818 +7819 +7820 +7821 +7822 +7823 +7824 +7825 +7826 +7827 +7828 +7829 +7830 +7831 +7832 +7833 +7834 +7835 +7836 +7837 +7838 +7839 +7840 +7841 +7842 +7843 +7844 +7845 +7846 +7847 +7848 +7849 +7850 +7851 +7852 +7853 +7854 +7855 +7856 +7857 +7858 +7859 +7860 +7861 +7862 +7863 +7864 +7865 +7866 +7867 +7868 +7869 +7870 +7871 +7872 +7873 +7874 +7875 +7876 +7877 +7878 +7879 +7880 +7881 +7882 +7883 +7884 +7885 +7886 +7887 +7888 +7889 
+7890 +7891 +7892 +7893 +7894 +7895 +7896 +7897 +7898 +7899 +7900 +7901 +7902 +7903 +7904 +7905 +7906 +7907 +7908 +7909 +7910 +7911 +7912 +7913 +7914 +7915 +7916 +7917 +7918 +7919 +7920 +7921 +7922 +7923 +7924 +7925 +7926 +7927 +7928 +7929 +7930 +7931 +7932 +7933 +7934 +7935 +7936 +7937 +7938 +7939 +7940 +7941 +7942 +7943 +7944 +7945 +7946 +7947 +7948 +7949 +7950 +7951 +7952 +7953 +7954 +7955 +7956 +7957 +7958 +7959 +7960 +7961 +7962 +7963 +7964 +7965 +7966 +7967 +7968 +7969 +7970 +7971 +7972 +7973 +7974 +7975 +7976 +7977 +7978 +7979 +7980 +7981 +7982 +7983 +7984 +7985 +7986 +7987 +7988 +7989 +7990 +7991 +7992 +7993 +7994 +7995 +7996 +7997 +7998 +7999 +8000 +8001 +8002 +8003 +8004 +8005 +8006 +8007 +8008 +8009 +8010 +8011 +8012 +8013 +8014 +8015 +8016 +8017 +8018 +8019 +8020 +8021 +8022 +8023 +8024 +8025 +8026 +8027 +8028 +8029 +8030 +8031 +8032 +8033 +8034 +8035 +8036 +8037 +8038 +8039 +8040 +8041 +8042 +8043 +8044 +8045 +8046 +8047 +8048 +8049 +8050 +8051 +8052 +8053 +8054 +8055 +8056 +8057 +8058 +8059 +8060 +8061 +8062 +8063 +8064 +8065 +8066 +8067 +8068 +8069 +8070 +8071 +8072 +8073 +8074 +8075 +8076 +8077 +8078 +8079 +8080 +8081 +8082 +8083 +8084 +8085 +8086 +8087 +8088 +8089 +8090 +8091 +8092 +8093 +8094 +8095 +8096 +8097 +8098 +8099 +8100 +8101 +8102 +8103 +8104 +8105 +8106 +8107 +8108 +8109 +8110 +8111 +8112 +8113 +8114 +8115 +8116 +8117 +8118 +8119 +8120 +8121 +8122 +8123 +8124 +8125 +8126 +8127 +8128 +8129 +8130 +8131 +8132 +8133 +8134 +8135 +8136 +8137 +8138 +8139 +8140 +8141 +8142 +8143 +8144 +8145 +8146 +8147 +8148 +8149 +8150 +8151 +8152 +8153 +8154 +8155 +8156 +8157 +8158 +8159 +8160 +8161 +8162 +8163 +8164 +8165 +8166 +8167 +8168 +8169 +8170 +8171 +8172 +8173 +8174 +8175 +8176 +8177 +8178 +8179 +8180 +8181 +8182 +8183 +8184 +8185 +8186 +8187 +8188 +8189 +8190 +8191 +8192 +8193 +8194 +8195 +8196 +8197 +8198 +8199 +8200 +8201 +8202 +8203 +8204 +8205 +8206 +8207 +8208 +8209 +8210 +8211 +8212 +8213 +8214 +8215 +8216 +8217 +8218 +8219 +8220 +8221 +8222 
+8223 +8224 +8225 +8226 +8227 +8228 +8229 +8230 +8231 +8232 +8233 +8234 +8235 +8236 +8237 +8238 +8239 +8240 +8241 +8242 +8243 +8244 +8245 +8246 +8247 +8248 +8249 +8250 +8251 +8252 +8253 +8254 +8255 +8256 +8257 +8258 +8259 +8260 +8261 +8262 +8263 +8264 +8265 +8266 +8267 +8268 +8269 +8270 +8271 +8272 +8273 +8274 +8275 +8276 +8277 +8278 +8279 +8280 +8281 +8282 +8283 +8284 +8285 +8286 +8287 +8288 +8289 +8290 +8291 +8292 +8293 +8294 +8295 +8296 +8297 +8298 +8299 +8300 +8301 +8302 +8303 +8304 +8305 +8306 +8307 +8308 +8309 +8310 +8311 +8312 +8313 +8314 +8315 +8316 +8317 +8318 +8319 +8320 +8321 +8322 +8323 +8324 +8325 +8326 +8327 +8328 +8329 +8330 +8331 +8332 +8333 +8334 +8335 +8336 +8337 +8338 +8339 +8340 +8341 +8342 +8343 +8344 +8345 +8346 +8347 +8348 +8349 +8350 +8351 +8352 +8353 +8354 +8355 +8356 +8357 +8358 +8359 +8360 +8361 +8362 +8363 +8364 +8365 +8366 +8367 +8368 +8369 +8370 +8371 +8372 +8373 +8374 +8375 +8376 +8377 +8378 +8379 +8380 +8381 +8382 +8383 +8384 +8385 +8386 +8387 +8388 +8389 +8390 +8391 +8392 +8393 +8394 +8395 +8396 +8397 +8398 +8399 +8400 +8401 +8402 +8403 +8404 +8405 +8406 +8407 +8408 +8409 +8410 +8411 +8412 +8413 +8414 +8415 +8416 +8417 +8418 +8419 +8420 +8421 +8422 +8423 +8424 +8425 +8426 +8427 +8428 +8429 +8430 +8431 +8432 +8433 +8434 +8435 +8436 +8437 +8438 +8439 +8440 +8441 +8442 +8443 +8444 +8445 +8446 +8447 +8448 +8449 +8450 +8451 +8452 +8453 +8454 +8455 +8456 +8457 +8458 +8459 +8460 +8461 +8462 +8463 +8464 +8465 +8466 +8467 +8468 +8469 +8470 +8471 +8472 +8473 +8474 +8475 +8476 +8477 +8478 +8479 +8480 +8481 +8482 +8483 +8484 +8485 +8486 +8487 +8488 +8489 +8490 +8491 +8492 +8493 +8494 +8495 +8496 +8497 +8498 +8499 +8500 +8501 +8502 +8503 +8504 +8505 +8506 +8507 +8508 +8509 +8510 +8511 +8512 +8513 +8514 +8515 +8516 +8517 +8518 +8519 +8520 +8521 +8522 +8523 +8524 +8525 +8526 +8527 +8528 +8529 +8530 +8531 +8532 +8533 +8534 +8535 +8536 +8537 +8538 +8539 +8540 +8541 +8542 +8543 +8544 +8545 +8546 +8547 +8548 +8549 +8550 +8551 +8552 +8553 +8554 +8555 
+8556 +8557 +8558 +8559 +8560 +8561 +8562 +8563 +8564 +8565 +8566 +8567 +8568 +8569 +8570 +8571 +8572 +8573 +8574 +8575 +8576 +8577 +8578 +8579 +8580 +8581 +8582 +8583 +8584 +8585 +8586 +8587 +8588 +8589 +8590 +8591 +8592 +8593 +8594 +8595 +8596 +8597 +8598 +8599 +8600 +8601 +8602 +8603 +8604 +8605 +8606 +8607 +8608 +8609 +8610 +8611 +8612 +8613 +8614 +8615 +8616 +8617 +8618 +8619 +8620 +8621 +8622 +8623 +8624 +8625 +8626 +8627 +8628 +8629 +8630 +8631 +8632 +8633 +8634 +8635 +8636 +8637 +8638 +8639 +8640 +8641 +8642 +8643 +8644 +8645 +8646 +8647 +8648 +8649 +8650 +8651 +8652 +8653 +8654 +8655 +8656 +8657 +8658 +8659 +8660 +8661 +8662 +8663 +8664 +8665 +8666 +8667 +8668 +8669 +8670 +8671 +8672 +8673 +8674 +8675 +8676 +8677 +8678 +8679 +8680 +8681 +8682 +8683 +8684 +8685 +8686 +8687 +8688 +8689 +8690 +8691 +8692 +8693 +8694 +8695 +8696 +8697 +8698 +8699 +8700 +8701 +8702 +8703 +8704 +8705 +8706 +8707 +8708 +8709 +8710 +8711 +8712 +8713 +8714 +8715 +8716 +8717 +8718 +8719 +8720 +8721 +8722 +8723 +8724 +8725 +8726 +8727 +8728 +8729 +8730 +8731 +8732 +8733 +8734 +8735 +8736 +8737 +8738 +8739 +8740 +8741 +8742 +8743 +8744 +8745 +8746 +8747 +8748 +8749 +8750 +8751 +8752 +8753 +8754 +8755 +8756 +8757 +8758 +8759 +8760 +8761 +8762 +8763 +8764 +8765 +8766 +8767 +8768 +8769 +8770 +8771 +8772 +8773 +8774 +8775 +8776 +8777 +8778 +8779 +8780 +8781 +8782 +8783 +8784 +8785 +8786 +8787 +8788 +8789 +8790 +8791 +8792 +8793 +8794 +8795 +8796 +8797 +8798 +8799 +8800 +8801 +8802 +8803 +8804 +8805 +8806 +8807 +8808 +8809 +8810 +8811 +8812 +8813 +8814 +8815 +8816 +8817 +8818 +8819 +8820 +8821 +8822 +8823 +8824 +8825 +8826 +8827 +8828 +8829 +8830 +8831 +8832 +8833 +8834 +8835 +8836 +8837 +8838 +8839 +8840 +8841 +8842 +8843 +8844 +8845 +8846 +8847 +8848 +8849 +8850 +8851 +8852 +8853 +8854 +8855 +8856 +8857 +8858 +8859 +8860 +8861 +8862 +8863 +8864 +8865 +8866 +8867 +8868 +8869 +8870 +8871 +8872 +8873 +8874 +8875 +8876 +8877 +8878 +8879 +8880 +8881 +8882 +8883 +8884 +8885 +8886 +8887 +8888 
+8889 +8890 +8891 +8892 +8893 +8894 +8895 +8896 +8897 +8898 +8899 +8900 +8901 +8902 +8903 +8904 +8905 +8906 +8907 +8908 +8909 +8910 +8911 +8912 +8913 +8914 +8915 +8916 +8917 +8918 +8919 +8920 +8921 +8922 +8923 +8924 +8925 +8926 +8927 +8928 +8929 +8930 +8931 +8932 +8933 +8934 +8935 +8936 +8937 +8938 +8939 +8940 +8941 +8942 +8943 +8944 +8945 +8946 +8947 +8948 +8949 +8950 +8951 +8952 +8953 +8954 +8955 +8956 +8957 +8958 +8959 +8960 +8961 +8962 +8963 +8964 +8965 +8966 +8967 +8968 +8969 +8970 +8971 +8972 +8973 +8974 +8975 +8976 +8977 +8978 +8979 +8980 +8981 +8982 +8983 +8984 +8985 +8986 +8987 +8988 +8989 +8990 +8991 +8992 +8993 +8994 +8995 +8996 +8997 +8998 +8999 +9000 +9001 +9002 +9003 +9004 +9005 +9006 +9007 +9008 +9009 +9010 +9011 +9012 +9013 +9014 +9015 +9016 +9017 +9018 +9019 +9020 +9021 +9022 +9023 +9024 +9025 +9026 +9027 +9028 +9029 +9030 +9031 +9032 +9033 +9034 +9035 +9036 +9037 +9038 +9039 +9040 +9041 +9042 +9043 +9044 +9045 +9046 +9047 +9048 +9049 +9050 +9051 +9052 +9053 +9054 +9055 +9056 +9057 +9058 +9059 +9060 +9061 +9062 +9063 +9064 +9065 +9066 +9067 +9068 +9069 +9070 +9071 +9072 +9073 +9074 +9075 +9076 +9077 +9078 +9079 +9080 +9081 +9082 +9083 +9084 +9085 +9086 +9087 +9088 +9089 +9090 +9091 +9092 +9093 +9094 +9095 +9096 +9097 +9098 +9099 +9100 +9101 +9102 +9103 +9104 +9105 +9106 +9107 +9108 +9109 +9110 +9111 +9112 +9113 +9114 +9115 +9116 +9117 +9118 +9119 +9120 +9121 +9122 +9123 +9124 +9125 +9126 +9127 +9128 +9129 +9130 +9131 +9132 +9133 +9134 +9135 +9136 +9137 +9138 +9139 +9140 +9141 +9142 +9143 +9144 +9145 +9146 +9147 +9148 +9149 +9150 +9151 +9152 +9153 +9154 +9155 +9156 +9157 +9158 +9159 +9160 +9161 +9162 +9163 +9164 +9165 +9166 +9167 +9168 +9169 +9170 +9171 +9172 +9173 +9174 +9175 +9176 +9177 +9178 +9179 +9180 +9181 +9182 +9183 +9184 +9185 +9186 +9187 +9188 +9189 +9190 +9191 +9192 +9193 +9194 +9195 +9196 +9197 +9198 +9199 +9200 +9201 +9202 +9203 +9204 +9205 +9206 +9207 +9208 +9209 +9210 +9211 +9212 +9213 +9214 +9215 +9216 +9217 +9218 +9219 +9220 +9221 
+9222 +9223 +9224 +9225 +9226 +9227 +9228 +9229 +9230 +9231 +9232 +9233 +9234 +9235 +9236 +9237 +9238 +9239 +9240 +9241 +9242 +9243 +9244 +9245 +9246 +9247 +9248 +9249 +9250 +9251 +9252 +9253 +9254 +9255 +9256 +9257 +9258 +9259 +9260 +9261 +9262 +9263 +9264 +9265 +9266 +9267 +9268 +9269 +9270 +9271 +9272 +9273 +9274 +9275 +9276 +9277 +9278 +9279 +9280 +9281 +9282 +9283 +9284 +9285 +9286 +9287 +9288 +9289 +9290 +9291 +9292 +9293 +9294 +9295 +9296 +9297 +9298 +9299 +9300 +9301 +9302 +9303 +9304 +9305 +9306 +9307 +9308 +9309 +9310 +9311 +9312 +9313 +9314 +9315 +9316 +9317 +9318 +9319 +9320 +9321 +9322 +9323 +9324 +9325 +9326 +9327 +9328 +9329 +9330 +9331 +9332 +9333 +9334 +9335 +9336 +9337 +9338 +9339 +9340 +9341 +9342 +9343 +9344 +9345 +9346 +9347 +9348 +9349 +9350 +9351 +9352 +9353 +9354 +9355 +9356 +9357 +9358 +9359 +9360 +9361 +9362 +9363 +9364 +9365 +9366 +9367 +9368 +9369 +9370 +9371 +9372 +9373 +9374 +9375 +9376 +9377 +9378 +9379 +9380 +9381 +9382 +9383 +9384 +9385 +9386 +9387 +9388 +9389 +9390 +9391 +9392 +9393 +9394 +9395 +9396 +9397 +9398 +9399 +9400 +9401 +9402 +9403 +9404 +9405 +9406 +9407 +9408 +9409 +9410 +9411 +9412 +9413 +9414 +9415 +9416 +9417 +9418 +9419 +9420 +9421 +9422 +9423 +9424 +9425 +9426 +9427 +9428 +9429 +9430 +9431 +9432 +9433 +9434 +9435 +9436 +9437 +9438 +9439 +9440 +9441 +9442 +9443 +9444 +9445 +9446 +9447 +9448 +9449 +9450 +9451 +9452 +9453 +9454 +9455 +9456 +9457 +9458 +9459 +9460 +9461 +9462 +9463 +9464 +9465 +9466 +9467 +9468 +9469 +9470 +9471 +9472 +9473 +9474 +9475 +9476 +9477 +9478 +9479 +9480 +9481 +9482 +9483 +9484 +9485 +9486 +9487 +9488 +9489 +9490 +9491 +9492 +9493 +9494 +9495 +9496 +9497 +9498 +9499 +9500 +9501 +9502 +9503 +9504 +9505 +9506 +9507 +9508 +9509 +9510 +9511 +9512 +9513 +9514 +9515 +9516 +9517 +9518 +9519 +9520 +9521 +9522 +9523 +9524 +9525 +9526 +9527 +9528 +9529 +9530 +9531 +9532 +9533 +9534 +9535 +9536 +9537 +9538 +9539 +9540 +9541 +9542 +9543 +9544 +9545 +9546 +9547 +9548 +9549 +9550 +9551 +9552 +9553 +9554 
+9555 +9556 +9557 +9558 +9559 +9560 +9561 +9562 +9563 +9564 +9565 +9566 +9567 +9568 +9569 +9570 +9571 +9572 +9573 +9574 +9575 +9576 +9577 +9578 +9579 +9580 +9581 +9582 +9583 +9584 +9585 +9586 +9587 +9588 +9589 +9590 +9591 +9592 +9593 +9594 +9595 +9596 +9597 +9598 +9599 +9600 +9601 +9602 +9603 +9604 +9605 +9606 +9607 +9608 +9609 +9610 +9611 +9612 +9613 +9614 +9615 +9616 +9617 +9618 +9619 +9620 +9621 +9622 +9623 +9624 +9625 +9626 +9627 +9628 +9629 +9630 +9631 +9632 +9633 +9634 +9635 +9636 +9637 +9638 +9639 +9640 +9641 +9642 +9643 +9644 +9645 +9646 +9647 +9648 +9649 +9650 +9651 +9652 +9653 +9654 +9655 +9656 +9657 +9658 +9659 +9660 +9661 +9662 +9663 +9664 +9665 +9666 +9667 +9668 +9669 +9670 +9671 +9672 +9673 +9674 +9675 +9676 +9677 +9678 +9679 +9680 +9681 +9682 +9683 +9684 +9685 +9686 +9687 +9688 +9689 +9690 +9691 +9692 +9693 +9694 +9695 +9696 +9697 +9698 +9699 +9700 +9701 +9702 +9703 +9704 +9705 +9706 +9707 +9708 +9709 +9710 +9711 +9712 +9713 +9714 +9715 +9716 +9717 +9718 +9719 +9720 +9721 +9722 +9723 +9724 +9725 +9726 +9727 +9728 +9729 +9730 +9731 +9732 +9733 +9734 +9735 +9736 +9737 +9738 +9739 +9740 +9741 +9742 +9743 +9744 +9745 +9746 +9747 +9748 +9749 +9750 +9751 +9752 +9753 +9754 +9755 +9756 +9757 +9758 +9759 +9760 +9761 +9762 +9763 +9764 +9765 +9766 +9767 +9768 +9769 +9770 +9771 +9772 +9773 +9774 +9775 +9776 +9777 +9778 +9779 +9780 +9781 +9782 +9783 +9784 +9785 +9786 +9787 +9788 +9789 +9790 +9791 +9792 +9793 +9794 +9795 +9796 +9797 +9798 +9799 +9800 +9801 +9802 +9803 +9804 +9805 +9806 +9807 +9808 +9809 +9810 +9811 +9812 +9813 +9814 +9815 +9816 +9817 +9818 +9819 +9820 +9821 +9822 +9823 +9824 +9825 +9826 +9827 +9828 +9829 +9830 +9831 +9832 +9833 +9834 +9835 +9836 +9837 +9838 +9839 +9840 +9841 +9842 +9843 +9844 +9845 +9846 +9847 +9848 +9849 +9850 +9851 +9852 +9853 +9854 +9855 +9856 +9857 +9858 +9859 +9860 +9861 +9862 +9863 +9864 +9865 +9866 +9867 +9868 +9869 +9870 +9871 +9872 +9873 +9874 +9875 +9876 +9877 +9878 +9879 +9880 +9881 +9882 +9883 +9884 +9885 +9886 +9887 
+9888 +9889 +9890 +9891 +9892 +9893 +9894 +9895 +9896 +9897 +9898 +9899 +9900 +9901 +9902 +9903 +9904 +9905 +9906 +9907 +9908 +9909 +9910 +9911 +9912 +9913 +9914 +9915 +9916 +9917 +9918 +9919 +9920 +9921 +9922 +9923 +9924 +9925 +9926 +9927 +9928 +9929 +9930 +9931 +9932 +9933 +9934 +9935 +9936 +9937 +9938 +9939 +9940 +9941 +9942 +9943 +9944 +9945 +9946 +9947 +9948 +9949 +9950 +9951 +9952 +9953 +9954 +9955 +9956 +9957 +9958 +9959 +9960 +9961 +9962 +9963 +9964 +9965 +9966 +9967 +9968 +9969 +9970 +9971 +9972 +9973 +9974 +9975 +9976 +9977 +9978 +9979 +9980 +9981 +9982 +9983 +9984 +9985 +9986 +9987 +9988 +9989 +9990 +9991 +9992 +9993 +9994 +9995 +9996 +9997 +9998 +9999 +10000 diff --git a/data/contestants.1.csv b/data/contestants.1.csv new file mode 100644 index 000000000..bdd28c4d4 --- /dev/null +++ b/data/contestants.1.csv @@ -0,0 +1,5 @@ +a,1990-01-10,2090,97.1,XA ,{a} +b,1990-11-01,2203,98.1,XA ,"{a,b}" +c,1988-11-01,2907,99.4,XB ,"{w,y}" +d,1985-05-05,2314,98.3,XB ,{} +e,1995-05-05,2236,98.2,XC ,{a} diff --git a/data/contestants.2.csv b/data/contestants.2.csv new file mode 100644 index 000000000..1a4506bc3 --- /dev/null +++ b/data/contestants.2.csv @@ -0,0 +1,3 @@ +f,1983-04-02,3090,99.6,XD ,"{a,b,c,y}" +g,1991-12-13,1803,85.1,XD ,"{a,c}" +h,1987-10-26,2112,95.4,XD ,"{w,a}" diff --git a/data/datetime_types.csv b/data/datetime_types.csv new file mode 100644 index 000000000..b5c2eb099 --- /dev/null +++ b/data/datetime_types.csv @@ -0,0 +1,2 @@ +2000-01-02 04:05:06,1999-01-08 14:05:06+02,2000-01-02,04:05:06,04:00:00 +1970-01-01 00:00:00,infinity,-infinity,00:00:00,00:00:00 diff --git a/data/enum_and_composite_types.csv b/data/enum_and_composite_types.csv new file mode 100644 index 000000000..979f2ebc3 --- /dev/null +++ b/data/enum_and_composite_types.csv @@ -0,0 +1,2 @@ +a,"(2,b)" +b,"(3,c)" diff --git a/data/null_values.csv b/data/null_values.csv new file mode 100644 index 000000000..7ddd67b24 --- /dev/null +++ b/data/null_values.csv @@ -0,0 +1,2 @@ +,{NULL},"(,)" +,, 
diff --git a/data/other_types.csv b/data/other_types.csv new file mode 100644 index 000000000..487f386fb --- /dev/null +++ b/data/other_types.csv @@ -0,0 +1,2 @@ +f,\xdeadbeef,$1.00,192.168.1.2,10101,a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,"{""key"": ""value""}" +t,\xcdb0,$1.50,127.0.0.1,"",a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,[] diff --git a/data/range_types.csv b/data/range_types.csv new file mode 100644 index 000000000..db0ca880c --- /dev/null +++ b/data/range_types.csv @@ -0,0 +1,2 @@ +"[1,3)","[1,3)","[1,3)","[""2000-01-02 00:30:00"",""2010-02-03 12:30:00"")" +empty,"[1,)","(,)",empty diff --git a/expected/alter.out b/expected/alter.out new file mode 100644 index 000000000..659e2723e --- /dev/null +++ b/expected/alter.out @@ -0,0 +1,178 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +-- test analyze +ANALYZE test_alter_table; +-- verify select queries run as expected +SELECT * FROM test_alter_table; + b | c +---+--- + 2 | 3 + 5 | 6 + 8 | 9 +(3 rows) + +SELECT a FROM test_alter_table; +ERROR: column "a" does not exist +LINE 1: SELECT a FROM test_alter_table; + ^ +SELECT b FROM test_alter_table; + b +--- + 2 + 5 + 8 +(3 rows) + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +ERROR: INSERT has more expressions than target columns +LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); + ^ +INSERT INTO test_alter_table (SELECT 5, 8); +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | +(4 rows) + +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 
5 | 6 | + 8 | 9 | + 5 | 8 | + 3 | 5 | 8 +(5 rows) + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 +(5 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 + 1 | 2 | 4 | 8 +(6 rows) + +-- add a variable-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME +(6 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME + 1 | 2 | 4 | 8 | ABCDEF +(7 rows) + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; + b | d | f +---+---+--------- + 2 | | TEXT ME + 5 | | TEXT ME + 8 | | TEXT ME + 5 | | TEXT ME + 3 | 8 | TEXT ME + 1 | 4 | TEXT ME + 1 | 4 | ABCDEF +(7 rows) + +SELECT count(*) from test_alter_table; + count +------- + 7 +(1 row) + +SELECT count(t.*) from test_alter_table t; + count +------- + 7 +(1 row) + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for 
column "g" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "h" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + b | d | f | g | h +---+---+---------+---+--- + 2 | | TEXT ME | | + 5 | | TEXT ME | | + 8 | | TEXT ME | | + 5 | | TEXT ME | | + 3 | 8 | TEXT ME | | + 1 | 4 | TEXT ME | | + 1 | 4 | ABCDEF | | +(7 rows) + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; +ERROR: Column j cannot be cast automatically to type pg_catalog.int4 +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; +DROP FOREIGN TABLE test_alter_table; diff --git a/expected/analyze.out b/expected/analyze.out new file mode 100644 index 000000000..f8c4d974a --- /dev/null +++ b/expected/analyze.out @@ -0,0 +1,19 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. 
+-- +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + count +------- + 6 +(1 row) + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; + count +------- + 6 +(1 row) + diff --git a/expected/drop.out b/expected/drop.out new file mode 100644 index 000000000..dc5678da7 --- /dev/null +++ b/expected/drop.out @@ -0,0 +1,97 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP FOREIGN TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 2 +(1 row) + +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +DROP SCHEMA test_schema CASCADE; +NOTICE: drop cascades to foreign table test_schema.test_table +-- Check that the files have been deleted and the directory is empty after the +-- DROP table command. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +SELECT current_database() datname \gset +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +-- should see 2 files, data and footer file for single table +SELECT count(*) FROM pg_ls_dir('cstore_fdw/' || :databaseoid); + count +------- + 2 +(1 row) + +-- should see 2 directories 1 for each database, excluding postgres database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + count +------- + 2 +(1 row) + +DROP EXTENSION cstore_fdw CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to server cstore_server +drop cascades to foreign table test_table +-- should only see 1 directory here +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + count +------- + 1 +(1 row) + +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +-- should see 2 directories 1 for each database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + count +------- + 2 +(1 row) + +\c :datname +DROP DATABASE db_to_drop; +-- should only see 1 directory for the default database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + count +------- + 1 +(1 row) + diff --git a/expected/functions.out b/expected/functions.out new file mode 100644 index 000000000..117fc15f9 --- /dev/null +++ 
b/expected/functions.out @@ -0,0 +1,18 @@ +-- +-- Test utility functions for cstore_fdw tables. +-- +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); +COPY table_with_data FROM STDIN; +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); + ?column? +---------- + t +(1 row) + +SELECT cstore_table_size('non_cstore_table'); +ERROR: relation is not a cstore table +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/expected/insert.out b/expected/insert.out new file mode 100644 index 000000000..49d9ed132 --- /dev/null +++ b/expected/insert.out @@ -0,0 +1,88 @@ +-- +-- Testing insert on cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +-- test single row inserts fail +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command values(1); +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command default values; +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); +select count(*) from test_insert_command_data; + count +------- + 0 +(1 row) + +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + count +------- + 1 +(1 row) + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + count +------- + 1 +(1 row) + +drop table test_insert_command_data; +drop foreign table test_insert_command; +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as 
int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; +-- drop source table to remove original text from toast +DROP TABLE test_long_text; +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + int_val +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/expected/query.out b/expected/query.out new file mode 100644 index 000000000..7ac3508a4 --- /dev/null +++ b/expected/query.out @@ -0,0 +1,105 @@ +-- +-- Test querying cstore_fdw tables. 
+-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +-- Query uncompressed data +SELECT count(*) FROM contestant; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant_compressed ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 
1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + to_json +------------------------------------------------------------------------------------------------------------------ + {"handle":"g","birthdate":"1991-12-13","rating":1803,"percentile":85.1,"country":"XD ","achievements":["a","c"]} +(1 row) + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + ?column? | b +----------+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 +(10 rows) + +DROP FOREIGN TABLE union_first, union_second; diff --git a/expected/truncate.out b/expected/truncate.out new file mode 100644 index 000000000..e16a6ea9f --- /dev/null +++ b/expected/truncate.out @@ -0,0 +1,262 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + t +(1 row) + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 26 +(1 row) + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 6 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO 
cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE 
cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for table truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + 
) AS q1) AS q2; + count +------- + 0 +(1 row) + diff --git a/expected/truncate_0.out b/expected/truncate_0.out new file mode 100644 index 000000000..c8cc4ad98 --- /dev/null +++ b/expected/truncate_0.out @@ -0,0 +1,262 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + f +(1 row) + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT 
count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 26 +(1 row) + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 6 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from 
generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for relation truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can 
truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + diff --git a/input/block_filtering.source b/input/block_filtering.source new file mode 100644 index 000000000..4451262d4 --- /dev/null +++ b/input/block_filtering.source @@ -0,0 +1,71 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- + + +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. 
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; + + +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/block_filtering.cstore', + block_row_count '1000', stripe_row_count '2000'); + +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; + + +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); 
+SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/collation_block_filtering.cstore'); +COPY collation_block_filtering_test FROM STDIN; +A +Å +B +\. + +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/input/copyto.source b/input/copyto.source new file mode 100644 index 000000000..96403a3f4 --- /dev/null +++ b/input/copyto.source @@ -0,0 +1,18 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/test_contestant.cstore'); + +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; + +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; + +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/input/create.source b/input/create.source new file mode 100644 index 000000000..fbd27dc50 --- /dev/null +++ b/input/create.source @@ -0,0 +1,49 @@ +-- +-- Test the CREATE statements related to cstore_fdw. 
+-- + + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + + +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(filename 'data.cstore', bad_option_name '1'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(filename 'data.cstore', stripe_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(filename 'data.cstore', block_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(filename 'data.cstore', compression 'invalid_compression'); -- ERROR + +-- Invalid file path test +CREATE FOREIGN TABLE test_invalid_file_path () + SERVER cstore_server + OPTIONS(filename 'bad_directory_path/bad_file_path'); --ERROR + +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/contestant.cstore'); + + +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); + +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; diff --git a/input/data_types.source b/input/data_types.source new file mode 100644 index 000000000..c3398c67b --- /dev/null +++ b/input/data_types.source @@ -0,0 +1,74 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- + + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; + + +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/array_types.cstore'); + +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; + +SELECT * FROM test_array_types; + + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/datetime_types.cstore'); + +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; + +SELECT * FROM test_datetime_types; + + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); + +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/enum_and_composite_types.cstore'); + +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; + +SELECT * FROM test_enum_and_composite_types; + + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/range_types.cstore'); + +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; + +SELECT * FROM test_range_types; + + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/other_types.cstore'); + +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; + 
+SELECT * FROM test_other_types; + + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/null_values.cstore'); + +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; + +SELECT * FROM test_null_values; diff --git a/input/load.source b/input/load.source new file mode 100644 index 000000000..0913acde7 --- /dev/null +++ b/input/load.source @@ -0,0 +1,44 @@ +-- +-- Test loading data into cstore_fdw tables. +-- + +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR + +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR + +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; + +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; + +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +3.141,pi,1 +2.718,e,2 +0.577,gamma,3 +5.291e-11,bohr radius,4 +\. + +COPY famous_constants (name, value) FROM STDIN WITH CSV; +avagadro,6.022e23 +electron mass,9.109e-31 +proton mass,1.672e-27 +speed of light,2.997e8 +\. 
+ +SELECT * FROM famous_constants ORDER BY id, name; + +DROP FOREIGN TABLE famous_constants; diff --git a/output/block_filtering.source b/output/block_filtering.source new file mode 100644 index 000000000..21e1eb772 --- /dev/null +++ b/output/block_filtering.source @@ -0,0 +1,118 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. +-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/block_filtering.cstore', + block_row_count '1000', stripe_row_count '2000'); +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); + filtered_row_count +-------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); + filtered_row_count +-------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); + 
filtered_row_count +-------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +-------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 1979 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 3958 +(1 row) + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/collation_block_filtering.cstore'); +COPY collation_block_filtering_test FROM STDIN; +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; + a +--- + Å +(1 row) + diff --git a/output/copyto.source b/output/copyto.source new file mode 100644 index 000000000..6024dd205 --- /dev/null +++ b/output/copyto.source @@ -0,0 +1,24 @@ +-- +-- Test copying data from cstore_fdw tables. 
+-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/test_contestant.cstore'); +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/output/create.source b/output/create.source new file mode 100644 index 000000000..937afa2a0 --- /dev/null +++ b/output/create.source @@ -0,0 +1,50 @@ +-- +-- Test the CREATE statements related to cstore_fdw. 
+-- +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(filename 'data.cstore', bad_option_name '1'); -- ERROR +ERROR: invalid option "bad_option_name" +HINT: Valid options in this context are: filename, compression, stripe_row_count, block_row_count +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(filename 'data.cstore', stripe_row_count '0'); -- ERROR +ERROR: invalid stripe row count +HINT: Stripe row count must be an integer between 1000 and 10000000 +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(filename 'data.cstore', block_row_count '0'); -- ERROR +ERROR: invalid block row count +HINT: Block row count must be an integer between 1000 and 100000 +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(filename 'data.cstore', compression 'invalid_compression'); -- ERROR +ERROR: invalid compression type +HINT: Valid options are: none, pglz +-- Invalid file path test +CREATE FOREIGN TABLE test_invalid_file_path () + SERVER cstore_server + OPTIONS(filename 'bad_directory_path/bad_file_path'); --ERROR +ERROR: could not open file "bad_directory_path/bad_file_path" for writing: No such file or directory +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/contestant.cstore'); +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); +-- Test that querying an empty table works +ANALYZE 
contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/output/data_types.source b/output/data_types.source new file mode 100644 index 000000000..efa03a663 --- /dev/null +++ b/output/data_types.source @@ -0,0 +1,84 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/array_types.cstore'); +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; +SELECT * FROM test_array_types; + int_array | bigint_array | text_array +--------------------------+--------------------------------------------+------------ + {1,2,3} | {1,2,3} | {a,b,c} + {} | {} | {} + {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} +(3 rows) + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/datetime_types.cstore'); +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; +SELECT * FROM test_datetime_types; + timestamp | timestamp_with_timezone | date | time | interval +---------------------+-------------------------+------------+----------+----------- + 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours + 1970-01-01 00:00:00 | infinity | -infinity | 00:00:00 | @ 0 +(2 rows) + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server + 
OPTIONS(filename '@abs_srcdir@/data/enum_and_composite_types.cstore'); +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; +SELECT * FROM test_enum_and_composite_types; + enum | composite +------+----------- + a | (2,b) + b | (3,c) +(2 rows) + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/range_types.cstore'); +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; +SELECT * FROM test_range_types; + int4range | int8range | numrange | tsrange +-----------+-----------+----------+----------------------------------------------- + [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") + empty | [1,) | (,) | empty +(2 rows) + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/other_types.cstore'); +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; +SELECT * FROM test_other_types; + bool | bytea | money | inet | bitstring | uuid | json +------+------------+-------+-------------+-----------+--------------------------------------+------------------ + f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} + t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] +(2 rows) + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/null_values.cstore'); +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; +SELECT * FROM test_null_values; + a | b | c +---+--------+----- + | {NULL} | (,) + | | +(2 rows) + diff --git a/output/load.source b/output/load.source new file mode 
100644 index 000000000..c76f203eb --- /dev/null +++ b/output/load.source @@ -0,0 +1,39 @@ +-- +-- Test loading data into cstore_fdw tables. +-- +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR +ERROR: missing data for column "birthdate" +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR +ERROR: program "invalid_program" failed +DETAIL: command not found +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +COPY famous_constants (name, value) FROM STDIN WITH CSV; +SELECT * FROM famous_constants ORDER BY id, name; + id | name | value +----+----------------+----------- + 1 | pi | 3.141 + 2 | e | 2.718 + 3 | gamma | 0.577 + 4 | bohr radius | 5.291e-11 + | avagadro | 6.022e+23 + | electron mass | 9.109e-31 + | proton mass | 1.672e-27 + | speed of light | 2.997e+08 +(8 rows) + +DROP FOREIGN TABLE famous_constants; diff --git a/sql/alter.sql b/sql/alter.sql new file mode 100644 index 000000000..5ba3beb34 --- /dev/null +++ b/sql/alter.sql @@ -0,0 +1,85 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. 
+-- + +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; + +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; + +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; + +-- test analyze +ANALYZE test_alter_table; + +-- verify select queries run as expected +SELECT * FROM test_alter_table; +SELECT a FROM test_alter_table; +SELECT b FROM test_alter_table; + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +INSERT INTO test_alter_table (SELECT 5, 8); + + +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + + +-- add a variable-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; +SELECT count(*) from test_alter_table; +SELECT count(t.*) from test_alter_table t; + + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; 
+ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; + +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; + +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; + +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; + +DROP FOREIGN TABLE test_alter_table; diff --git a/sql/analyze.sql b/sql/analyze.sql new file mode 100644 index 000000000..4476454a6 --- /dev/null +++ b/sql/analyze.sql @@ -0,0 +1,11 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. +-- + +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; diff --git a/sql/drop.sql b/sql/drop.sql new file mode 100644 index 000000000..a0852a279 --- /dev/null +++ b/sql/drop.sql @@ -0,0 +1,76 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP FOREIGN TABLE +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- + +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. + +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; + +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +DROP SCHEMA test_schema CASCADE; + +-- Check that the files have been deleted and the directory is empty after the +-- DROP table command. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + +SELECT current_database() datname \gset + +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +-- should see 2 files, data and footer file for single table +SELECT count(*) FROM pg_ls_dir('cstore_fdw/' || :databaseoid); + +-- should see 2 directories 1 for each database, excluding postgres database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + +DROP EXTENSION cstore_fdw CASCADE; + +-- should only see 1 directory here +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; + +-- should see 2 directories 1 for each database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + +\c :datname + +DROP 
DATABASE db_to_drop; + +-- should only see 1 directory for the default database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; diff --git a/sql/functions.sql b/sql/functions.sql new file mode 100644 index 000000000..ed7e260b3 --- /dev/null +++ b/sql/functions.sql @@ -0,0 +1,20 @@ +-- +-- Test utility functions for cstore_fdw tables. +-- + +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); + +COPY table_with_data FROM STDIN; +1 +2 +3 +\. + +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); +SELECT cstore_table_size('non_cstore_table'); + +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/sql/insert.sql b/sql/insert.sql new file mode 100644 index 000000000..7a6b075ce --- /dev/null +++ b/sql/insert.sql @@ -0,0 +1,56 @@ +-- +-- Testing insert on cstore_fdw tables. 
+-- + +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; + +-- test single row inserts fail +select count(*) from test_insert_command; +insert into test_insert_command values(1); +select count(*) from test_insert_command; + +insert into test_insert_command default values; +select count(*) from test_insert_command; + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); + +select count(*) from test_insert_command_data; +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + +drop table test_insert_command_data; +drop foreign table test_insert_command; + +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; + +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; + +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; + +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; + +-- drop source table to remove original text from toast +DROP TABLE test_long_text; + +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/sql/query.sql b/sql/query.sql new file mode 100644 index 000000000..87743e7bd --- /dev/null +++ b/sql/query.sql @@ -0,0 +1,34 @@ +-- +-- Test querying cstore_fdw tables. 
+-- + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; + +-- Query uncompressed data +SELECT count(*) FROM contestant; +SELECT avg(rating), stddev_samp(rating) FROM contestant; +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant ORDER BY handle; + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant_compressed ORDER BY handle; + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; + +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; + +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + +DROP FOREIGN TABLE union_first, union_second; diff --git a/sql/truncate.sql b/sql/truncate.sql new file mode 100644 index 000000000..0aac2bd34 --- /dev/null +++ b/sql/truncate.sql @@ -0,0 +1,135 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- + +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; + +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; + +-- query rows +SELECT * FROM cstore_truncate_test; + +TRUNCATE TABLE cstore_truncate_test; + +SELECT * FROM cstore_truncate_test; + +SELECT COUNT(*) from cstore_truncate_test; + +SELECT count(*) FROM cstore_truncate_test_compressed; +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; + +SELECT * from cstore_truncate_test; + +SELECT * from cstore_truncate_test_second; + +SELECT * from cstore_truncate_test_regular; + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + 
cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; + +SELECT * from cstore_truncate_test; +SELECT * from cstore_truncate_test_second; +SELECT * from cstore_truncate_test_regular; + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; + +SELECT cstore_truncate_test_regular_func(); +-- the cached plans are used starting from the second call +SELECT cstore_truncate_test_regular_func(); +DROP FUNCTION cstore_truncate_test_regular_func(); + +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; + +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); + +-- create a user that cannot truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; + +SELECT current_user \gset + +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM 
truncate_schema.truncate_tbl; + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; + +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +\c - :current_user + +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +DROP USER truncate_user; + +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; From ba506acd35dee6859c517fbd45669a44a782bfa8 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Mon, 31 Aug 2020 11:39:08 -0700 Subject: [PATCH 02/91] Refactor the FDW API to take code out of cstore_fdw.c. --- Makefile | 4 +- cstore.c | 170 ++++++++++++++++++++++++++++ cstore.h | 311 +++++++++++++++++++++++++++++++++++++++++++++++++++ cstore_fdw.c | 258 +++++++++--------------------------------- cstore_fdw.h | 286 +--------------------------------------------- mod.c | 30 +++++ 6 files changed, 568 insertions(+), 491 deletions(-) create mode 100644 cstore.c create mode 100644 cstore.h create mode 100644 mod.c diff --git a/Makefile b/Makefile index 72daebc55..bd3ae77ce 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ MODULE_big = cstore_fdw PG_CPPFLAGS = --std=c99 SHLIB_LINK = -lprotobuf-c -OBJS = cstore.pb-c.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_metadata_serialization.o cstore_compression.o +OBJS = cstore.pb-c.o cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ + cstore_metadata_serialization.o cstore_compression.o mod.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ diff --git a/cstore.c b/cstore.c new file mode 100644 index 000000000..ccb59675f --- /dev/null +++ 
b/cstore.c @@ -0,0 +1,170 @@ +/*------------------------------------------------------------------------- + * + * cstore.c + * + * This file contains utility functions for the cstore_fdw on-disk layout: + * creating and removing the per-database cstore directories under $PGDATA, + * initializing data/footer files for a cstore table, and parsing the + * compression-type table option. + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" + +#include "cstore.h" + +#include <sys/stat.h> + +static void CreateDirectory(StringInfo directoryName); +static bool DirectoryExists(StringInfo directoryName); + +/* ParseCompressionType converts a string to a compression type. */ +CompressionType +ParseCompressionType(const char *compressionTypeString) +{ + CompressionType compressionType = COMPRESSION_TYPE_INVALID; + Assert(compressionTypeString != NULL); + + if (strncmp(compressionTypeString, COMPRESSION_STRING_NONE, NAMEDATALEN) == 0) + { + compressionType = COMPRESSION_NONE; + } + else if (strncmp(compressionTypeString, COMPRESSION_STRING_PG_LZ, NAMEDATALEN) == 0) + { + compressionType = COMPRESSION_PG_LZ; + } + + return compressionType; +} + +/* CreateDirectory creates a new directory with the given directory name. */ +static void +CreateDirectory(StringInfo directoryName) +{ + int makeOK = mkdir(directoryName->data, S_IRWXU); + if (makeOK != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + directoryName->data))); + } +} + +/* DirectoryExists checks if a directory exists for the given directory name. 
*/ +static bool +DirectoryExists(StringInfo directoryName) +{ + bool directoryExists = true; + struct stat directoryStat; + + int statOK = stat(directoryName->data, &directoryStat); + if (statOK == 0) + { + /* file already exists; check that it is a directory */ + if (!S_ISDIR(directoryStat.st_mode)) + { + ereport(ERROR, (errmsg("\"%s\" is not a directory", directoryName->data), + errhint("You need to remove or rename the file \"%s\".", + directoryName->data))); + } + } + else + { + if (errno == ENOENT) + { + directoryExists = false; + } + else + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat directory \"%s\": %m", + directoryName->data))); + } + } + + return directoryExists; +} + +/* + * RemoveCStoreDatabaseDirectory removes CStore directory previously + * created for this database. + * However it does not remove 'cstore_fdw' directory even if there + * are no other databases left. + */ +void +RemoveCStoreDatabaseDirectory(Oid databaseOid) +{ + StringInfo cstoreDirectoryPath = makeStringInfo(); + StringInfo cstoreDatabaseDirectoryPath = makeStringInfo(); + + appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); + + appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, + CSTORE_FDW_NAME, databaseOid); + + if (DirectoryExists(cstoreDatabaseDirectoryPath)) + { + rmtree(cstoreDatabaseDirectoryPath->data, true); + } +} + + +/* + * InitializeCStoreTableFile creates data and footer file for a cstore table. + * The function assumes data and footer files do not exist, therefore + * it should be called on empty or non-existing table. Notice that the caller + * is expected to acquire AccessExclusiveLock on the relation. + */ +void +InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions) +{ + TableWriteState *writeState = NULL; + TupleDesc tupleDescriptor = RelationGetDescr(relation); + + /* + * Initialize state to write to the cstore file. 
This creates an + * empty data file and a valid footer file for the table. + */ + writeState = CStoreBeginWrite(cstoreOptions->filename, + cstoreOptions->compressionType, cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupleDescriptor); + CStoreEndWrite(writeState); +} + + +/* + * CreateCStoreDatabaseDirectory creates the directory (and parent directories, + * if needed) used to store automatically managed cstore_fdw files. The path to + * the directory is $PGDATA/cstore_fdw/{databaseOid}. + */ +void +CreateCStoreDatabaseDirectory(Oid databaseOid) +{ + bool cstoreDirectoryExists = false; + bool databaseDirectoryExists = false; + StringInfo cstoreDatabaseDirectoryPath = NULL; + + StringInfo cstoreDirectoryPath = makeStringInfo(); + appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); + + cstoreDirectoryExists = DirectoryExists(cstoreDirectoryPath); + if (!cstoreDirectoryExists) + { + CreateDirectory(cstoreDirectoryPath); + } + + cstoreDatabaseDirectoryPath = makeStringInfo(); + appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, + CSTORE_FDW_NAME, databaseOid); + + databaseDirectoryExists = DirectoryExists(cstoreDatabaseDirectoryPath); + if (!databaseDirectoryExists) + { + CreateDirectory(cstoreDatabaseDirectoryPath); + } +} + diff --git a/cstore.h b/cstore.h new file mode 100644 index 000000000..f51a972e2 --- /dev/null +++ b/cstore.h @@ -0,0 +1,311 @@ +/*------------------------------------------------------------------------- + * + * cstore.h + * + * Type and function declarations for CStore + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_H +#define CSTORE_H + +#include "access/tupdesc.h" +#include "fmgr.h" +#include "catalog/pg_am.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "lib/stringinfo.h" +#include "utils/rel.h" + +/* Defines for valid option names */ +#define OPTION_NAME_FILENAME "filename" +#define OPTION_NAME_COMPRESSION_TYPE "compression" +#define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" +#define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" + +/* Default values for option parameters */ +#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE +#define DEFAULT_STRIPE_ROW_COUNT 150000 +#define DEFAULT_BLOCK_ROW_COUNT 10000 + +/* Limits for option parameters */ +#define STRIPE_ROW_COUNT_MINIMUM 1000 +#define STRIPE_ROW_COUNT_MAXIMUM 10000000 +#define BLOCK_ROW_COUNT_MINIMUM 1000 +#define BLOCK_ROW_COUNT_MAXIMUM 100000 + +/* String representations of compression types */ +#define COMPRESSION_STRING_NONE "none" +#define COMPRESSION_STRING_PG_LZ "pglz" + +/* CStore file signature */ +#define CSTORE_MAGIC_NUMBER "citus_cstore" +#define CSTORE_VERSION_MAJOR 1 +#define CSTORE_VERSION_MINOR 7 + +/* miscellaneous defines */ +#define CSTORE_FDW_NAME "cstore_fdw" +#define CSTORE_FOOTER_FILE_SUFFIX ".footer" +#define CSTORE_TEMP_FILE_SUFFIX ".tmp" +#define CSTORE_TUPLE_COST_MULTIPLIER 10 +#define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 +#define CSTORE_POSTSCRIPT_SIZE_MAX 256 + +/* Enumaration for cstore file's compression method */ +typedef enum +{ + COMPRESSION_TYPE_INVALID = -1, + COMPRESSION_NONE = 0, + COMPRESSION_PG_LZ = 1, + + COMPRESSION_COUNT + +} CompressionType; + + +/* + * CStoreFdwOptions holds the option values to be used when reading or writing + * a cstore file. To resolve these values, we first check foreign table's options, + * and if not present, we then fall back to the default values specified above. 
+ */ +typedef struct CStoreOptions +{ + char *filename; + CompressionType compressionType; + uint64 stripeRowCount; + uint32 blockRowCount; + +} CStoreOptions; + + +/* + * StripeMetadata represents information about a stripe. This information is + * stored in the cstore file's footer. + */ +typedef struct StripeMetadata +{ + uint64 fileOffset; + uint64 skipListLength; + uint64 dataLength; + uint64 footerLength; + +} StripeMetadata; + + +/* TableFooter represents the footer of a cstore file. */ +typedef struct TableFooter +{ + List *stripeMetadataList; + uint64 blockRowCount; + +} TableFooter; + + +/* ColumnBlockSkipNode contains statistics for a ColumnBlockData. */ +typedef struct ColumnBlockSkipNode +{ + /* statistics about values of a column block */ + bool hasMinMax; + Datum minimumValue; + Datum maximumValue; + uint64 rowCount; + + /* + * Offsets and sizes of value and exists streams in the column data. + * These enable us to skip reading suppressed row blocks, and start reading + * a block without reading previous blocks. + */ + uint64 valueBlockOffset; + uint64 valueLength; + uint64 existsBlockOffset; + uint64 existsLength; + + CompressionType valueCompressionType; + +} ColumnBlockSkipNode; + + +/* + * StripeSkipList can be used for skipping row blocks. It contains a column block + * skip node for each block of each column. blockSkipNodeArray[column][block] + * is the entry for the specified column block. + */ +typedef struct StripeSkipList +{ + ColumnBlockSkipNode **blockSkipNodeArray; + uint32 columnCount; + uint32 blockCount; + +} StripeSkipList; + + +/* + * ColumnBlockData represents a block of data in a column. valueArray stores + * the values of data, and existsArray stores whether a value is present. + * valueBuffer is used to store (uncompressed) serialized values + * referenced by Datum's in valueArray. It is only used for by-reference Datum's. + * There is a one-to-one correspondence between valueArray and existsArray. 
+ */ +typedef struct ColumnBlockData +{ + bool *existsArray; + Datum *valueArray; + + /* valueBuffer keeps actual data for type-by-reference datums from valueArray. */ + StringInfo valueBuffer; + +} ColumnBlockData; + + +/* + * ColumnBlockBuffers represents a block of serialized data in a column. + * valueBuffer stores the serialized values of data, and existsBuffer stores + * serialized value of presence information. valueCompressionType contains + * compression type if valueBuffer is compressed. Finally rowCount has + * the number of rows in this block. + */ +typedef struct ColumnBlockBuffers +{ + StringInfo existsBuffer; + StringInfo valueBuffer; + CompressionType valueCompressionType; + +} ColumnBlockBuffers; + + +/* + * ColumnBuffers represents data buffers for a column in a row stripe. Each + * column is made of multiple column blocks. + */ +typedef struct ColumnBuffers +{ + ColumnBlockBuffers **blockBuffersArray; + +} ColumnBuffers; + + +/* StripeBuffers represents data for a row stripe in a cstore file. */ +typedef struct StripeBuffers +{ + uint32 columnCount; + uint32 rowCount; + ColumnBuffers **columnBuffersArray; + +} StripeBuffers; + + +/* + * StripeFooter represents a stripe's footer. In this footer, we keep three + * arrays of sizes. The number of elements in each of the arrays is equal + * to the number of columns. + */ +typedef struct StripeFooter +{ + uint32 columnCount; + uint64 *skipListSizeArray; + uint64 *existsSizeArray; + uint64 *valueSizeArray; + +} StripeFooter; + + +/* TableReadState represents state of a cstore file read operation. */ +typedef struct TableReadState +{ + FILE *tableFile; + TableFooter *tableFooter; + TupleDesc tupleDescriptor; + + /* + * List of Var pointers for columns in the query. We use this both for + * getting vector of projected columns, and also when we want to build + * base constraint to find selected row blocks. 
+ */ + List *projectedColumnList; + + List *whereClauseList; + MemoryContext stripeReadContext; + StripeBuffers *stripeBuffers; + uint32 readStripeCount; + uint64 stripeReadRowCount; + ColumnBlockData **blockDataArray; + int32 deserializedBlockIndex; + +} TableReadState; + + +/* TableWriteState represents state of a cstore file write operation. */ +typedef struct TableWriteState +{ + FILE *tableFile; + TableFooter *tableFooter; + StringInfo tableFooterFilename; + CompressionType compressionType; + TupleDesc tupleDescriptor; + FmgrInfo **comparisonFunctionArray; + uint64 currentFileOffset; + Relation relation; + + MemoryContext stripeWriteContext; + StripeBuffers *stripeBuffers; + StripeSkipList *stripeSkipList; + uint32 stripeMaxRowCount; + ColumnBlockData **blockDataArray; + /* + * compressionBuffer buffer is used as temporary storage during + * data value compression operation. It is kept here to minimize + * memory allocations. It lives in stripeWriteContext and gets + * deallocated when memory context is reset. 
+ */ + StringInfo compressionBuffer; + +} TableWriteState; + +/* Function declarations for extension loading and unloading */ +extern void _PG_init(void); +extern void _PG_fini(void); + +extern CompressionType ParseCompressionType(const char *compressionTypeString); +extern void InitializeCStoreTableFile(Oid relationId, Relation relation, + CStoreOptions *cstoreOptions); +extern void CreateCStoreDatabaseDirectory(Oid databaseOid); +extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); + +/* Function declarations for writing to a cstore file */ +extern TableWriteState * CStoreBeginWrite(const char *filename, + CompressionType compressionType, + uint64 stripeMaxRowCount, + uint32 blockRowCount, + TupleDesc tupleDescriptor); +extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreEndWrite(TableWriteState * state); + +/* Function declarations for reading from a cstore file */ +extern TableReadState * CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, + List *projectedColumnList, List *qualConditions); +extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); +extern bool CStoreReadFinished(TableReadState *state); +extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreEndRead(TableReadState *state); + +/* Function declarations for common functions */ +extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, + int16 procedureId); +extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, + uint32 blockRowCount); +extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, + uint32 columnCount); +extern uint64 CStoreTableRowCount(const char *filename); +extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, + CompressionType compressionType); +extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); + + +#endif /* 
CSTORE_H */ diff --git a/cstore_fdw.c b/cstore_fdw.c index b0a327768..c80d53f2c 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -95,24 +95,18 @@ static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void TruncateCStoreTables(List *cstoreRelationList); static void DeleteCStoreTableFiles(char *filename); -static void InitializeCStoreTableFile(Oid relationId, Relation relation); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); static bool DistributedTable(Oid relationId); static bool DistributedWorkerCopy(CopyStmt *copyStatement); -static void CreateCStoreDatabaseDirectory(Oid databaseOid); -static bool DirectoryExists(StringInfo directoryName); -static void CreateDirectory(StringInfo directoryName); -static void RemoveCStoreDatabaseDirectory(Oid databaseOid); static StringInfo OptionNamesString(Oid currentContextId); static HeapTuple GetSlotHeapTuple(TupleTableSlot *tts); -static CStoreFdwOptions * CStoreGetOptions(Oid foreignTableId); +static CStoreOptions * CStoreGetOptions(Oid foreignTableId); static char * CStoreGetOptionValue(Oid foreignTableId, const char *optionName); static void ValidateForeignTableOptions(char *filename, char *compressionTypeString, char *stripeRowCountString, char *blockRowCountString); static char * CStoreDefaultFilePath(Oid foreignTableId); -static CompressionType ParseCompressionType(const char *compressionTypeString); static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId); static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, @@ -160,9 +154,6 @@ static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); #endif -/* declarations for dynamic loading */ -PG_MODULE_MAGIC; - PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); PG_FUNCTION_INFO_V1(cstore_table_size); PG_FUNCTION_INFO_V1(cstore_fdw_handler); @@ -175,11 +166,11 @@ static 
ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; /* - * _PG_init is called when the module is loaded. In this function we save the + * Called when the module is loaded. In this function we save the * previous utility hook, and then install our hook to pre-intercept calls to * the copy command. */ -void _PG_init(void) +void cstore_fdw_init() { PreviousProcessUtilityHook = ProcessUtility_hook; ProcessUtility_hook = CStoreProcessUtility; @@ -187,10 +178,10 @@ void _PG_init(void) /* - * _PG_fini is called when the module is unloaded. This function uninstalls the + * Called when the module is unloaded. This function uninstalls the * extension's hooks. */ -void _PG_fini(void) +void cstore_fdw_finish() { ProcessUtility_hook = PreviousProcessUtilityHook; } @@ -249,7 +240,7 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) */ CreateCStoreDatabaseDirectory(MyDatabaseId); - InitializeCStoreTableFile(relationId, relation); + InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); heap_close(relation, AccessExclusiveLock); } } @@ -525,7 +516,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) Datum *columnValues = NULL; bool *columnNulls = NULL; TableWriteState *writeState = NULL; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; MemoryContext tupleContext = NULL; /* Only superuser can copy from or to local file */ @@ -546,7 +537,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) columnValues = palloc0(columnCount * sizeof(Datum)); columnNulls = palloc0(columnCount * sizeof(bool)); - cstoreFdwOptions = CStoreGetOptions(relationId); + cstoreOptions = CStoreGetOptions(relationId); /* * We create a new memory context called tuple context, and read and write @@ -580,10 +571,10 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) #endif /* init state to write to the cstore file */ - writeState = 
CStoreBeginWrite(cstoreFdwOptions->filename, - cstoreFdwOptions->compressionType, - cstoreFdwOptions->stripeRowCount, - cstoreFdwOptions->blockRowCount, + writeState = CStoreBeginWrite(cstoreOptions->filename, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupleDescriptor); while (nextRowFound) @@ -765,7 +756,7 @@ DroppedCStoreFilenameList(DropStmt *dropStatement) Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); if (CStoreTable(relationId)) { - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(relationId); + CStoreOptions *cstoreOptions = CStoreGetOptions(relationId); char *defaultfilename = CStoreDefaultFilePath(relationId); /* @@ -773,13 +764,13 @@ DroppedCStoreFilenameList(DropStmt *dropStatement) * by sql drop trigger. Both paths are generated by code, use * of strcmp is safe here. */ - if (strcmp(defaultfilename, cstoreFdwOptions->filename) == 0) + if (strcmp(defaultfilename, cstoreOptions->filename) == 0) { continue; } droppedCStoreFileList = lappend(droppedCStoreFileList, - cstoreFdwOptions->filename); + cstoreOptions->filename); } } } @@ -857,13 +848,13 @@ TruncateCStoreTables(List *cstoreRelationList) { Relation relation = (Relation) lfirst(relationCell); Oid relationId = relation->rd_id; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; Assert(CStoreTable(relationId)); - cstoreFdwOptions = CStoreGetOptions(relationId); - DeleteCStoreTableFiles(cstoreFdwOptions->filename); - InitializeCStoreTableFile(relationId, relation); + cstoreOptions = CStoreGetOptions(relationId); + DeleteCStoreTableFiles(cstoreOptions->filename); + InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } } @@ -901,29 +892,6 @@ DeleteCStoreTableFiles(char *filename) } -/* - * InitializeCStoreTableFile creates data and footer file for a cstore table. 
- * The function assumes data and footer files do not exist, therefore - * it should be called on empty or non-existing table. Notice that the caller - * is expected to acquire AccessExclusiveLock on the relation. - */ -static void InitializeCStoreTableFile(Oid relationId, Relation relation) -{ - TableWriteState *writeState = NULL; - TupleDesc tupleDescriptor = RelationGetDescr(relation); - CStoreFdwOptions* cstoreFdwOptions = CStoreGetOptions(relationId); - - /* - * Initialize state to write to the cstore file. This creates an - * empty data file and a valid footer file for the table. - */ - writeState = CStoreBeginWrite(cstoreFdwOptions->filename, - cstoreFdwOptions->compressionType, cstoreFdwOptions->stripeRowCount, - cstoreFdwOptions->blockRowCount, tupleDescriptor); - CStoreEndWrite(writeState); -} - - /* * CStoreTable checks if the given table name belongs to a foreign columnar store @@ -1045,111 +1013,7 @@ DistributedWorkerCopy(CopyStmt *copyStatement) } -/* - * CreateCStoreDatabaseDirectory creates the directory (and parent directories, - * if needed) used to store automatically managed cstore_fdw files. The path to - * the directory is $PGDATA/cstore_fdw/{databaseOid}. 
- */ -static void -CreateCStoreDatabaseDirectory(Oid databaseOid) -{ - bool cstoreDirectoryExists = false; - bool databaseDirectoryExists = false; - StringInfo cstoreDatabaseDirectoryPath = NULL; - StringInfo cstoreDirectoryPath = makeStringInfo(); - appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); - - cstoreDirectoryExists = DirectoryExists(cstoreDirectoryPath); - if (!cstoreDirectoryExists) - { - CreateDirectory(cstoreDirectoryPath); - } - - cstoreDatabaseDirectoryPath = makeStringInfo(); - appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, - CSTORE_FDW_NAME, databaseOid); - - databaseDirectoryExists = DirectoryExists(cstoreDatabaseDirectoryPath); - if (!databaseDirectoryExists) - { - CreateDirectory(cstoreDatabaseDirectoryPath); - } -} - - -/* DirectoryExists checks if a directory exists for the given directory name. */ -static bool -DirectoryExists(StringInfo directoryName) -{ - bool directoryExists = true; - struct stat directoryStat; - - int statOK = stat(directoryName->data, &directoryStat); - if (statOK == 0) - { - /* file already exists; check that it is a directory */ - if (!S_ISDIR(directoryStat.st_mode)) - { - ereport(ERROR, (errmsg("\"%s\" is not a directory", directoryName->data), - errhint("You need to remove or rename the file \"%s\".", - directoryName->data))); - } - } - else - { - if (errno == ENOENT) - { - directoryExists = false; - } - else - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat directory \"%s\": %m", - directoryName->data))); - } - } - - return directoryExists; -} - - -/* CreateDirectory creates a new directory with the given directory name. 
*/ -static void -CreateDirectory(StringInfo directoryName) -{ - int makeOK = mkdir(directoryName->data, S_IRWXU); - if (makeOK != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not create directory \"%s\": %m", - directoryName->data))); - } -} - - -/* - * RemoveCStoreDatabaseDirectory removes CStore directory previously - * created for this database. - * However it does not remove 'cstore_fdw' directory even if there - * are no other databases left. - */ -static void -RemoveCStoreDatabaseDirectory(Oid databaseOid) -{ - StringInfo cstoreDirectoryPath = makeStringInfo(); - StringInfo cstoreDatabaseDirectoryPath = makeStringInfo(); - - appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); - - appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, - CSTORE_FDW_NAME, databaseOid); - - if (DirectoryExists(cstoreDatabaseDirectoryPath)) - { - rmtree(cstoreDatabaseDirectoryPath->data, true); - } -} /* @@ -1162,7 +1026,7 @@ cstore_table_size(PG_FUNCTION_ARGS) Oid relationId = PG_GETARG_OID(0); int64 tableSize = 0; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; char *dataFilename = NULL; StringInfo footerFilename = NULL; int dataFileStatResult = 0; @@ -1176,8 +1040,8 @@ cstore_table_size(PG_FUNCTION_ARGS) ereport(ERROR, (errmsg("relation is not a cstore table"))); } - cstoreFdwOptions = CStoreGetOptions(relationId); - dataFilename = cstoreFdwOptions->filename; + cstoreOptions = CStoreGetOptions(relationId); + dataFilename = cstoreOptions->filename; dataFileStatResult = stat(dataFilename, &dataFileStatBuffer); if (dataFileStatResult != 0) @@ -1402,10 +1266,10 @@ GetSlotHeapTuple(TupleTableSlot *tts) * foreign table, and if not present, falls back to default values. This function * errors out if given option values are considered invalid. 
*/ -static CStoreFdwOptions * +static CStoreOptions * CStoreGetOptions(Oid foreignTableId) { - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; char *filename = NULL; CompressionType compressionType = DEFAULT_COMPRESSION_TYPE; int32 stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; @@ -1445,13 +1309,13 @@ CStoreGetOptions(Oid foreignTableId) filename = CStoreDefaultFilePath(foreignTableId); } - cstoreFdwOptions = palloc0(sizeof(CStoreFdwOptions)); - cstoreFdwOptions->filename = filename; - cstoreFdwOptions->compressionType = compressionType; - cstoreFdwOptions->stripeRowCount = stripeRowCount; - cstoreFdwOptions->blockRowCount = blockRowCount; + cstoreOptions = palloc0(sizeof(CStoreOptions)); + cstoreOptions->filename = filename; + cstoreOptions->compressionType = compressionType; + cstoreOptions->stripeRowCount = stripeRowCount; + cstoreOptions->blockRowCount = blockRowCount; - return cstoreFdwOptions; + return cstoreOptions; } @@ -1577,26 +1441,6 @@ CStoreDefaultFilePath(Oid foreignTableId) } -/* ParseCompressionType converts a string to a compression type. */ -static CompressionType -ParseCompressionType(const char *compressionTypeString) -{ - CompressionType compressionType = COMPRESSION_TYPE_INVALID; - Assert(compressionTypeString != NULL); - - if (strncmp(compressionTypeString, COMPRESSION_STRING_NONE, NAMEDATALEN) == 0) - { - compressionType = COMPRESSION_NONE; - } - else if (strncmp(compressionTypeString, COMPRESSION_STRING_PG_LZ, NAMEDATALEN) == 0) - { - compressionType = COMPRESSION_PG_LZ; - } - - return compressionType; -} - - /* * CStoreGetForeignRelSize obtains relation size estimates for a foreign table and * puts its estimate for row count into baserel->rows. 
@@ -1604,8 +1448,8 @@ ParseCompressionType(const char *compressionTypeString) static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); - double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename); + CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); + double tupleCountEstimate = TupleCountEstimate(baserel, cstoreOptions->filename); double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, 0, JOIN_INNER, NULL); @@ -1624,7 +1468,7 @@ static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { Path *foreignScanPath = NULL; - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); Relation relation = heap_open(foreignTableId, AccessShareLock); /* @@ -1645,14 +1489,14 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId */ List *queryColumnList = ColumnList(baserel, foreignTableId); uint32 queryColumnCount = list_length(queryColumnList); - BlockNumber relationPageCount = PageCount(cstoreFdwOptions->filename); + BlockNumber relationPageCount = PageCount(cstoreOptions->filename); uint32 relationColumnCount = RelationGetNumberOfAttributes(relation); double queryColumnRatio = (double) queryColumnCount / relationColumnCount; double queryPageCount = relationPageCount * queryColumnRatio; double totalDiskAccessCost = seq_page_cost * queryPageCount; - double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename); + double tupleCountEstimate = TupleCountEstimate(baserel, cstoreOptions->filename); /* * We estimate costs almost the same way as cost_seqscan(), thus assuming @@ -1922,16 +1766,16 @@ static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) { Oid foreignTableId = 
RelationGetRelid(scanState->ss.ss_currentRelation); - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - ExplainPropertyText("CStore File", cstoreFdwOptions->filename, explainState); + ExplainPropertyText("CStore File", cstoreOptions->filename, explainState); /* supress file size if we're not showing cost details */ if (explainState->costs) { struct stat statBuffer; - int statResult = stat(cstoreFdwOptions->filename, &statBuffer); + int statResult = stat(cstoreOptions->filename, &statBuffer); if (statResult == 0) { ExplainPropertyLong("CStore File Size", (long) statBuffer.st_size, @@ -1947,7 +1791,7 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) { TableReadState *readState = NULL; Oid foreignTableId = InvalidOid; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; Relation currentRelation = scanState->ss.ss_currentRelation; TupleDesc tupleDescriptor = RelationGetDescr(currentRelation); List *columnList = NIL; @@ -1962,14 +1806,14 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) } foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); - cstoreFdwOptions = CStoreGetOptions(foreignTableId); + cstoreOptions = CStoreGetOptions(foreignTableId); foreignScan = (ForeignScan *) scanState->ss.ps.plan; foreignPrivateList = (List *) foreignScan->fdw_private; whereClauseList = foreignScan->scan.plan.qual; columnList = (List *) linitial(foreignPrivateList); - readState = CStoreBeginRead(cstoreFdwOptions->filename, tupleDescriptor, + readState = CStoreBeginRead(cstoreOptions->filename, tupleDescriptor, columnList, whereClauseList); scanState->fdw_state = (void *) readState; @@ -2040,18 +1884,18 @@ CStoreAnalyzeForeignTable(Relation relation, BlockNumber *totalPageCount) { Oid foreignTableId = RelationGetRelid(relation); - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + 
CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); struct stat statBuffer; - int statResult = stat(cstoreFdwOptions->filename, &statBuffer); + int statResult = stat(cstoreOptions->filename, &statBuffer); if (statResult < 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", - cstoreFdwOptions->filename))); + cstoreOptions->filename))); } - (*totalPageCount) = PageCount(cstoreFdwOptions->filename); + (*totalPageCount) = PageCount(cstoreOptions->filename); (*acquireSampleRowsFunc) = CStoreAcquireSampleRows; return true; @@ -2311,20 +2155,20 @@ static void CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo) { Oid foreignTableOid = InvalidOid; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; TupleDesc tupleDescriptor = NULL; TableWriteState *writeState = NULL; Relation relation = NULL; foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); relation = heap_open(foreignTableOid, ShareUpdateExclusiveLock); - cstoreFdwOptions = CStoreGetOptions(foreignTableOid); + cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - writeState = CStoreBeginWrite(cstoreFdwOptions->filename, - cstoreFdwOptions->compressionType, - cstoreFdwOptions->stripeRowCount, - cstoreFdwOptions->blockRowCount, + writeState = CStoreBeginWrite(cstoreOptions->filename, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupleDescriptor); writeState->relation = relation; diff --git a/cstore_fdw.h b/cstore_fdw.h index 2bc3e9c97..7b8475497 100644 --- a/cstore_fdw.h +++ b/cstore_fdw.h @@ -22,41 +22,7 @@ #include "lib/stringinfo.h" #include "utils/rel.h" - -/* Defines for valid option names */ -#define OPTION_NAME_FILENAME "filename" -#define OPTION_NAME_COMPRESSION_TYPE "compression" -#define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" -#define 
OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" - -/* Default values for option parameters */ -#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE -#define DEFAULT_STRIPE_ROW_COUNT 150000 -#define DEFAULT_BLOCK_ROW_COUNT 10000 - -/* Limits for option parameters */ -#define STRIPE_ROW_COUNT_MINIMUM 1000 -#define STRIPE_ROW_COUNT_MAXIMUM 10000000 -#define BLOCK_ROW_COUNT_MINIMUM 1000 -#define BLOCK_ROW_COUNT_MAXIMUM 100000 - -/* String representations of compression types */ -#define COMPRESSION_STRING_NONE "none" -#define COMPRESSION_STRING_PG_LZ "pglz" -#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" - -/* CStore file signature */ -#define CSTORE_MAGIC_NUMBER "citus_cstore" -#define CSTORE_VERSION_MAJOR 1 -#define CSTORE_VERSION_MINOR 7 - -/* miscellaneous defines */ -#define CSTORE_FDW_NAME "cstore_fdw" -#define CSTORE_FOOTER_FILE_SUFFIX ".footer" -#define CSTORE_TEMP_FILE_SUFFIX ".tmp" -#define CSTORE_TUPLE_COST_MULTIPLIER 10 -#define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 -#define CSTORE_POSTSCRIPT_SIZE_MAX 256 +#include "cstore.h" /* table containing information about how to partition distributed tables */ #define CITUS_EXTENSION_NAME "citus" @@ -67,7 +33,6 @@ #define ATTR_NUM_PARTITION_TYPE 2 #define ATTR_NUM_PARTITION_KEY 3 - /* * CStoreValidOption keeps an option name and a context. 
When an option is passed * into cstore_fdw objects (server and foreign table), we compare this option's @@ -80,6 +45,7 @@ typedef struct CStoreValidOption } CStoreValidOption; +#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" /* Array of options that are valid for cstore_fdw */ static const uint32 ValidOptionCount = 4; @@ -92,220 +58,8 @@ static const CStoreValidOption ValidOptionArray[] = { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } }; - -/* Enumaration for cstore file's compression method */ -typedef enum -{ - COMPRESSION_TYPE_INVALID = -1, - COMPRESSION_NONE = 0, - COMPRESSION_PG_LZ = 1, - - COMPRESSION_COUNT - -} CompressionType; - - -/* - * CStoreFdwOptions holds the option values to be used when reading or writing - * a cstore file. To resolve these values, we first check foreign table's options, - * and if not present, we then fall back to the default values specified above. - */ -typedef struct CStoreFdwOptions -{ - char *filename; - CompressionType compressionType; - uint64 stripeRowCount; - uint32 blockRowCount; - -} CStoreFdwOptions; - - -/* - * StripeMetadata represents information about a stripe. This information is - * stored in the cstore file's footer. - */ -typedef struct StripeMetadata -{ - uint64 fileOffset; - uint64 skipListLength; - uint64 dataLength; - uint64 footerLength; - -} StripeMetadata; - - -/* TableFooter represents the footer of a cstore file. */ -typedef struct TableFooter -{ - List *stripeMetadataList; - uint64 blockRowCount; - -} TableFooter; - - -/* ColumnBlockSkipNode contains statistics for a ColumnBlockData. */ -typedef struct ColumnBlockSkipNode -{ - /* statistics about values of a column block */ - bool hasMinMax; - Datum minimumValue; - Datum maximumValue; - uint64 rowCount; - - /* - * Offsets and sizes of value and exists streams in the column data. - * These enable us to skip reading suppressed row blocks, and start reading - * a block without reading previous blocks. 
- */ - uint64 valueBlockOffset; - uint64 valueLength; - uint64 existsBlockOffset; - uint64 existsLength; - - CompressionType valueCompressionType; - -} ColumnBlockSkipNode; - - -/* - * StripeSkipList can be used for skipping row blocks. It contains a column block - * skip node for each block of each column. blockSkipNodeArray[column][block] - * is the entry for the specified column block. - */ -typedef struct StripeSkipList -{ - ColumnBlockSkipNode **blockSkipNodeArray; - uint32 columnCount; - uint32 blockCount; - -} StripeSkipList; - - -/* - * ColumnBlockData represents a block of data in a column. valueArray stores - * the values of data, and existsArray stores whether a value is present. - * valueBuffer is used to store (uncompressed) serialized values - * referenced by Datum's in valueArray. It is only used for by-reference Datum's. - * There is a one-to-one correspondence between valueArray and existsArray. - */ -typedef struct ColumnBlockData -{ - bool *existsArray; - Datum *valueArray; - - /* valueBuffer keeps actual data for type-by-reference datums from valueArray. */ - StringInfo valueBuffer; - -} ColumnBlockData; - - -/* - * ColumnBlockBuffers represents a block of serialized data in a column. - * valueBuffer stores the serialized values of data, and existsBuffer stores - * serialized value of presence information. valueCompressionType contains - * compression type if valueBuffer is compressed. Finally rowCount has - * the number of rows in this block. - */ -typedef struct ColumnBlockBuffers -{ - StringInfo existsBuffer; - StringInfo valueBuffer; - CompressionType valueCompressionType; - -} ColumnBlockBuffers; - - -/* - * ColumnBuffers represents data buffers for a column in a row stripe. Each - * column is made of multiple column blocks. - */ -typedef struct ColumnBuffers -{ - ColumnBlockBuffers **blockBuffersArray; - -} ColumnBuffers; - - -/* StripeBuffers represents data for a row stripe in a cstore file. 
*/ -typedef struct StripeBuffers -{ - uint32 columnCount; - uint32 rowCount; - ColumnBuffers **columnBuffersArray; - -} StripeBuffers; - - -/* - * StripeFooter represents a stripe's footer. In this footer, we keep three - * arrays of sizes. The number of elements in each of the arrays is equal - * to the number of columns. - */ -typedef struct StripeFooter -{ - uint32 columnCount; - uint64 *skipListSizeArray; - uint64 *existsSizeArray; - uint64 *valueSizeArray; - -} StripeFooter; - - -/* TableReadState represents state of a cstore file read operation. */ -typedef struct TableReadState -{ - FILE *tableFile; - TableFooter *tableFooter; - TupleDesc tupleDescriptor; - - /* - * List of Var pointers for columns in the query. We use this both for - * getting vector of projected columns, and also when we want to build - * base constraint to find selected row blocks. - */ - List *projectedColumnList; - - List *whereClauseList; - MemoryContext stripeReadContext; - StripeBuffers *stripeBuffers; - uint32 readStripeCount; - uint64 stripeReadRowCount; - ColumnBlockData **blockDataArray; - int32 deserializedBlockIndex; - -} TableReadState; - - -/* TableWriteState represents state of a cstore file write operation. */ -typedef struct TableWriteState -{ - FILE *tableFile; - TableFooter *tableFooter; - StringInfo tableFooterFilename; - CompressionType compressionType; - TupleDesc tupleDescriptor; - FmgrInfo **comparisonFunctionArray; - uint64 currentFileOffset; - Relation relation; - - MemoryContext stripeWriteContext; - StripeBuffers *stripeBuffers; - StripeSkipList *stripeSkipList; - uint32 stripeMaxRowCount; - ColumnBlockData **blockDataArray; - /* - * compressionBuffer buffer is used as temporary storage during - * data value compression operation. It is kept here to minimize - * memory allocations. It lives in stripeWriteContext and gets - * deallocated when memory context is reset. 
- */ - StringInfo compressionBuffer; - -} TableWriteState; - -/* Function declarations for extension loading and unloading */ -extern void _PG_init(void); -extern void _PG_fini(void); +void cstore_fdw_init(void); +void cstore_fdw_finish(void); /* event trigger function declarations */ extern Datum cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS); @@ -318,36 +72,4 @@ extern Datum cstore_clean_table_resources(PG_FUNCTION_ARGS); extern Datum cstore_fdw_handler(PG_FUNCTION_ARGS); extern Datum cstore_fdw_validator(PG_FUNCTION_ARGS); -/* Function declarations for writing to a cstore file */ -extern TableWriteState * CStoreBeginWrite(const char *filename, - CompressionType compressionType, - uint64 stripeMaxRowCount, - uint32 blockRowCount, - TupleDesc tupleDescriptor); -extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, - bool *columnNulls); -extern void CStoreEndWrite(TableWriteState * state); - -/* Function declarations for reading from a cstore file */ -extern TableReadState * CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, - List *projectedColumnList, List *qualConditions); -extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); -extern bool CStoreReadFinished(TableReadState *state); -extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, - bool *columnNulls); -extern void CStoreEndRead(TableReadState *state); - -/* Function declarations for common functions */ -extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, - int16 procedureId); -extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, - uint32 blockRowCount); -extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, - uint32 columnCount); -extern uint64 CStoreTableRowCount(const char *filename); -extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, - CompressionType compressionType); -extern StringInfo DecompressBuffer(StringInfo buffer, 
CompressionType compressionType); - - #endif /* CSTORE_FDW_H */ diff --git a/mod.c b/mod.c new file mode 100644 index 000000000..aa65ac0ec --- /dev/null +++ b/mod.c @@ -0,0 +1,30 @@ +/*------------------------------------------------------------------------- + * + * mod.c + * + * This file contains module-level definitions. + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "cstore_fdw.h" + +PG_MODULE_MAGIC; + +void _PG_init(void) +{ + cstore_fdw_init(); +} + + +void _PG_fini(void) +{ + cstore_fdw_finish(); +} + From 59d5d961702df30b6f30314499557e4b5ce5f4fb Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 2 Sep 2020 10:31:10 -0700 Subject: [PATCH 03/91] move _PG_* declarations to mod.h --- cstore.h | 4 ---- mod.c | 1 + mod.h | 21 +++++++++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 mod.h diff --git a/cstore.h b/cstore.h index f51a972e2..d45fde914 100644 --- a/cstore.h +++ b/cstore.h @@ -266,10 +266,6 @@ typedef struct TableWriteState } TableWriteState; -/* Function declarations for extension loading and unloading */ -extern void _PG_init(void); -extern void _PG_fini(void); - extern CompressionType ParseCompressionType(const char *compressionTypeString); extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); diff --git a/mod.c b/mod.c index aa65ac0ec..8cb138c62 100644 --- a/mod.c +++ b/mod.c @@ -13,6 +13,7 @@ #include "postgres.h" +#include "mod.h" #include "cstore_fdw.h" PG_MODULE_MAGIC; diff --git a/mod.h b/mod.h new file mode 100644 index 000000000..3196bc809 --- /dev/null +++ b/mod.h @@ -0,0 +1,21 @@ +/*------------------------------------------------------------------------- + * + * mod.h + * + * Type and function declarations for CStore + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef MOD_H +#define MOD_H + +/* Function declarations for extension loading and unloading */ +extern void _PG_init(void); +extern void _PG_fini(void); + +#endif /* MOD_H */ From 3089c92103607acb62cbb06f1944c5509c18d1eb Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 2 Sep 2020 11:41:01 -0700 Subject: [PATCH 04/91] header file and include cleanup --- cstore.c | 38 +++++++++++++- cstore.h | 7 +-- cstore_compression.c | 2 +- cstore_fdw.c | 92 ++++++++++++++++----------------- cstore_fdw.h | 44 +--------------- cstore_metadata_serialization.c | 7 +-- cstore_metadata_serialization.h | 6 --- cstore_reader.c | 12 ++--- cstore_version_compat.h | 2 +- cstore_writer.c | 18 ++----- mod.c | 2 + 11 files changed, 101 insertions(+), 129 deletions(-) diff --git a/cstore.c b/cstore.c index ccb59675f..e704bc31d 100644 --- a/cstore.c +++ b/cstore.c @@ -13,12 +13,14 @@ #include "postgres.h" +#include +#include + #include "miscadmin.h" +#include "utils/rel.h" #include "cstore.h" -#include - static void CreateDirectory(StringInfo directoryName); static bool DirectoryExists(StringInfo directoryName); @@ -168,3 +170,35 @@ CreateCStoreDatabaseDirectory(Oid databaseOid) } } + +/* + * DeleteCStoreTableFiles deletes the data and footer files for a cstore table + * whose data filename is given. 
+ */ +void +DeleteCStoreTableFiles(char *filename) +{ + int dataFileRemoved = 0; + int footerFileRemoved = 0; + + StringInfo tableFooterFilename = makeStringInfo(); + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + /* delete the footer file */ + footerFileRemoved = unlink(tableFooterFilename->data); + if (footerFileRemoved != 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not delete file \"%s\": %m", + tableFooterFilename->data))); + } + + /* delete the data file */ + dataFileRemoved = unlink(filename); + if (dataFileRemoved != 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not delete file \"%s\": %m", + filename))); + } +} diff --git a/cstore.h b/cstore.h index d45fde914..a694e1e29 100644 --- a/cstore.h +++ b/cstore.h @@ -14,13 +14,9 @@ #ifndef CSTORE_H #define CSTORE_H -#include "access/tupdesc.h" #include "fmgr.h" -#include "catalog/pg_am.h" -#include "catalog/pg_foreign_server.h" -#include "catalog/pg_foreign_table.h" #include "lib/stringinfo.h" -#include "utils/rel.h" +#include "utils/relcache.h" /* Defines for valid option names */ #define OPTION_NAME_FILENAME "filename" @@ -271,6 +267,7 @@ extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); extern void CreateCStoreDatabaseDirectory(Oid databaseOid); extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); +extern void DeleteCStoreTableFiles(char *filename); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(const char *filename, diff --git a/cstore_compression.c b/cstore_compression.c index 3b37fd47a..a3c5f9f7d 100644 --- a/cstore_compression.c +++ b/cstore_compression.c @@ -12,7 +12,6 @@ *------------------------------------------------------------------------- */ #include "postgres.h" -#include "cstore_fdw.h" #if PG_VERSION_NUM >= 90500 #include "common/pg_lzcompress.h" @@ -20,6 +19,7 @@ #include "utils/pg_lzcompress.h" 
#endif +#include "cstore.h" diff --git a/cstore_fdw.c b/cstore_fdw.c index c80d53f2c..9787fd2a2 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -15,15 +15,11 @@ */ #include "postgres.h" -#include "cstore_fdw.h" -#include "cstore_version_compat.h" #include -#include -#include -#include "access/htup_details.h" + +#include "access/heapam.h" #include "access/reloptions.h" -#include "access/sysattr.h" #include "access/tuptoaster.h" #include "catalog/namespace.h" #include "catalog/pg_foreign_table.h" @@ -39,35 +35,71 @@ #include "foreign/foreign.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#if PG_VERSION_NUM < 120000 #include "optimizer/cost.h" +#endif #include "optimizer/pathnode.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" #if PG_VERSION_NUM >= 120000 #include "access/heapam.h" -#include "access/tableam.h" -#include "executor/tuptable.h" #include "optimizer/optimizer.h" #else #include "optimizer/var.h" #endif #include "parser/parser.h" -#include "parser/parsetree.h" #include "parser/parse_coerce.h" #include "parser/parse_type.h" -#include "storage/fd.h" #include "tcop/utility.h" #include "utils/builtins.h" #include "utils/fmgroids.h" -#include "utils/memutils.h" #include "utils/lsyscache.h" -#include "utils/rel.h" #if PG_VERSION_NUM >= 120000 #include "utils/snapmgr.h" #else #include "utils/tqual.h" #endif +#if PG_VERSION_NUM < 120000 +#include "utils/rel.h" +#endif +#include "cstore.h" +#include "cstore_fdw.h" +#include "cstore_version_compat.h" + +/* table containing information about how to partition distributed tables */ +#define CITUS_EXTENSION_NAME "citus" +#define CITUS_PARTITION_TABLE_NAME "pg_dist_partition" + +/* human-readable names for addressing columns of the pg_dist_partition table */ +#define ATTR_NUM_PARTITION_RELATION_ID 1 +#define ATTR_NUM_PARTITION_TYPE 2 +#define ATTR_NUM_PARTITION_KEY 3 + +/* + * CStoreValidOption keeps an option name and a context. 
When an option is passed + * into cstore_fdw objects (server and foreign table), we compare this option's + * name and context against those of valid options. + */ +typedef struct CStoreValidOption +{ + const char *optionName; + Oid optionContextId; + +} CStoreValidOption; + +#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" + +/* Array of options that are valid for cstore_fdw */ +static const uint32 ValidOptionCount = 4; +static const CStoreValidOption ValidOptionArray[] = +{ + /* foreign table options */ + { OPTION_NAME_FILENAME, ForeignTableRelationId }, + { OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId }, + { OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId }, + { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } +}; /* local functions forward declarations */ #if PG_VERSION_NUM >= 100000 @@ -94,7 +126,6 @@ static List * DroppedCStoreFilenameList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void TruncateCStoreTables(List *cstoreRelationList); -static void DeleteCStoreTableFiles(char *filename); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); static bool DistributedTable(Oid relationId); @@ -858,41 +889,6 @@ TruncateCStoreTables(List *cstoreRelationList) } } - -/* - * DeleteCStoreTableFiles deletes the data and footer files for a cstore table - * whose data filename is given. 
- */ -static void -DeleteCStoreTableFiles(char *filename) -{ - int dataFileRemoved = 0; - int footerFileRemoved = 0; - - StringInfo tableFooterFilename = makeStringInfo(); - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - /* delete the footer file */ - footerFileRemoved = unlink(tableFooterFilename->data); - if (footerFileRemoved != 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not delete file \"%s\": %m", - tableFooterFilename->data))); - } - - /* delete the data file */ - dataFileRemoved = unlink(filename); - if (dataFileRemoved != 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not delete file \"%s\": %m", - filename))); - } -} - - - /* * CStoreTable checks if the given table name belongs to a foreign columnar store * table. If it does, the function returns true. Otherwise, it returns false. diff --git a/cstore_fdw.h b/cstore_fdw.h index 7b8475497..c7b4460ed 100644 --- a/cstore_fdw.h +++ b/cstore_fdw.h @@ -14,49 +14,9 @@ #ifndef CSTORE_FDW_H #define CSTORE_FDW_H -#include "access/tupdesc.h" +#include "postgres.h" + #include "fmgr.h" -#include "catalog/pg_am.h" -#include "catalog/pg_foreign_server.h" -#include "catalog/pg_foreign_table.h" -#include "lib/stringinfo.h" -#include "utils/rel.h" - -#include "cstore.h" - -/* table containing information about how to partition distributed tables */ -#define CITUS_EXTENSION_NAME "citus" -#define CITUS_PARTITION_TABLE_NAME "pg_dist_partition" - -/* human-readable names for addressing columns of the pg_dist_partition table */ -#define ATTR_NUM_PARTITION_RELATION_ID 1 -#define ATTR_NUM_PARTITION_TYPE 2 -#define ATTR_NUM_PARTITION_KEY 3 - -/* - * CStoreValidOption keeps an option name and a context. When an option is passed - * into cstore_fdw objects (server and foreign table), we compare this option's - * name and context against those of valid options. 
- */ -typedef struct CStoreValidOption -{ - const char *optionName; - Oid optionContextId; - -} CStoreValidOption; - -#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" - -/* Array of options that are valid for cstore_fdw */ -static const uint32 ValidOptionCount = 4; -static const CStoreValidOption ValidOptionArray[] = -{ - /* foreign table options */ - { OPTION_NAME_FILENAME, ForeignTableRelationId }, - { OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId }, - { OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId }, - { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } -}; void cstore_fdw_init(void); void cstore_fdw_finish(void); diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c index 26402f897..67ae2ec2c 100644 --- a/cstore_metadata_serialization.c +++ b/cstore_metadata_serialization.c @@ -14,11 +14,12 @@ #include "postgres.h" -#include "cstore_fdw.h" -#include "cstore_metadata_serialization.h" -#include "cstore.pb-c.h" + #include "access/tupmacs.h" +#include "cstore.h" +#include "cstore_metadata_serialization.h" +#include "cstore.pb-c.h" /* local functions forward declarations */ static ProtobufCBinaryData DatumToProtobufBinary(Datum datum, bool typeByValue, diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h index 421f8ddff..b8890a5d4 100644 --- a/cstore_metadata_serialization.h +++ b/cstore_metadata_serialization.h @@ -14,12 +14,6 @@ #ifndef CSTORE_SERIALIZATION_H #define CSTORE_SERIALIZATION_H -#include "catalog/pg_attribute.h" -#include "nodes/pg_list.h" -#include "lib/stringinfo.h" -#include "cstore_fdw.h" - - /* Function declarations for metadata serialization */ extern StringInfo SerializePostScript(uint64 tableFooterLength); extern StringInfo SerializeTableFooter(TableFooter *tableFooter); diff --git a/cstore_reader.c b/cstore_reader.c index 7e9c6bcfd..68ce5cdad 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -15,30 +15,26 @@ #include "postgres.h" -#include "cstore_fdw.h" -#include 
"cstore_metadata_serialization.h" -#include "cstore_version_compat.h" #include "access/nbtree.h" -#include "access/skey.h" +#include "catalog/pg_am.h" #include "commands/defrem.h" #include "nodes/makefuncs.h" #if PG_VERSION_NUM >= 120000 -#include "nodes/pathnodes.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #else #include "optimizer/clauses.h" #include "optimizer/predtest.h" -#include "optimizer/var.h" #endif #include "optimizer/restrictinfo.h" -#include "port.h" #include "storage/fd.h" #include "utils/memutils.h" #include "utils/lsyscache.h" -#include "utils/rel.h" +#include "cstore.h" +#include "cstore_metadata_serialization.h" +#include "cstore_version_compat.h" /* static function declarations */ static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, diff --git a/cstore_version_compat.h b/cstore_version_compat.h index a7f961fcd..1b80b16c3 100644 --- a/cstore_version_compat.h +++ b/cstore_version_compat.h @@ -49,7 +49,7 @@ #define TTS_EMPTY(slot) ((slot)->tts_isempty) #define ExecForceStoreHeapTuple(tuple, slot, shouldFree) \ ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); -#define HeapScanDesc TableScanDesc +#define TableScanDesc HeapScanDesc #define table_beginscan heap_beginscan #define table_endscan heap_endscan diff --git a/cstore_writer.c b/cstore_writer.c index b69064215..51a01c8f3 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -15,25 +15,17 @@ #include "postgres.h" -#include "cstore_fdw.h" -#include "cstore_metadata_serialization.h" -#include "cstore_version_compat.h" #include + #include "access/nbtree.h" -#include "catalog/pg_collation.h" -#include "commands/defrem.h" -#if PG_VERSION_NUM >= 120000 -#include "optimizer/optimizer.h" -#else -#include "optimizer/var.h" -#endif -#include "port.h" +#include "catalog/pg_am.h" #include "storage/fd.h" #include "utils/memutils.h" -#include "utils/lsyscache.h" -#include "utils/rel.h" +#include "cstore.h" +#include "cstore_metadata_serialization.h" +#include 
"cstore_version_compat.h" static void CStoreWriteFooter(StringInfo footerFileName, TableFooter *tableFooter); static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, diff --git a/mod.c b/mod.c index 8cb138c62..dbc8eb923 100644 --- a/mod.c +++ b/mod.c @@ -13,6 +13,8 @@ #include "postgres.h" +#include "fmgr.h" + #include "mod.h" #include "cstore_fdw.h" From 406bebe4b8bfef7b25863d8c5516fe0deacc27e1 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Wed, 2 Sep 2020 14:27:24 -0700 Subject: [PATCH 05/91] update .gitignore --- .gitignore | 4 ++++ cstore_fdw--1.7.sql | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/.gitignore b/.gitignore index f95fd0b87..ad9061861 100644 --- a/.gitignore +++ b/.gitignore @@ -53,5 +53,9 @@ /expected/create.out /expected/data_types.out /expected/load.out +/results/* +/.deps/* +/regression.diffs +/regression.out *.pb-c.* diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index ad2683f52..2f001485f 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -58,3 +58,31 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); +CREATE TABLE cstore_table_metadata( + relid oid, + version_major int, + version_minor int +); + +CREATE TABLE cstore_stripe( + relid oid, + stripe bigint +); + +CREATE TABLE cstore_column_block_skip_node( + relid oid, + stripe bigint, + attr int, + blockid int, + rowcount bigint, + min_value text, + max_value text, + value_offset bigint, + value_length bigint, + value_compression_type char, + exists_offset bigint, + exists_length bigint); + +CREATE INDEX cstore_column_block_skip_node_idx + ON cstore_column_block_skip_node + USING BTREE(relid, stripe, attr, blockid); From f691576f13e54e008320eab3252c12f21b34c13c Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 3 Sep 2020 09:57:59 -0700 Subject: [PATCH 06/91] Move StripeFooter to metadata tables. 
--- .gitignore | 1 + Makefile | 5 +- cstore.c | 2 +- cstore.h | 17 ++- cstore.proto | 7 +- cstore_fdw--1.7.sql | 34 ++---- cstore_fdw.c | 23 ++-- cstore_metadata_serialization.c | 88 +-------------- cstore_metadata_serialization.h | 2 - cstore_metadata_tables.c | 186 ++++++++++++++++++++++++++++++++ cstore_reader.c | 75 ++++++------- cstore_writer.c | 19 ++-- 12 files changed, 275 insertions(+), 184 deletions(-) create mode 100644 cstore_metadata_tables.c diff --git a/.gitignore b/.gitignore index ad9061861..21c5e32ea 100644 --- a/.gitignore +++ b/.gitignore @@ -57,5 +57,6 @@ /.deps/* /regression.diffs /regression.out +.vscode *.pb-c.* diff --git a/Makefile b/Makefile index bd3ae77ce..8f1bf08cc 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,11 @@ MODULE_big = cstore_fdw -PG_CPPFLAGS = --std=c99 +PG_CPPFLAGS = -std=c11 SHLIB_LINK = -lprotobuf-c OBJS = cstore.pb-c.o cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_metadata_serialization.o cstore_compression.o mod.o + cstore_metadata_serialization.o cstore_compression.o mod.o \ + cstore_metadata_tables.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ diff --git a/cstore.c b/cstore.c index e704bc31d..a98f983e3 100644 --- a/cstore.c +++ b/cstore.c @@ -131,7 +131,7 @@ InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *csto * Initialize state to write to the cstore file. This creates an * empty data file and a valid footer file for the table. 
*/ - writeState = CStoreBeginWrite(cstoreOptions->filename, + writeState = CStoreBeginWrite(relationId, cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); CStoreEndWrite(writeState); diff --git a/cstore.h b/cstore.h index a694e1e29..500a38cdb 100644 --- a/cstore.h +++ b/cstore.h @@ -89,6 +89,7 @@ typedef struct StripeMetadata uint64 skipListLength; uint64 dataLength; uint64 footerLength; + uint64 id; } StripeMetadata; @@ -213,6 +214,8 @@ typedef struct StripeFooter /* TableReadState represents state of a cstore file read operation. */ typedef struct TableReadState { + Oid relationId; + FILE *tableFile; TableFooter *tableFooter; TupleDesc tupleDescriptor; @@ -238,6 +241,7 @@ typedef struct TableReadState /* TableWriteState represents state of a cstore file write operation. */ typedef struct TableWriteState { + Oid relationId; FILE *tableFile; TableFooter *tableFooter; StringInfo tableFooterFilename; @@ -248,6 +252,7 @@ typedef struct TableWriteState Relation relation; MemoryContext stripeWriteContext; + uint64 currentStripeId; StripeBuffers *stripeBuffers; StripeSkipList *stripeSkipList; uint32 stripeMaxRowCount; @@ -270,7 +275,8 @@ extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); extern void DeleteCStoreTableFiles(char *filename); /* Function declarations for writing to a cstore file */ -extern TableWriteState * CStoreBeginWrite(const char *filename, +extern TableWriteState * CStoreBeginWrite(Oid relationId, + const char *filename, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, @@ -280,7 +286,8 @@ extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, extern void CStoreEndWrite(TableWriteState * state); /* Function declarations for reading from a cstore file */ -extern TableReadState * CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, +extern TableReadState * CStoreBeginRead(Oid relationId, const char 
*filename, + TupleDesc tupleDescriptor, List *projectedColumnList, List *qualConditions); extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); extern bool CStoreReadFinished(TableReadState *state); @@ -295,10 +302,14 @@ extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *co uint32 blockRowCount); extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount); -extern uint64 CStoreTableRowCount(const char *filename); +extern uint64 CStoreTableRowCount(Oid relid, const char *filename); extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); +/* cstore_metadata_tables.c */ +extern void SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer); +extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount); + #endif /* CSTORE_H */ diff --git a/cstore.proto b/cstore.proto index 6e24c9075..ea949c77c 100644 --- a/cstore.proto +++ b/cstore.proto @@ -23,17 +23,12 @@ message ColumnBlockSkipList { repeated ColumnBlockSkipNode blockSkipNodeArray = 1; } -message StripeFooter { - repeated uint64 skipListSizeArray = 1; - repeated uint64 existsSizeArray = 2; - repeated uint64 valueSizeArray = 3; -} - message StripeMetadata { optional uint64 fileOffset = 1; optional uint64 skipListLength = 2; optional uint64 dataLength = 3; optional uint64 footerLength = 4; + optional uint64 id = 5; } message TableFooter { diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 2f001485f..fd526e711 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -58,31 +58,17 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); -CREATE TABLE cstore_table_metadata( - relid oid, - version_major int, - version_minor int -); - -CREATE TABLE cstore_stripe( - relid oid, - stripe bigint -); - -CREATE TABLE 
cstore_column_block_skip_node( +CREATE TABLE cstore_stripe_attr ( relid oid, stripe bigint, attr int, - blockid int, - rowcount bigint, - min_value text, - max_value text, - value_offset bigint, - value_length bigint, - value_compression_type char, - exists_offset bigint, - exists_length bigint); + exists_size bigint, + value_size bigint, + skiplist_size bigint +) WITH (user_catalog_table = true); -CREATE INDEX cstore_column_block_skip_node_idx - ON cstore_column_block_skip_node - USING BTREE(relid, stripe, attr, blockid); +CREATE INDEX cstore_stripe_attr_idx + ON cstore_stripe_attr + USING BTREE(relid, stripe, attr); + +ALTER TABLE cstore_stripe_attr SET SCHEMA pg_catalog; diff --git a/cstore_fdw.c b/cstore_fdw.c index 9787fd2a2..6bcb92269 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -152,7 +152,7 @@ static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel Oid foreignTableId, ForeignPath *bestPath, List *targetList, List *scanClauses); #endif -static double TupleCountEstimate(RelOptInfo *baserel, const char *filename); +static double TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename); static BlockNumber PageCount(const char *filename); static List * ColumnList(RelOptInfo *baserel, Oid foreignTableId); static void CStoreExplainForeignScan(ForeignScanState *scanState, @@ -602,7 +602,8 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) #endif /* init state to write to the cstore file */ - writeState = CStoreBeginWrite(cstoreOptions->filename, + writeState = CStoreBeginWrite(relationId, + cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, @@ -1414,6 +1415,7 @@ ValidateForeignTableOptions(char *filename, char *compressionTypeString, static char * CStoreDefaultFilePath(Oid foreignTableId) { + StringInfo cstoreFilePath = NULL; Relation relation = relation_open(foreignTableId, AccessShareLock); RelFileNode relationFileNode = 
relation->rd_node; Oid databaseOid = relationFileNode.dbNode; @@ -1429,7 +1431,7 @@ CStoreDefaultFilePath(Oid foreignTableId) } - StringInfo cstoreFilePath = makeStringInfo(); + cstoreFilePath = makeStringInfo(); appendStringInfo(cstoreFilePath, "%s/%s/%u/%u", DataDir, CSTORE_FDW_NAME, databaseOid, relationFileOid); @@ -1445,7 +1447,7 @@ static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - double tupleCountEstimate = TupleCountEstimate(baserel, cstoreOptions->filename); + double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, cstoreOptions->filename); double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, 0, JOIN_INNER, NULL); @@ -1492,7 +1494,7 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId double queryPageCount = relationPageCount * queryColumnRatio; double totalDiskAccessCost = seq_page_cost * queryPageCount; - double tupleCountEstimate = TupleCountEstimate(baserel, cstoreOptions->filename); + double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, cstoreOptions->filename); /* * We estimate costs almost the same way as cost_seqscan(), thus assuming @@ -1597,7 +1599,7 @@ CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId, * file. 
*/ static double -TupleCountEstimate(RelOptInfo *baserel, const char *filename) +TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename) { double tupleCountEstimate = 0.0; @@ -1616,7 +1618,7 @@ TupleCountEstimate(RelOptInfo *baserel, const char *filename) } else { - tupleCountEstimate = (double) CStoreTableRowCount(filename); + tupleCountEstimate = (double) CStoreTableRowCount(relid, filename); } return tupleCountEstimate; @@ -1809,8 +1811,8 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) whereClauseList = foreignScan->scan.plan.qual; columnList = (List *) linitial(foreignPrivateList); - readState = CStoreBeginRead(cstoreOptions->filename, tupleDescriptor, - columnList, whereClauseList); + readState = CStoreBeginRead(foreignTableId, cstoreOptions->filename, + tupleDescriptor, columnList, whereClauseList); scanState->fdw_state = (void *) readState; } @@ -2161,7 +2163,8 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - writeState = CStoreBeginWrite(cstoreOptions->filename, + writeState = CStoreBeginWrite(foreignTableOid, + cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c index 67ae2ec2c..94e3c3116 100644 --- a/cstore_metadata_serialization.c +++ b/cstore_metadata_serialization.c @@ -95,6 +95,8 @@ SerializeTableFooter(TableFooter *tableFooter) protobufStripeMetadata->datalength = stripeMetadata->dataLength; protobufStripeMetadata->has_footerlength = true; protobufStripeMetadata->footerlength = stripeMetadata->footerLength; + protobufStripeMetadata->has_id = true; + protobufStripeMetadata->id = stripeMetadata->id; stripeMetadataArray[stripeIndex] = protobufStripeMetadata; stripeIndex++; @@ -118,38 +120,6 @@ 
SerializeTableFooter(TableFooter *tableFooter) } -/* - * SerializeStripeFooter serializes given stripe footer and returns the result - * as a StringInfo. - */ -StringInfo -SerializeStripeFooter(StripeFooter *stripeFooter) -{ - StringInfo stripeFooterBuffer = NULL; - Protobuf__StripeFooter protobufStripeFooter = PROTOBUF__STRIPE_FOOTER__INIT; - uint8 *stripeFooterData = NULL; - uint32 stripeFooterSize = 0; - - protobufStripeFooter.n_skiplistsizearray = stripeFooter->columnCount; - protobufStripeFooter.skiplistsizearray = (uint64_t *) stripeFooter->skipListSizeArray; - protobufStripeFooter.n_existssizearray = stripeFooter->columnCount; - protobufStripeFooter.existssizearray = (uint64_t *) stripeFooter->existsSizeArray; - protobufStripeFooter.n_valuesizearray = stripeFooter->columnCount; - protobufStripeFooter.valuesizearray = (uint64_t *) stripeFooter->valueSizeArray; - - stripeFooterSize = protobuf__stripe_footer__get_packed_size(&protobufStripeFooter); - stripeFooterData = palloc0(stripeFooterSize); - protobuf__stripe_footer__pack(&protobufStripeFooter, stripeFooterData); - - stripeFooterBuffer = palloc0(sizeof(StringInfoData)); - stripeFooterBuffer->len = stripeFooterSize; - stripeFooterBuffer->maxlen = stripeFooterSize; - stripeFooterBuffer->data = (char *) stripeFooterData; - - return stripeFooterBuffer; -} - - /* * SerializeColumnSkipList serializes a column skip list, where the colum skip * list includes all block skip nodes for that column. 
The function then returns @@ -315,6 +285,7 @@ DeserializeTableFooter(StringInfo buffer) stripeMetadata->skipListLength = protobufStripeMetadata->skiplistlength; stripeMetadata->dataLength = protobufStripeMetadata->datalength; stripeMetadata->footerLength = protobufStripeMetadata->footerlength; + stripeMetadata->id = protobufStripeMetadata->id; stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); } @@ -329,59 +300,6 @@ DeserializeTableFooter(StringInfo buffer) } -/* - * DeserializeStripeFooter deserializes the given buffer and returns the result - * as a StripeFooter struct. - */ -StripeFooter * -DeserializeStripeFooter(StringInfo buffer) -{ - StripeFooter *stripeFooter = NULL; - Protobuf__StripeFooter *protobufStripeFooter = NULL; - uint64 *skipListSizeArray = NULL; - uint64 *existsSizeArray = NULL; - uint64 *valueSizeArray = NULL; - uint64 sizeArrayLength = 0; - uint32 columnCount = 0; - - protobufStripeFooter = protobuf__stripe_footer__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufStripeFooter == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid stripe footer buffer"))); - } - - columnCount = protobufStripeFooter->n_skiplistsizearray; - if (protobufStripeFooter->n_existssizearray != columnCount || - protobufStripeFooter->n_valuesizearray != columnCount) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("stripe size array lengths don't match"))); - } - - sizeArrayLength = columnCount * sizeof(uint64); - - skipListSizeArray = palloc0(sizeArrayLength); - existsSizeArray = palloc0(sizeArrayLength); - valueSizeArray = palloc0(sizeArrayLength); - - memcpy(skipListSizeArray, protobufStripeFooter->skiplistsizearray, sizeArrayLength); - memcpy(existsSizeArray, protobufStripeFooter->existssizearray, sizeArrayLength); - memcpy(valueSizeArray, protobufStripeFooter->valuesizearray, sizeArrayLength); - - protobuf__stripe_footer__free_unpacked(protobufStripeFooter, NULL); - - 
stripeFooter = palloc0(sizeof(StripeFooter)); - stripeFooter->skipListSizeArray = skipListSizeArray; - stripeFooter->existsSizeArray = existsSizeArray; - stripeFooter->valueSizeArray = valueSizeArray; - stripeFooter->columnCount = columnCount; - - return stripeFooter; -} - - /* * DeserializeBlockCount deserializes the given column skip list buffer and * returns the number of blocks in column skip list. diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h index b8890a5d4..d5b7c90ff 100644 --- a/cstore_metadata_serialization.h +++ b/cstore_metadata_serialization.h @@ -17,7 +17,6 @@ /* Function declarations for metadata serialization */ extern StringInfo SerializePostScript(uint64 tableFooterLength); extern StringInfo SerializeTableFooter(TableFooter *tableFooter); -extern StringInfo SerializeStripeFooter(StripeFooter *stripeFooter); extern StringInfo SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, bool typeByValue, int typeLength); @@ -27,7 +26,6 @@ extern void DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength); extern TableFooter * DeserializeTableFooter(StringInfo buffer); extern uint32 DeserializeBlockCount(StringInfo buffer); extern uint32 DeserializeRowCount(StringInfo buffer); -extern StripeFooter * DeserializeStripeFooter(StringInfo buffer); extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, bool typeByValue, int typeLength, uint32 blockCount); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c new file mode 100644 index 000000000..e2d003989 --- /dev/null +++ b/cstore_metadata_tables.c @@ -0,0 +1,186 @@ +/*------------------------------------------------------------------------- + * + * cstore_metadata_tables.c + * + * Copyright (c), Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "cstore.h" +#include "cstore_version_compat.h" + +#include +#include "access/nbtree.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/indexing.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_collation.h" +#include "commands/defrem.h" +#include "lib/stringinfo.h" +#include "optimizer/optimizer.h" +#include "port.h" +#include "storage/fd.h" +#include "utils/fmgroids.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + +#include "cstore_metadata_serialization.h" + +static Oid CStoreStripeAttrRelationId(void); +static Oid CStoreStripeAttrIndexRelationId(void); +static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, + uint64 existsSize, uint64 valuesSize, + uint64 skiplistSize); + +/* constants for cstore_stripe_attr */ +#define Natts_cstore_stripe_attr 6 +#define Anum_cstore_stripe_attr_relid 1 +#define Anum_cstore_stripe_attr_stripe 2 +#define Anum_cstore_stripe_attr_attr 3 +#define Anum_cstore_stripe_attr_exists_size 4 +#define Anum_cstore_stripe_attr_value_size 5 +#define Anum_cstore_stripe_attr_skiplist_size 6 + +/* + * SaveStripeFooter stores given StripeFooter as cstore_stripe_attr records. + */ +void +SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer) +{ + for (AttrNumber attr = 1; attr <= footer->columnCount; attr++) + { + InsertStripeAttrRow(relid, stripe, attr, + footer->existsSizeArray[attr - 1], + footer->valueSizeArray[attr - 1], + footer->skipListSizeArray[attr - 1]); + } +} + + +/* + * InsertStripeAttrRow adds a row to cstore_stripe_attr. 
+ */ +static void +InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, + uint64 existsSize, uint64 valuesSize, + uint64 skiplistSize) +{ + bool nulls[Natts_cstore_stripe_attr] = { 0 }; + Datum values[Natts_cstore_stripe_attr] = { + ObjectIdGetDatum(relid), + Int64GetDatum(stripe), + Int16GetDatum(attr), + Int64GetDatum(existsSize), + Int64GetDatum(valuesSize), + Int64GetDatum(skiplistSize) + }; + + Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); + Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, RowExclusiveLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripeAttrs); + + HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + + CatalogTupleInsert(cstoreStripeAttrs, tuple); + + CommandCounterIncrement(); + + heap_close(cstoreStripeAttrs, NoLock); +} + + +/* + * ReadStripeFooter returns a StripeFooter by reading relevant records from + * cstore_stripe_attr. + */ +StripeFooter * +ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) +{ + StripeFooter *footer = NULL; + HeapTuple heapTuple; + + Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); + Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, AccessShareLock); + Relation index = index_open(CStoreStripeAttrIndexRelationId(), AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripeAttrs); + + SysScanDesc scanDescriptor = NULL; + ScanKeyData scanKey[2]; + ScanKeyInit(&scanKey[0], Anum_cstore_stripe_attr_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[1], Anum_cstore_stripe_attr_stripe, + BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(stripe)); + + scanDescriptor = systable_beginscan_ordered(cstoreStripeAttrs, index, NULL, 2, + scanKey); + + footer = palloc0(sizeof(StripeFooter)); + footer->existsSizeArray = palloc0(relationColumnCount * sizeof(int64)); + footer->valueSizeArray = palloc0(relationColumnCount * sizeof(int64)); + footer->skipListSizeArray = 
palloc0(relationColumnCount * sizeof(int64)); + + /* + * Stripe can have fewer columns than the relation if ALTER TABLE happens + * after stripe is formed. So we calculate column count of a stripe as + * maximum attribute number for that stripe. + */ + footer->columnCount = 0; + + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + Datum datumArray[Natts_cstore_stripe_attr]; + bool isNullArray[Natts_cstore_stripe_attr]; + AttrNumber attr = 0; + + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + attr = DatumGetInt16(datumArray[2]); + + footer->columnCount = Max(footer->columnCount, attr); + + while (attr > relationColumnCount) + { + ereport(ERROR, (errmsg("unexpected attribute %d for a relation with %d attrs", + attr, relationColumnCount))); + } + + footer->existsSizeArray[attr - 1] = + DatumGetInt64(datumArray[Anum_cstore_stripe_attr_exists_size - 1]); + footer->valueSizeArray[attr - 1] = + DatumGetInt64(datumArray[Anum_cstore_stripe_attr_value_size - 1]); + footer->skipListSizeArray[attr - 1] = + DatumGetInt64(datumArray[Anum_cstore_stripe_attr_skiplist_size - 1]); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreStripeAttrs, NoLock); + + return footer; +} + + +/* + * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreStripeAttrRelationId(void) +{ + return get_relname_relid("cstore_stripe_attr", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreStripeAttrIndexRelationId returns relation id of cstore_stripe_attr_idx. + * TODO: should we cache this similar to citus? 
+ */ +static Oid +CStoreStripeAttrIndexRelationId(void) +{ + return get_relname_relid("cstore_stripe_attr_idx", PG_CATALOG_NAMESPACE); +} diff --git a/cstore_reader.c b/cstore_reader.c index 68ce5cdad..6caf99bc7 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -31,6 +31,7 @@ #include "storage/fd.h" #include "utils/memutils.h" #include "utils/lsyscache.h" +#include "utils/rel.h" #include "cstore.h" #include "cstore_metadata_serialization.h" @@ -39,6 +40,7 @@ /* static function declarations */ static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, + StripeFooter *stripeFooter, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList); @@ -51,8 +53,6 @@ static ColumnBuffers * LoadColumnBuffers(FILE *tableFile, uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, Form_pg_attribute attributeForm); -static StripeFooter * LoadStripeFooter(FILE *tableFile, StripeMetadata *stripeMetadata, - uint32 columnCount); static StripeSkipList * LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, @@ -86,7 +86,8 @@ static int64 FILESize(FILE *file); static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size); static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount); -static uint64 StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata); +static uint64 StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata); +static int RelationColumnCount(Oid relid); /* @@ -94,7 +95,7 @@ static uint64 StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata); * read handle that's used during reading rows and finishing the read operation. 
*/ TableReadState * -CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, +CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; @@ -136,6 +137,7 @@ CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, tableFooter->blockRowCount); readState = palloc0(sizeof(TableReadState)); + readState->relationId = relationId; readState->tableFile = tableFile; readState->tableFooter = tableFooter; readState->projectedColumnList = projectedColumnList; @@ -247,6 +249,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu StripeMetadata *stripeMetadata = NULL; List *stripeMetadataList = tableFooter->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); + StripeFooter *stripeFooter = NULL; /* if we have read all stripes, return false */ if (readState->readStripeCount == stripeCount) @@ -258,7 +261,11 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu MemoryContextReset(readState->stripeReadContext); stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount); + stripeFooter = ReadStripeFooter(readState->relationId, + stripeMetadata->id, + readState->tupleDescriptor->natts); stripeBuffers = LoadFilteredStripeBuffers(readState->tableFile, stripeMetadata, + stripeFooter, readState->tupleDescriptor, readState->projectedColumnList, readState->whereClauseList); @@ -396,7 +403,7 @@ FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) /* CStoreTableRowCount returns the exact row count of a table using skiplists */ uint64 -CStoreTableRowCount(const char *filename) +CStoreTableRowCount(Oid relid, const char *filename) { TableFooter *tableFooter = NULL; FILE *tableFile; @@ -422,7 +429,7 @@ CStoreTableRowCount(const char *filename) foreach(stripeMetadataCell, tableFooter->stripeMetadataList) { StripeMetadata *stripeMetadata = 
(StripeMetadata *) lfirst(stripeMetadataCell); - totalRowCount += StripeRowCount(tableFile, stripeMetadata); + totalRowCount += StripeRowCount(relid, tableFile, stripeMetadata); } FreeFile(tableFile); @@ -436,20 +443,13 @@ CStoreTableRowCount(const char *filename) * skip list, and returns number of rows for given stripe. */ static uint64 -StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata) +StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata) { uint64 rowCount = 0; - StripeFooter *stripeFooter = NULL; - StringInfo footerBuffer = NULL; StringInfo firstColumnSkipListBuffer = NULL; - uint64 footerOffset = 0; - footerOffset += stripeMetadata->fileOffset; - footerOffset += stripeMetadata->skipListLength; - footerOffset += stripeMetadata->dataLength; - - footerBuffer = ReadFromFile(tableFile, footerOffset, stripeMetadata->footerLength); - stripeFooter = DeserializeStripeFooter(footerBuffer); + StripeFooter * stripeFooter = ReadStripeFooter(relid, stripeMetadata->id, + RelationColumnCount(relid)); firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, stripeFooter->skipListSizeArray[0]); @@ -466,8 +466,8 @@ StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata) */ static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, - TupleDesc tupleDescriptor, List *projectedColumnList, - List *whereClauseList) + StripeFooter *stripeFooter, TupleDesc tupleDescriptor, + List *projectedColumnList, List *whereClauseList) { StripeBuffers *stripeBuffers = NULL; ColumnBuffers **columnBuffersArray = NULL; @@ -475,8 +475,6 @@ LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, uint32 columnIndex = 0; uint32 columnCount = tupleDescriptor->natts; - StripeFooter *stripeFooter = LoadStripeFooter(tableFile, stripeMetadata, - columnCount); bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); StripeSkipList *stripeSkipList = 
LoadStripeSkipList(tableFile, stripeMetadata, @@ -617,31 +615,6 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, } -/* Reads and returns the given stripe's footer. */ -static StripeFooter * -LoadStripeFooter(FILE *tableFile, StripeMetadata *stripeMetadata, - uint32 columnCount) -{ - StripeFooter *stripeFooter = NULL; - StringInfo footerBuffer = NULL; - uint64 footerOffset = 0; - - footerOffset += stripeMetadata->fileOffset; - footerOffset += stripeMetadata->skipListLength; - footerOffset += stripeMetadata->dataLength; - - footerBuffer = ReadFromFile(tableFile, footerOffset, stripeMetadata->footerLength); - stripeFooter = DeserializeStripeFooter(footerBuffer); - if (stripeFooter->columnCount > columnCount) - { - ereport(ERROR, (errmsg("stripe footer column count and table column count " - "don't match"))); - } - - return stripeFooter; -} - - /* Reads the skip list for the given stripe. */ static StripeSkipList * LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, @@ -1377,3 +1350,15 @@ ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount) } } } + + +static int +RelationColumnCount(Oid relid) +{ + Relation rel = RelationIdGetRelation(relid); + TupleDesc tupleDesc = RelationGetDescr(rel); + int columnCount = tupleDesc->natts; + RelationClose(rel); + + return columnCount; +} diff --git a/cstore_writer.c b/cstore_writer.c index 51a01c8f3..76fc703f3 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -65,7 +65,8 @@ static StringInfo CopyStringInfo(StringInfo sourceString); * will be added. 
*/ TableWriteState * -CStoreBeginWrite(const char *filename, CompressionType compressionType, +CStoreBeginWrite(Oid relationId, + const char *filename, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) { @@ -82,6 +83,7 @@ CStoreBeginWrite(const char *filename, CompressionType compressionType, int statResult = 0; bool *columnMaskArray = NULL; ColumnBlockData **blockData = NULL; + uint64 currentStripeId = 0; tableFooterFilename = makeStringInfo(); appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); @@ -130,6 +132,7 @@ CStoreBeginWrite(const char *filename, CompressionType compressionType, lastStripeSize += lastStripe->footerLength; currentFileOffset = lastStripe->fileOffset + lastStripeSize; + currentStripeId = lastStripe->id + 1; errno = 0; fseekResult = fseeko(tableFile, currentFileOffset, SEEK_SET); @@ -173,6 +176,7 @@ CStoreBeginWrite(const char *filename, CompressionType compressionType, blockData = CreateEmptyBlockDataArray(columnCount, columnMaskArray, blockRowCount); writeState = palloc0(sizeof(TableWriteState)); + writeState->relationId = relationId; writeState->tableFile = tableFile; writeState->tableFooterFilename = tableFooterFilename; writeState->tableFooter = tableFooter; @@ -186,6 +190,7 @@ CStoreBeginWrite(const char *filename, CompressionType compressionType, writeState->stripeWriteContext = stripeWriteContext; writeState->blockDataArray = blockData; writeState->compressionBuffer = NULL; + writeState->currentStripeId = currentStripeId; return writeState; } @@ -286,6 +291,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeMetadata stripeMetadata = FlushStripe(writeState); MemoryContextReset(writeState->stripeWriteContext); + writeState->currentStripeId++; + /* set stripe data and skip list to NULL so they are recreated next time */ writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; @@ -490,7 +497,6 @@ 
FlushStripe(TableWriteState *writeState) uint64 dataLength = 0; StringInfo *skipListBufferArray = NULL; StripeFooter *stripeFooter = NULL; - StringInfo stripeFooterBuffer = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; TableFooter *tableFooter = writeState->tableFooter; @@ -545,7 +551,6 @@ FlushStripe(TableWriteState *writeState) /* create skip list and footer buffers */ skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor); stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray); - stripeFooterBuffer = SerializeStripeFooter(stripeFooter); /* * Each stripe has three sections: @@ -594,7 +599,9 @@ FlushStripe(TableWriteState *writeState) } /* finally, we flush the footer buffer */ - WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len); + SaveStripeFooter(writeState->relationId, + writeState->currentStripeId, + stripeFooter); /* set stripe metadata */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) @@ -607,12 +614,12 @@ FlushStripe(TableWriteState *writeState) stripeMetadata.fileOffset = writeState->currentFileOffset; stripeMetadata.skipListLength = skipListLength; stripeMetadata.dataLength = dataLength; - stripeMetadata.footerLength = stripeFooterBuffer->len; + stripeMetadata.footerLength = 0; + stripeMetadata.id = writeState->currentStripeId; /* advance current file offset */ writeState->currentFileOffset += skipListLength; writeState->currentFileOffset += dataLength; - writeState->currentFileOffset += stripeFooterBuffer->len; return stripeMetadata; } From b74de68ce3f4c9e83f10f169a646ea192908d6c3 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 7 Sep 2020 15:48:23 -0700 Subject: [PATCH 07/91] Add 'make reindent' --- .gitattributes | 26 ++++++++ Makefile | 3 + cstore.c | 8 ++- cstore.h | 16 +---- cstore_compression.c | 17 ++--- cstore_fdw.c | 112 +++++++++++++++++--------------- cstore_fdw.h | 2 +- cstore_metadata_serialization.c | 8 +-- cstore_metadata_serialization.h | 2 
+- cstore_reader.c | 31 +++++---- cstore_version_compat.h | 7 +- cstore_writer.c | 16 +++-- mod.c | 7 +- 13 files changed, 143 insertions(+), 112 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..215ae1909 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,26 @@ +* whitespace=space-before-tab,trailing-space +*.[chly] whitespace=space-before-tab,trailing-space,indent-with-non-tab,tabwidth=4 +*.dsl whitespace=space-before-tab,trailing-space,tab-in-indent +*.patch -whitespace +*.pl whitespace=space-before-tab,trailing-space,tabwidth=4 +*.po whitespace=space-before-tab,trailing-space,tab-in-indent,-blank-at-eof +*.sgml whitespace=space-before-tab,trailing-space,tab-in-indent,-blank-at-eol +*.x[ms]l whitespace=space-before-tab,trailing-space,tab-in-indent + +# Avoid confusing ASCII underlines with leftover merge conflict markers +README conflict-marker-size=32 +README.* conflict-marker-size=32 + +# Certain data files that contain special whitespace, and other special cases +*.data -whitespace + +# Test output files that contain extra whitespace +*.out -whitespace +src/test/regress/output/*.source -whitespace + +# These files are maintained or generated elsewhere. We take them as is. +configure -whitespace + +# all C files (implementation and header) use our style... +*.[ch] citus-style + diff --git a/Makefile b/Makefile index 8f1bf08cc..10d7fcc14 100644 --- a/Makefile +++ b/Makefile @@ -58,3 +58,6 @@ installcheck: remove_cstore_files remove_cstore_files: rm -f data/*.cstore data/*.cstore.footer + +reindent: + citus_indent . diff --git a/cstore.c b/cstore.c index a98f983e3..a259f0430 100644 --- a/cstore.c +++ b/cstore.c @@ -43,6 +43,7 @@ ParseCompressionType(const char *compressionTypeString) return compressionType; } + /* CreateDirectory creates a new directory with the given directory name. 
*/ static void CreateDirectory(StringInfo directoryName) @@ -56,6 +57,7 @@ CreateDirectory(StringInfo directoryName) } } + /* DirectoryExists checks if a directory exists for the given directory name. */ static bool DirectoryExists(StringInfo directoryName) @@ -91,6 +93,7 @@ DirectoryExists(StringInfo directoryName) return directoryExists; } + /* * RemoveCStoreDatabaseDirectory removes CStore directory previously * created for this database. @@ -132,8 +135,9 @@ InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *csto * empty data file and a valid footer file for the table. */ writeState = CStoreBeginWrite(relationId, cstoreOptions->filename, - cstoreOptions->compressionType, cstoreOptions->stripeRowCount, - cstoreOptions->blockRowCount, tupleDescriptor); + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupleDescriptor); CStoreEndWrite(writeState); } diff --git a/cstore.h b/cstore.h index 500a38cdb..ed850d9ef 100644 --- a/cstore.h +++ b/cstore.h @@ -60,7 +60,6 @@ typedef enum COMPRESSION_PG_LZ = 1, COMPRESSION_COUNT - } CompressionType; @@ -75,7 +74,6 @@ typedef struct CStoreOptions CompressionType compressionType; uint64 stripeRowCount; uint32 blockRowCount; - } CStoreOptions; @@ -90,7 +88,6 @@ typedef struct StripeMetadata uint64 dataLength; uint64 footerLength; uint64 id; - } StripeMetadata; @@ -99,7 +96,6 @@ typedef struct TableFooter { List *stripeMetadataList; uint64 blockRowCount; - } TableFooter; @@ -123,7 +119,6 @@ typedef struct ColumnBlockSkipNode uint64 existsLength; CompressionType valueCompressionType; - } ColumnBlockSkipNode; @@ -137,7 +132,6 @@ typedef struct StripeSkipList ColumnBlockSkipNode **blockSkipNodeArray; uint32 columnCount; uint32 blockCount; - } StripeSkipList; @@ -155,7 +149,6 @@ typedef struct ColumnBlockData /* valueBuffer keeps actual data for type-by-reference datums from valueArray. 
*/ StringInfo valueBuffer; - } ColumnBlockData; @@ -171,7 +164,6 @@ typedef struct ColumnBlockBuffers StringInfo existsBuffer; StringInfo valueBuffer; CompressionType valueCompressionType; - } ColumnBlockBuffers; @@ -182,7 +174,6 @@ typedef struct ColumnBlockBuffers typedef struct ColumnBuffers { ColumnBlockBuffers **blockBuffersArray; - } ColumnBuffers; @@ -192,7 +183,6 @@ typedef struct StripeBuffers uint32 columnCount; uint32 rowCount; ColumnBuffers **columnBuffersArray; - } StripeBuffers; @@ -207,7 +197,6 @@ typedef struct StripeFooter uint64 *skipListSizeArray; uint64 *existsSizeArray; uint64 *valueSizeArray; - } StripeFooter; @@ -234,7 +223,6 @@ typedef struct TableReadState uint64 stripeReadRowCount; ColumnBlockData **blockDataArray; int32 deserializedBlockIndex; - } TableReadState; @@ -257,6 +245,7 @@ typedef struct TableWriteState StripeSkipList *stripeSkipList; uint32 stripeMaxRowCount; ColumnBlockData **blockDataArray; + /* * compressionBuffer buffer is used as temporary storage during * data value compression operation. It is kept here to minimize @@ -264,7 +253,6 @@ typedef struct TableWriteState * deallocated when memory context is reset. 
*/ StringInfo compressionBuffer; - } TableWriteState; extern CompressionType ParseCompressionType(const char *compressionTypeString); @@ -283,7 +271,7 @@ extern TableWriteState * CStoreBeginWrite(Oid relationId, TupleDesc tupleDescriptor); extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, bool *columnNulls); -extern void CStoreEndWrite(TableWriteState * state); +extern void CStoreEndWrite(TableWriteState *state); /* Function declarations for reading from a cstore file */ extern TableReadState * CStoreBeginRead(Oid relationId, const char *filename, diff --git a/cstore_compression.c b/cstore_compression.c index a3c5f9f7d..f6122614a 100644 --- a/cstore_compression.c +++ b/cstore_compression.c @@ -22,38 +22,39 @@ #include "cstore.h" - #if PG_VERSION_NUM >= 90500 + /* * The information at the start of the compressed data. This decription is taken * from pg_lzcompress in pre-9.5 version of PostgreSQL. */ typedef struct CStoreCompressHeader { - int32 vl_len_; /* varlena header (do not touch directly!) */ - int32 rawsize; + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int32 rawsize; } CStoreCompressHeader; /* * Utilities for manipulation of header information for compressed data */ -#define CSTORE_COMPRESS_HDRSZ ((int32) sizeof(CStoreCompressHeader)) +#define CSTORE_COMPRESS_HDRSZ ((int32) sizeof(CStoreCompressHeader)) #define CSTORE_COMPRESS_RAWSIZE(ptr) (((CStoreCompressHeader *) (ptr))->rawsize) #define CSTORE_COMPRESS_RAWDATA(ptr) (((char *) (ptr)) + CSTORE_COMPRESS_HDRSZ) -#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = (len)) +#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = \ + (len)) #else -#define CSTORE_COMPRESS_HDRSZ (0) +#define CSTORE_COMPRESS_HDRSZ (0) #define CSTORE_COMPRESS_RAWSIZE(ptr) (PGLZ_RAW_SIZE((PGLZ_Header *) buffer->data)) #define CSTORE_COMPRESS_RAWDATA(ptr) (((PGLZ_Header *) (ptr))) -#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = (len)) +#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = \ + (len)) #endif - /* * CompressBuffer compresses the given buffer with the given compression type * outputBuffer enlarged to contain compressed data. 
The function returns true diff --git a/cstore_fdw.c b/cstore_fdw.c index 6bcb92269..7d43c07d5 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -85,7 +85,6 @@ typedef struct CStoreValidOption { const char *optionName; Oid optionContextId; - } CStoreValidOption; #define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" @@ -114,13 +113,13 @@ static void CStoreProcessUtility(Node *parseTree, const char *queryString, ParamListInfo paramListInfo, DestReceiver *destReceiver, char *completionTag); #endif -static bool CopyCStoreTableStatement(CopyStmt* copyStatement); -static void CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement); +static bool CopyCStoreTableStatement(CopyStmt *copyStatement); +static void CheckSuperuserPrivilegesForCopy(const CopyStmt *copyStatement); static void CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, char *completionTag); static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString); -static uint64 CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString); +static uint64 CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString); static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); static List * DroppedCStoreFilenameList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); @@ -168,7 +167,7 @@ static int CStoreAcquireSampleRows(Relation relation, int logLevel, HeapTuple *sampleRows, int targetRowCount, double *totalRowCount, double *totalDeadRowCount); static List * CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan, - Index resultRelation, int subplanIndex); + Index resultRelation, int subplanIndex); static void CStoreBeginForeignModify(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo, List *fdwPrivate, int subplanIndex, int executorflags); @@ -201,7 +200,8 @@ static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; * previous utility hook, and then install our hook to 
pre-intercept calls to * the copy command. */ -void cstore_fdw_init() +void +cstore_fdw_init() { PreviousProcessUtilityHook = ProcessUtility_hook; ProcessUtility_hook = CStoreProcessUtility; @@ -212,7 +212,8 @@ void cstore_fdw_init() * Called when the module is unloaded. This function uninstalls the * extension's hooks. */ -void cstore_fdw_finish() +void +cstore_fdw_finish() { ProcessUtility_hook = PreviousProcessUtilityHook; } @@ -296,10 +297,10 @@ CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, DestReceiver *destReceiver, char *completionTag) #else static void -CStoreProcessUtility(Node * parseTree, const char *queryString, +CStoreProcessUtility(Node * parseTree, const char * queryString, ProcessUtilityContext context, ParamListInfo paramListInfo, - DestReceiver *destReceiver, char *completionTag) + DestReceiver * destReceiver, char * completionTag) #endif { #if PG_VERSION_NUM >= 100000 @@ -387,11 +388,12 @@ CStoreProcessUtility(Node * parseTree, const char *queryString, CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - /* restore the former relation list. Our - * replacement could be freed but still needed - * in a cached plan. A truncate can be cached - * if run from a pl/pgSQL function */ - truncateStatement->relations = allTablesList; + + /* restore the former relation list. Our + * replacement could be freed but still needed + * in a cached plan. A truncate can be cached + * if run from a pl/pgSQL function */ + truncateStatement->relations = allTablesList; } TruncateCStoreTables(cstoreRelationList); @@ -439,7 +441,7 @@ CStoreProcessUtility(Node * parseTree, const char *queryString, * true. The function returns false otherwise. 
*/ static bool -CopyCStoreTableStatement(CopyStmt* copyStatement) +CopyCStoreTableStatement(CopyStmt *copyStatement) { bool copyCStoreTableStatement = false; @@ -474,7 +476,7 @@ CopyCStoreTableStatement(CopyStmt* copyStatement) * copy operation and reports error if user does not have superuser rights. */ static void -CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement) +CheckSuperuserPrivilegesForCopy(const CopyStmt *copyStatement) { /* * We disallow copy from file or program except to superusers. These checks @@ -485,16 +487,16 @@ CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement) if (copyStatement->is_program) { ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be superuser to COPY to or from a program"), - errhint("Anyone can COPY to stdout or from stdin. " - "psql's \\copy command also works for anyone."))); + errmsg("must be superuser to COPY to or from a program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); } else { ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be superuser to COPY to or from a file"), - errhint("Anyone can COPY to stdout or from stdin. " - "psql's \\copy command also works for anyone."))); + errmsg("must be superuser to COPY to or from a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); } } } @@ -505,7 +507,7 @@ CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement) * It determines the copy direction and forwards execution to appropriate function. */ static void -CStoreProcessCopyCommand(CopyStmt *copyStatement, const char* queryString, +CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, char *completionTag) { uint64 processedCount = 0; @@ -648,7 +650,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) * stream. Copying selected columns from cstore table is not currently supported. 
*/ static uint64 -CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString) +CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString) { uint64 processedCount = 0; RangeVar *relation = NULL; @@ -682,6 +684,7 @@ CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString) copyStatement->relation = NULL; #if (PG_VERSION_NUM >= 100000) + /* * raw_parser returns list of RawStmt* in PG 10+ we need to * extract actual query from it. @@ -737,7 +740,7 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) foreach(commandCell, commandList) { AlterTableCmd *alterCommand = (AlterTableCmd *) lfirst(commandCell); - if(alterCommand->subtype == AT_AlterColumnType) + if (alterCommand->subtype == AT_AlterColumnType) { char *columnName = alterCommand->name; ColumnDef *columnDef = (ColumnDef *) alterCommand->def; @@ -849,7 +852,7 @@ OpenRelationsForTruncate(List *cstoreTableList) Relation relation = heap_openrv(rangeVar, AccessExclusiveLock); Oid relationId = relation->rd_id; AclResult aclresult = pg_class_aclcheck(relationId, GetUserId(), - ACL_TRUNCATE); + ACL_TRUNCATE); if (aclresult != ACLCHECK_OK) { aclcheck_error(aclresult, ACLCHECK_OBJECT_TABLE, get_rel_name(relationId)); @@ -890,6 +893,7 @@ TruncateCStoreTables(List *cstoreRelationList) } } + /* * CStoreTable checks if the given table name belongs to a foreign columnar store * table. If it does, the function returns true. Otherwise, it returns false. 
@@ -996,23 +1000,20 @@ DistributedTable(Oid relationId) static bool DistributedWorkerCopy(CopyStmt *copyStatement) { - ListCell *optionCell = NULL; - foreach(optionCell, copyStatement->options) - { - DefElem *defel = (DefElem *) lfirst(optionCell); - if (strncmp(defel->defname, "master_host", NAMEDATALEN) == 0) - { - return true; - } - } + ListCell *optionCell = NULL; + foreach(optionCell, copyStatement->options) + { + DefElem *defel = (DefElem *) lfirst(optionCell); + if (strncmp(defel->defname, "master_host", NAMEDATALEN) == 0) + { + return true; + } + } - return false; + return false; } - - - /* * cstore_table_size returns the total on-disk size of a cstore table in bytes. * The result includes the sizes of data file and footer file. @@ -1056,7 +1057,7 @@ cstore_table_size(PG_FUNCTION_ARGS) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", - footerFilename->data))); + footerFilename->data))); } tableSize += dataFileStatBuffer.st_size; @@ -1428,7 +1429,6 @@ CStoreDefaultFilePath(Oid foreignTableId) { databaseOid = MyDatabaseId; relationFileOid = foreignTableId; - } cstoreFilePath = makeStringInfo(); @@ -1447,7 +1447,8 @@ static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, cstoreOptions->filename); + double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, + cstoreOptions->filename); double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, 0, JOIN_INNER, NULL); @@ -1494,7 +1495,8 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId double queryPageCount = relationPageCount * queryColumnRatio; double totalDiskAccessCost = seq_page_cost * queryPageCount; - double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, cstoreOptions->filename); + double tupleCountEstimate = 
TupleCountEstimate(foreignTableId, baserel, + cstoreOptions->filename); /* * We estimate costs almost the same way as cost_seqscan(), thus assuming @@ -1505,7 +1507,7 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId double totalCpuCost = cpuCostPerTuple * tupleCountEstimate; double startupCost = baserel->baserestrictcost.startup; - double totalCost = startupCost + totalCpuCost + totalDiskAccessCost; + double totalCost = startupCost + totalCpuCost + totalDiskAccessCost; /* create a foreign path node and add it as the only possible path */ #if PG_VERSION_NUM >= 90600 @@ -1550,8 +1552,8 @@ CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId, Plan *outerPlan) #else static ForeignScan * -CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId, - ForeignPath *bestPath, List *targetList, List *scanClauses) +CStoreGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreignTableId, + ForeignPath * bestPath, List * targetList, List * scanClauses) #endif { ForeignScan *foreignScan = NULL; @@ -1720,7 +1722,7 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) { ListCell *neededColumnCell = NULL; Var *column = NULL; - Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex - 1); + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex - 1); if (attributeForm->attisdropped) { @@ -1920,7 +1922,7 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, { int sampleRowCount = 0; double rowCount = 0.0; - double rowCountToSkip = -1; /* -1 means not set yet */ + double rowCountToSkip = -1; /* -1 means not set yet */ double selectionState = 0; MemoryContext oldContext = CurrentMemoryContext; MemoryContext tupleContext = NULL; @@ -1948,7 +1950,8 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, if (!attributeForm->attisdropped) { Var *column = makeVar(tableId, columnIndex + 1, attributeForm->atttypid, - attributeForm->atttypmod, 
attributeForm->attcollation, 0); + attributeForm->atttypmod, attributeForm->attcollation, + 0); columnList = lappend(columnList, column); } } @@ -2139,7 +2142,7 @@ CStoreBeginForeignModify(ModifyTableState *modifyTableState, return; } - Assert (modifyTableState->operation == CMD_INSERT); + Assert(modifyTableState->operation == CMD_INSERT); CStoreBeginForeignInsert(modifyTableState, relationInfo); } @@ -2152,7 +2155,7 @@ CStoreBeginForeignModify(ModifyTableState *modifyTableState, static void CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo) { - Oid foreignTableOid = InvalidOid; + Oid foreignTableOid = InvalidOid; CStoreOptions *cstoreOptions = NULL; TupleDesc tupleDescriptor = NULL; TableWriteState *writeState = NULL; @@ -2183,7 +2186,7 @@ static TupleTableSlot * CStoreExecForeignInsert(EState *executorState, ResultRelInfo *relationInfo, TupleTableSlot *tupleSlot, TupleTableSlot *planSlot) { - TableWriteState *writeState = (TableWriteState*) relationInfo->ri_FdwState; + TableWriteState *writeState = (TableWriteState *) relationInfo->ri_FdwState; HeapTuple heapTuple; Assert(writeState != NULL); @@ -2224,7 +2227,7 @@ CStoreEndForeignModify(EState *executorState, ResultRelInfo *relationInfo) static void CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) { - TableWriteState *writeState = (TableWriteState*) relationInfo->ri_FdwState; + TableWriteState *writeState = (TableWriteState *) relationInfo->ri_FdwState; /* writeState is NULL during Explain queries */ if (writeState != NULL) @@ -2238,6 +2241,7 @@ CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) #if PG_VERSION_NUM >= 90600 + /* * CStoreIsForeignScanParallelSafe always returns true to indicate that * reading from a cstore_fdw table in a parallel worker is safe. 
This @@ -2254,4 +2258,6 @@ CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, { return true; } + + #endif diff --git a/cstore_fdw.h b/cstore_fdw.h index c7b4460ed..1c8170ae8 100644 --- a/cstore_fdw.h +++ b/cstore_fdw.h @@ -32,4 +32,4 @@ extern Datum cstore_clean_table_resources(PG_FUNCTION_ARGS); extern Datum cstore_fdw_handler(PG_FUNCTION_ARGS); extern Datum cstore_fdw_validator(PG_FUNCTION_ARGS); -#endif /* CSTORE_FDW_H */ +#endif /* CSTORE_FDW_H */ diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c index 94e3c3116..09c17ee7f 100644 --- a/cstore_metadata_serialization.c +++ b/cstore_metadata_serialization.c @@ -143,8 +143,8 @@ SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCou { ColumnBlockSkipNode blockSkipNode = blockSkipNodeArray[blockIndex]; Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL; - ProtobufCBinaryData binaryMinimumValue = {0, 0}; - ProtobufCBinaryData binaryMaximumValue = {0, 0}; + ProtobufCBinaryData binaryMinimumValue = { 0, 0 }; + ProtobufCBinaryData binaryMaximumValue = { 0, 0 }; if (blockSkipNode.hasMinMax) { @@ -352,7 +352,7 @@ DeserializeRowCount(StringInfo buffer) for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = - protobufBlockSkipList->blockskipnodearray[blockIndex]; + protobufBlockSkipList->blockskipnodearray[blockIndex]; rowCount += protobufBlockSkipNode->rowcount; } @@ -452,7 +452,7 @@ DeserializeColumnSkipList(StringInfo buffer, bool typeByValue, int typeLength, static ProtobufCBinaryData DatumToProtobufBinary(Datum datum, bool datumTypeByValue, int datumTypeLength) { - ProtobufCBinaryData protobufBinary = {0, 0}; + ProtobufCBinaryData protobufBinary = { 0, 0 }; int datumLength = att_addlength_datum(0, datumTypeLength, datum); char *datumBuffer = palloc0(datumLength); diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h index d5b7c90ff..12a3d135b 100644 --- 
a/cstore_metadata_serialization.h +++ b/cstore_metadata_serialization.h @@ -31,4 +31,4 @@ extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, uint32 blockCount); -#endif /* CSTORE_SERIALIZATION_H */ +#endif /* CSTORE_SERIALIZATION_H */ diff --git a/cstore_reader.c b/cstore_reader.c index 6caf99bc7..78c7fe00e 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -67,7 +67,7 @@ static OpExpr * MakeOpExpression(Var *variable, int16 strategyNumber); static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber); static void UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue); static StripeSkipList * SelectedBlockSkipList(StripeSkipList *stripeSkipList, - bool *projectedColumnMask, + bool *projectedColumnMask, bool *selectedBlockMask); static uint32 StripeSkipListRowCount(StripeSkipList *stripeSkipList); static bool * ProjectedColumnMask(uint32 columnCount, List *projectedColumnList); @@ -104,7 +104,7 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, MemoryContext stripeReadContext = NULL; uint32 columnCount = 0; bool *projectedColumnMask = NULL; - ColumnBlockData **blockDataArray = NULL; + ColumnBlockData **blockDataArray = NULL; StringInfo tableFooterFilename = makeStringInfo(); appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); @@ -134,7 +134,7 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, columnCount = tupleDescriptor->natts; projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); blockDataArray = CreateEmptyBlockDataArray(columnCount, projectedColumnMask, - tableFooter->blockRowCount); + tableFooter->blockRowCount); readState = palloc0(sizeof(TableReadState)); readState->relationId = relationId; @@ -356,7 +356,7 @@ ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, uint32 blockRowCount) { uint32 columnIndex = 0; - ColumnBlockData **blockDataArray = 
palloc0(columnCount * sizeof(ColumnBlockData*)); + ColumnBlockData **blockDataArray = palloc0(columnCount * sizeof(ColumnBlockData *)); /* allocate block memory for deserialized data */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) @@ -448,12 +448,12 @@ StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata) uint64 rowCount = 0; StringInfo firstColumnSkipListBuffer = NULL; - StripeFooter * stripeFooter = ReadStripeFooter(relid, stripeMetadata->id, - RelationColumnCount(relid)); + StripeFooter *stripeFooter = ReadStripeFooter(relid, stripeMetadata->id, + RelationColumnCount(relid)); firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, - stripeFooter->skipListSizeArray[0]); - rowCount = DeserializeRowCount(firstColumnSkipListBuffer); + stripeFooter->skipListSizeArray[0]); + rowCount = DeserializeRowCount(firstColumnSkipListBuffer); return rowCount; } @@ -573,7 +573,7 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, ColumnBuffers *columnBuffers = NULL; uint32 blockIndex = 0; ColumnBlockBuffers **blockBuffersArray = - palloc0(blockCount * sizeof(ColumnBlockBuffers *)); + palloc0(blockCount * sizeof(ColumnBlockBuffers *)); for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { @@ -761,7 +761,8 @@ SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, constraintList = list_make1(baseConstraint); #if (PG_VERSION_NUM >= 100000) - predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList, false); + predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList, + false); #else predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList); #endif @@ -877,7 +878,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber) Oid accessMethodId = BTREE_AM_OID; Oid operatorId = InvalidOid; - Const *constantValue = NULL; + Const *constantValue = NULL; OpExpr *expression = NULL; /* Load the operator from system catalogs */ @@ 
-888,7 +889,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber) /* Now make the expression with the given variable and a null constant */ expression = (OpExpr *) make_opclause(operatorId, InvalidOid, /* no result type yet */ - false, /* no return set */ + false, /* no return set */ (Expr *) variable, (Expr *) constantValue, InvalidOid, collationId); @@ -1163,7 +1164,8 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, if (columnBuffers != NULL) { - ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = NULL; /* free previous block's data buffers */ @@ -1214,7 +1216,6 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, { memset(blockData->existsArray, false, rowCount); } - } } } @@ -1330,8 +1331,6 @@ ReadFromFile(FILE *file, uint64 offset, uint32 size) } - - /* * ResetUncompressedBlockData iterates over deserialized column block data * and sets valueBuffer field to empty buffer. 
This field is allocated in stripe diff --git a/cstore_version_compat.h b/cstore_version_compat.h index 1b80b16c3..95521c1aa 100644 --- a/cstore_version_compat.h +++ b/cstore_version_compat.h @@ -22,7 +22,8 @@ #endif #if PG_VERSION_NUM < 110000 -#define ALLOCSET_DEFAULT_SIZES ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE +#define ALLOCSET_DEFAULT_SIZES ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, \ + ALLOCSET_DEFAULT_MAXSIZE #define ACLCHECK_OBJECT_TABLE ACL_KIND_CLASS #else #define ACLCHECK_OBJECT_TABLE OBJECT_TABLE @@ -46,9 +47,9 @@ #endif #if PG_VERSION_NUM < 120000 -#define TTS_EMPTY(slot) ((slot)->tts_isempty) +#define TTS_EMPTY(slot) ((slot)->tts_isempty) #define ExecForceStoreHeapTuple(tuple, slot, shouldFree) \ - ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); + ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); #define TableScanDesc HeapScanDesc #define table_beginscan heap_beginscan #define table_endscan heap_endscan diff --git a/cstore_writer.c b/cstore_writer.c index 76fc703f3..318d8d518 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -149,13 +149,15 @@ CStoreBeginWrite(Oid relationId, for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { FmgrInfo *comparisonFunction = NULL; - FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, + columnIndex); if (!attributeForm->attisdropped) { Oid typeId = attributeForm->atttypid; - comparisonFunction = GetFunctionInfoOrNull(typeId, BTREE_AM_OID, BTORDER_PROC); + comparisonFunction = GetFunctionInfoOrNull(typeId, BTREE_AM_OID, + BTORDER_PROC); } comparisonFunctionArray[columnIndex] = comparisonFunction; @@ -262,7 +264,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul bool columnTypeByValue = attributeForm->attbyval; int columnTypeLength = attributeForm->attlen; Oid columnCollation = 
attributeForm->attcollation; - char columnTypeAlign = attributeForm->attalign; + char columnTypeAlign = attributeForm->attalign; blockData->existsArray[blockRowIndex] = true; @@ -492,7 +494,7 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, static StripeMetadata FlushStripe(TableWriteState *writeState) { - StripeMetadata stripeMetadata = {0, 0, 0, 0}; + StripeMetadata stripeMetadata = { 0, 0, 0, 0 }; uint64 skipListLength = 0; uint64 dataLength = 0; StringInfo *skipListBufferArray = NULL; @@ -531,7 +533,7 @@ FlushStripe(TableWriteState *writeState) for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = - columnBuffers->blockBuffersArray[blockIndex]; + columnBuffers->blockBuffersArray[blockIndex]; uint64 existsBufferSize = blockBuffers->existsBuffer->len; uint64 valueBufferSize = blockBuffers->valueBuffer->len; CompressionType valueCompressionType = blockBuffers->valueCompressionType; @@ -582,7 +584,7 @@ FlushStripe(TableWriteState *writeState) for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = - columnBuffers->blockBuffersArray[blockIndex]; + columnBuffers->blockBuffersArray[blockIndex]; StringInfo existsBuffer = blockBuffers->existsBuffer; WriteToFile(tableFile, existsBuffer->data, existsBuffer->len); @@ -591,7 +593,7 @@ FlushStripe(TableWriteState *writeState) for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = - columnBuffers->blockBuffersArray[blockIndex]; + columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = blockBuffers->valueBuffer; WriteToFile(tableFile, valueBuffer->data, valueBuffer->len); diff --git a/mod.c b/mod.c index dbc8eb923..d962e9820 100644 --- a/mod.c +++ b/mod.c @@ -20,14 +20,15 @@ PG_MODULE_MAGIC; -void _PG_init(void) +void +_PG_init(void) { cstore_fdw_init(); } -void _PG_fini(void) +void +_PG_fini(void) { cstore_fdw_finish(); } - From 
9e247cdf40200cc85c813d3774575bb63829d886 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 7 Sep 2020 21:51:28 -0700 Subject: [PATCH 08/91] Move table footer to metadata tables --- cstore.c | 15 +- cstore.h | 18 +- cstore.proto | 22 --- cstore_fdw--1.7.sql | 30 ++- cstore_fdw.c | 17 +- cstore_metadata_serialization.c | 198 ------------------- cstore_metadata_serialization.h | 3 - cstore_metadata_tables.c | 332 +++++++++++++++++++++++++++++++- cstore_reader.c | 142 ++------------ cstore_writer.c | 139 +++---------- expected/truncate.out | 4 +- 11 files changed, 407 insertions(+), 513 deletions(-) diff --git a/cstore.c b/cstore.c index a259f0430..658c15745 100644 --- a/cstore.c +++ b/cstore.c @@ -130,6 +130,8 @@ InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *csto TableWriteState *writeState = NULL; TupleDesc tupleDescriptor = RelationGetDescr(relation); + InitCStoreTableMetadata(relationId, cstoreOptions->blockRowCount); + /* * Initialize state to write to the cstore file. This creates an * empty data file and a valid footer file for the table. 
@@ -183,19 +185,6 @@ void DeleteCStoreTableFiles(char *filename) { int dataFileRemoved = 0; - int footerFileRemoved = 0; - - StringInfo tableFooterFilename = makeStringInfo(); - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - /* delete the footer file */ - footerFileRemoved = unlink(tableFooterFilename->data); - if (footerFileRemoved != 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not delete file \"%s\": %m", - tableFooterFilename->data))); - } /* delete the data file */ dataFileRemoved = unlink(filename); diff --git a/cstore.h b/cstore.h index ed850d9ef..20cac7e05 100644 --- a/cstore.h +++ b/cstore.h @@ -46,8 +46,6 @@ /* miscellaneous defines */ #define CSTORE_FDW_NAME "cstore_fdw" -#define CSTORE_FOOTER_FILE_SUFFIX ".footer" -#define CSTORE_TEMP_FILE_SUFFIX ".tmp" #define CSTORE_TUPLE_COST_MULTIPLIER 10 #define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 #define CSTORE_POSTSCRIPT_SIZE_MAX 256 @@ -91,12 +89,12 @@ typedef struct StripeMetadata } StripeMetadata; -/* TableFooter represents the footer of a cstore file. */ -typedef struct TableFooter +/* TableMetadata represents the metadata of a cstore file. */ +typedef struct TableMetadata { List *stripeMetadataList; uint64 blockRowCount; -} TableFooter; +} TableMetadata; /* ColumnBlockSkipNode contains statistics for a ColumnBlockData. 
*/ @@ -206,7 +204,7 @@ typedef struct TableReadState Oid relationId; FILE *tableFile; - TableFooter *tableFooter; + TableMetadata *tableMetadata; TupleDesc tupleDescriptor; /* @@ -231,8 +229,7 @@ typedef struct TableWriteState { Oid relationId; FILE *tableFile; - TableFooter *tableFooter; - StringInfo tableFooterFilename; + TableMetadata *tableMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; @@ -277,7 +274,6 @@ extern void CStoreEndWrite(TableWriteState *state); extern TableReadState * CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, List *projectedColumnList, List *qualConditions); -extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); extern bool CStoreReadFinished(TableReadState *state); extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, bool *columnNulls); @@ -298,6 +294,8 @@ extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressio /* cstore_metadata_tables.c */ extern void SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer); extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount); - +extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); +extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); +extern TableMetadata * ReadTableMetadata(Oid relid); #endif /* CSTORE_H */ diff --git a/cstore.proto b/cstore.proto index ea949c77c..a7525b633 100644 --- a/cstore.proto +++ b/cstore.proto @@ -22,25 +22,3 @@ message ColumnBlockSkipNode { message ColumnBlockSkipList { repeated ColumnBlockSkipNode blockSkipNodeArray = 1; } - -message StripeMetadata { - optional uint64 fileOffset = 1; - optional uint64 skipListLength = 2; - optional uint64 dataLength = 3; - optional uint64 footerLength = 4; - optional uint64 id = 5; -} - -message TableFooter { - repeated StripeMetadata stripeMetadataArray = 1; - optional uint32 blockRowCount = 2; -} - -message 
PostScript { - optional uint64 tableFooterLength = 1; - optional uint64 versionMajor = 2; - optional uint64 versionMinor = 3; - - // Leave this last in the record - optional string magicNumber = 8000; -} diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index fd526e711..86589ca90 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -58,17 +58,37 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); +CREATE TABLE cstore_tables ( + relid oid, + block_row_count int, + version_major bigint, + version_minor bigint, + PRIMARY KEY (relid) +) WITH (user_catalog_table = true); + +ALTER TABLE cstore_tables SET SCHEMA pg_catalog; + +CREATE TABLE cstore_stripes ( + relid oid, + stripe bigint, + file_offset bigint, + skiplist_length bigint, + data_length bigint, + PRIMARY KEY (relid, stripe), + FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE +) WITH (user_catalog_table = true); + +ALTER TABLE cstore_stripes SET SCHEMA pg_catalog; + CREATE TABLE cstore_stripe_attr ( relid oid, stripe bigint, attr int, exists_size bigint, value_size bigint, - skiplist_size bigint + skiplist_size bigint, + PRIMARY KEY (relid, stripe, attr), + FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE ) WITH (user_catalog_table = true); -CREATE INDEX cstore_stripe_attr_idx - ON cstore_stripe_attr - USING BTREE(relid, stripe, attr); - ALTER TABLE cstore_stripe_attr SET SCHEMA pg_catalog; diff --git a/cstore_fdw.c b/cstore_fdw.c index 7d43c07d5..8ce3a7296 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -426,6 +426,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, RemoveCStoreDatabaseDirectory(databaseOid); } } + /* handle other utility statements */ else { @@ -1026,11 +1027,8 @@ cstore_table_size(PG_FUNCTION_ARGS) int64 tableSize = 0; CStoreOptions *cstoreOptions = NULL; char *dataFilename = NULL; - StringInfo footerFilename = NULL; int dataFileStatResult = 0; - int 
footerFileStatResult = 0; struct stat dataFileStatBuffer; - struct stat footerFileStatBuffer; bool cstoreTable = CStoreTable(relationId); if (!cstoreTable) @@ -1048,20 +1046,7 @@ cstore_table_size(PG_FUNCTION_ARGS) errmsg("could not stat file \"%s\": %m", dataFilename))); } - footerFilename = makeStringInfo(); - appendStringInfo(footerFilename, "%s%s", dataFilename, - CSTORE_FOOTER_FILE_SUFFIX); - - footerFileStatResult = stat(footerFilename->data, &footerFileStatBuffer); - if (footerFileStatResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - footerFilename->data))); - } - tableSize += dataFileStatBuffer.st_size; - tableSize += footerFileStatBuffer.st_size; PG_RETURN_INT64(tableSize); } diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c index 09c17ee7f..2b06d4a15 100644 --- a/cstore_metadata_serialization.c +++ b/cstore_metadata_serialization.c @@ -28,98 +28,6 @@ static Datum ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, bool typeByValue, int typeLength); -/* - * SerializePostScript serializes the given postscript and returns the result as - * a StringInfo. 
- */ -StringInfo -SerializePostScript(uint64 tableFooterLength) -{ - StringInfo postscriptBuffer = NULL; - Protobuf__PostScript protobufPostScript = PROTOBUF__POST_SCRIPT__INIT; - uint8 *postscriptData = NULL; - uint32 postscriptSize = 0; - - protobufPostScript.has_tablefooterlength = true; - protobufPostScript.tablefooterlength = tableFooterLength; - protobufPostScript.has_versionmajor = true; - protobufPostScript.versionmajor = CSTORE_VERSION_MAJOR; - protobufPostScript.has_versionminor = true; - protobufPostScript.versionminor = CSTORE_VERSION_MINOR; - protobufPostScript.magicnumber = pstrdup(CSTORE_MAGIC_NUMBER); - - postscriptSize = protobuf__post_script__get_packed_size(&protobufPostScript); - postscriptData = palloc0(postscriptSize); - protobuf__post_script__pack(&protobufPostScript, postscriptData); - - postscriptBuffer = palloc0(sizeof(StringInfoData)); - postscriptBuffer->len = postscriptSize; - postscriptBuffer->maxlen = postscriptSize; - postscriptBuffer->data = (char *) postscriptData; - - return postscriptBuffer; -} - - -/* - * SerializeTableFooter serializes the given table footer and returns the result - * as a StringInfo. 
- */ -StringInfo -SerializeTableFooter(TableFooter *tableFooter) -{ - StringInfo tableFooterBuffer = NULL; - Protobuf__TableFooter protobufTableFooter = PROTOBUF__TABLE_FOOTER__INIT; - Protobuf__StripeMetadata **stripeMetadataArray = NULL; - ListCell *stripeMetadataCell = NULL; - uint8 *tableFooterData = NULL; - uint32 tableFooterSize = 0; - uint32 stripeIndex = 0; - - List *stripeMetadataList = tableFooter->stripeMetadataList; - uint32 stripeCount = list_length(stripeMetadataList); - stripeMetadataArray = palloc0(stripeCount * sizeof(Protobuf__StripeMetadata *)); - - foreach(stripeMetadataCell, stripeMetadataList) - { - StripeMetadata *stripeMetadata = lfirst(stripeMetadataCell); - - Protobuf__StripeMetadata *protobufStripeMetadata = NULL; - protobufStripeMetadata = palloc0(sizeof(Protobuf__StripeMetadata)); - protobuf__stripe_metadata__init(protobufStripeMetadata); - protobufStripeMetadata->has_fileoffset = true; - protobufStripeMetadata->fileoffset = stripeMetadata->fileOffset; - protobufStripeMetadata->has_skiplistlength = true; - protobufStripeMetadata->skiplistlength = stripeMetadata->skipListLength; - protobufStripeMetadata->has_datalength = true; - protobufStripeMetadata->datalength = stripeMetadata->dataLength; - protobufStripeMetadata->has_footerlength = true; - protobufStripeMetadata->footerlength = stripeMetadata->footerLength; - protobufStripeMetadata->has_id = true; - protobufStripeMetadata->id = stripeMetadata->id; - - stripeMetadataArray[stripeIndex] = protobufStripeMetadata; - stripeIndex++; - } - - protobufTableFooter.n_stripemetadataarray = stripeCount; - protobufTableFooter.stripemetadataarray = stripeMetadataArray; - protobufTableFooter.has_blockrowcount = true; - protobufTableFooter.blockrowcount = tableFooter->blockRowCount; - - tableFooterSize = protobuf__table_footer__get_packed_size(&protobufTableFooter); - tableFooterData = palloc0(tableFooterSize); - protobuf__table_footer__pack(&protobufTableFooter, tableFooterData); - - 
tableFooterBuffer = palloc0(sizeof(StringInfoData)); - tableFooterBuffer->len = tableFooterSize; - tableFooterBuffer->maxlen = tableFooterSize; - tableFooterBuffer->data = (char *) tableFooterData; - - return tableFooterBuffer; -} - - /* * SerializeColumnSkipList serializes a column skip list, where the colum skip * list includes all block skip nodes for that column. The function then returns @@ -194,112 +102,6 @@ SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCou } -/* - * DeserializePostScript deserializes the given postscript buffer and returns - * the size of table footer in tableFooterLength pointer. - */ -void -DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength) -{ - Protobuf__PostScript *protobufPostScript = NULL; - protobufPostScript = protobuf__post_script__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufPostScript == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid postscript buffer"))); - } - - if (protobufPostScript->versionmajor != CSTORE_VERSION_MAJOR || - protobufPostScript->versionminor > CSTORE_VERSION_MINOR) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid column store version number"))); - } - else if (strncmp(protobufPostScript->magicnumber, CSTORE_MAGIC_NUMBER, - NAMEDATALEN) != 0) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid magic number"))); - } - - (*tableFooterLength) = protobufPostScript->tablefooterlength; - - protobuf__post_script__free_unpacked(protobufPostScript, NULL); -} - - -/* - * DeserializeTableFooter deserializes the given buffer and returns the result as - * a TableFooter struct. 
- */ -TableFooter * -DeserializeTableFooter(StringInfo buffer) -{ - TableFooter *tableFooter = NULL; - Protobuf__TableFooter *protobufTableFooter = NULL; - List *stripeMetadataList = NIL; - uint64 blockRowCount = 0; - uint32 stripeCount = 0; - uint32 stripeIndex = 0; - - protobufTableFooter = protobuf__table_footer__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufTableFooter == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid table footer buffer"))); - } - - if (!protobufTableFooter->has_blockrowcount) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("missing required table footer metadata fields"))); - } - else if (protobufTableFooter->blockrowcount < BLOCK_ROW_COUNT_MINIMUM || - protobufTableFooter->blockrowcount > BLOCK_ROW_COUNT_MAXIMUM) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid block row count"))); - } - blockRowCount = protobufTableFooter->blockrowcount; - - stripeCount = protobufTableFooter->n_stripemetadataarray; - for (stripeIndex = 0; stripeIndex < stripeCount; stripeIndex++) - { - StripeMetadata *stripeMetadata = NULL; - Protobuf__StripeMetadata *protobufStripeMetadata = NULL; - - protobufStripeMetadata = protobufTableFooter->stripemetadataarray[stripeIndex]; - if (!protobufStripeMetadata->has_fileoffset || - !protobufStripeMetadata->has_skiplistlength || - !protobufStripeMetadata->has_datalength || - !protobufStripeMetadata->has_footerlength) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("missing required stripe metadata fields"))); - } - - stripeMetadata = palloc0(sizeof(StripeMetadata)); - stripeMetadata->fileOffset = protobufStripeMetadata->fileoffset; - stripeMetadata->skipListLength = protobufStripeMetadata->skiplistlength; - stripeMetadata->dataLength = protobufStripeMetadata->datalength; - stripeMetadata->footerLength = protobufStripeMetadata->footerlength; - stripeMetadata->id = 
protobufStripeMetadata->id; - - stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); - } - - protobuf__table_footer__free_unpacked(protobufTableFooter, NULL); - - tableFooter = palloc0(sizeof(TableFooter)); - tableFooter->stripeMetadataList = stripeMetadataList; - tableFooter->blockRowCount = blockRowCount; - - return tableFooter; -} - - /* * DeserializeBlockCount deserializes the given column skip list buffer and * returns the number of blocks in column skip list. diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h index 12a3d135b..efd27000a 100644 --- a/cstore_metadata_serialization.h +++ b/cstore_metadata_serialization.h @@ -15,15 +15,12 @@ #define CSTORE_SERIALIZATION_H /* Function declarations for metadata serialization */ -extern StringInfo SerializePostScript(uint64 tableFooterLength); -extern StringInfo SerializeTableFooter(TableFooter *tableFooter); extern StringInfo SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, bool typeByValue, int typeLength); /* Function declarations for metadata deserialization */ extern void DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength); -extern TableFooter * DeserializeTableFooter(StringInfo buffer); extern uint32 DeserializeBlockCount(StringInfo buffer); extern uint32 DeserializeRowCount(StringInfo buffer); extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index e2d003989..f5168de1e 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -13,13 +13,21 @@ #include "cstore_version_compat.h" #include +#include "access/heapam.h" #include "access/nbtree.h" #include "access/table.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/indexing.h" #include "catalog/pg_namespace.h" #include "catalog/pg_collation.h" +#include "catalog/pg_type.h" #include "commands/defrem.h" +#include "commands/trigger.h" +#include 
"executor/executor.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "nodes/execnodes.h" #include "lib/stringinfo.h" #include "optimizer/optimizer.h" #include "port.h" @@ -33,9 +41,16 @@ static Oid CStoreStripeAttrRelationId(void); static Oid CStoreStripeAttrIndexRelationId(void); +static Oid CStoreStripesRelationId(void); +static Oid CStoreStripesIndexRelationId(void); +static Oid CStoreTablesRelationId(void); +static Oid CStoreTablesIndexRelationId(void); static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, uint64 existsSize, uint64 valuesSize, uint64 skiplistSize); +static int TableBlockRowCount(Oid relid); +static void DeleteTableMetadataRowIfExists(Oid relid); +static EState * create_estate_for_relation(Relation rel); /* constants for cstore_stripe_attr */ #define Natts_cstore_stripe_attr 6 @@ -46,6 +61,275 @@ static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, #define Anum_cstore_stripe_attr_value_size 5 #define Anum_cstore_stripe_attr_skiplist_size 6 +/* constants for cstore_table */ +#define Natts_cstore_tables 4 +#define Anum_cstore_tables_relid 1 +#define Anum_cstore_tables_block_row_count 2 +#define Anum_cstore_tables_version_major 3 +#define Anum_cstore_tables_version_minor 4 + +/* constants for cstore_stripe */ +#define Natts_cstore_stripes 5 +#define Anum_cstore_stripes_relid 1 +#define Anum_cstore_stripes_stripe 2 +#define Anum_cstore_stripes_file_offset 3 +#define Anum_cstore_stripes_skiplist_length 4 +#define Anum_cstore_stripes_data_length 5 + +/* + * InitCStoreTableMetadata adds a record for the given relation in cstore_table. 
+ */ +void +InitCStoreTableMetadata(Oid relid, int blockRowCount) +{ + Oid cstoreTableOid = InvalidOid; + Relation cstoreTable = NULL; + TupleDesc tupleDescriptor = NULL; + HeapTuple tuple = NULL; + + bool nulls[Natts_cstore_tables] = { 0 }; + Datum values[Natts_cstore_tables] = { + ObjectIdGetDatum(relid), + Int32GetDatum(blockRowCount), + Int32GetDatum(CSTORE_VERSION_MAJOR), + Int32GetDatum(CSTORE_VERSION_MINOR) + }; + + DeleteTableMetadataRowIfExists(relid); + + cstoreTableOid = CStoreTablesRelationId(); + cstoreTable = heap_open(cstoreTableOid, RowExclusiveLock); + tupleDescriptor = RelationGetDescr(cstoreTable); + + tuple = heap_form_tuple(tupleDescriptor, values, nulls); + + CatalogTupleInsert(cstoreTable, tuple); + + CommandCounterIncrement(); + + heap_close(cstoreTable, NoLock); +} + + +/* + * InsertStripeMetadataRow adds a row to cstore_stripes. + */ +void +InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) +{ + bool nulls[Natts_cstore_stripes] = { 0 }; + Datum values[Natts_cstore_stripes] = { + ObjectIdGetDatum(relid), + Int64GetDatum(stripe->id), + Int64GetDatum(stripe->fileOffset), + Int64GetDatum(stripe->skipListLength), + Int64GetDatum(stripe->dataLength) + }; + + Oid cstoreStripesOid = CStoreStripesRelationId(); + Relation cstoreStripes = heap_open(cstoreStripesOid, RowExclusiveLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripes); + + HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + + CatalogTupleInsert(cstoreStripes, tuple); + + CommandCounterIncrement(); + + heap_close(cstoreStripes, NoLock); +} + + +/* + * ReadTableMetadata constructs TableMetadata for a given relid by reading + * from cstore_tables and cstore_stripes. 
+ */ +TableMetadata * +ReadTableMetadata(Oid relid) +{ + Oid cstoreStripesOid = InvalidOid; + Relation cstoreStripes = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple; + + TableMetadata *tableMetadata = palloc0(sizeof(TableMetadata)); + tableMetadata->blockRowCount = TableBlockRowCount(relid); + + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + + cstoreStripesOid = CStoreStripesRelationId(); + cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); + index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); + tupleDescriptor = RelationGetDescr(cstoreStripes); + + scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, scanKey); + + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + StripeMetadata *stripeMetadata = NULL; + Datum datumArray[Natts_cstore_stripes]; + bool isNullArray[Natts_cstore_stripes]; + + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + + stripeMetadata = palloc0(sizeof(StripeMetadata)); + stripeMetadata->id = DatumGetInt64(datumArray[Anum_cstore_stripes_stripe - 1]); + stripeMetadata->fileOffset = DatumGetInt64( + datumArray[Anum_cstore_stripes_file_offset - 1]); + stripeMetadata->dataLength = DatumGetInt64( + datumArray[Anum_cstore_stripes_data_length - 1]); + stripeMetadata->skipListLength = DatumGetInt64( + datumArray[Anum_cstore_stripes_skiplist_length - 1]); + + tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, + stripeMetadata); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreStripes, NoLock); + + return tableMetadata; +} + + +/* + * TableBlockRowCount returns block_row_count column from cstore_tables for a given relid. 
+ */ +static int +TableBlockRowCount(Oid relid) +{ + int blockRowCount = 0; + Oid cstoreTablesOid = InvalidOid; + Relation cstoreTables = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple = NULL; + + ScanKeyInit(&scanKey[0], Anum_cstore_tables_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + + cstoreTablesOid = CStoreTablesRelationId(); + cstoreTables = heap_open(cstoreTablesOid, AccessShareLock); + index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); + tupleDescriptor = RelationGetDescr(cstoreTables); + + scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + Datum datumArray[Natts_cstore_tables]; + bool isNullArray[Natts_cstore_tables]; + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + blockRowCount = DatumGetInt32(datumArray[Anum_cstore_tables_block_row_count - 1]); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreTables, NoLock); + + return blockRowCount; +} + + +/* + * DeleteTableMetadataRowIfExists removes the row with given relid from cstore_stripes. 
+ */ +static void +DeleteTableMetadataRowIfExists(Oid relid) +{ + Oid cstoreTablesOid = InvalidOid; + Relation cstoreTables = NULL; + Relation index = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple = NULL; + + ScanKeyInit(&scanKey[0], Anum_cstore_tables_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + + cstoreTablesOid = CStoreTablesRelationId(); + cstoreTables = table_open(cstoreTablesOid, AccessShareLock); + index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); + + scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + EState *estate = create_estate_for_relation(cstoreTables); + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + + ItemPointer tid = &(heapTuple->t_self); + simple_table_tuple_delete(cstoreTables, tid, estate->es_snapshot); + + /* + * Execute AFTER ROW DELETE Triggers to enforce foreign key + * constraints. + */ + ExecARDeleteTriggers(estate, resultRelInfo, + tid, NULL, NULL); + + AfterTriggerEndQuery(estate); + ExecCleanUpTriggerState(estate); + ExecResetTupleTable(estate->es_tupleTable, false); + FreeExecutorState(estate); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + table_close(cstoreTables, NoLock); +} + + +/* + * Based on a similar function from + * postgres/src/backend/replication/logical/worker.c. + * + * Executor state preparation for evaluation of constraint expressions, + * indexes and triggers. 
+ * + * This is based on similar code in copy.c + */ +static EState * +create_estate_for_relation(Relation rel) +{ + EState *estate; + ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + + estate = CreateExecutorState(); + + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = RelationGetRelid(rel); + rte->relkind = rel->rd_rel->relkind; + rte->rellockmode = AccessShareLock; + ExecInitRangeTable(estate, list_make1(rte)); + + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); + + estate->es_result_relations = resultRelInfo; + estate->es_num_result_relations = 1; + estate->es_result_relation_info = resultRelInfo; + + estate->es_output_cid = GetCurrentCommandId(true); + + /* Prepare to catch AFTER triggers. */ + AfterTriggerBeginQuery(); + + return estate; +} + + /* * SaveStripeFooter stores give StripeFooter as cstore_stripe_attr records. */ @@ -176,11 +460,55 @@ CStoreStripeAttrRelationId(void) /* - * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr_idx. + * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr_pkey. * TODO: should we cache this similar to citus? */ static Oid CStoreStripeAttrIndexRelationId(void) { - return get_relname_relid("cstore_stripe_attr_idx", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripe_attr_pkey", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreStripesRelationId returns relation id of cstore_stripes. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreStripesRelationId(void) +{ + return get_relname_relid("cstore_stripes", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreStripesIndexRelationId returns relation id of cstore_stripes_idx. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreStripesIndexRelationId(void) +{ + return get_relname_relid("cstore_stripes_pkey", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreTablesRelationId returns relation id of cstore_tables. 
+ * TODO: should we cache this similar to citus? + */ +static Oid +CStoreTablesRelationId(void) +{ + return get_relname_relid("cstore_tables", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreTablesIndexRelationId returns relation id of cstore_tables_idx. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreTablesIndexRelationId(void) +{ + return get_relname_relid("cstore_tables_pkey", PG_CATALOG_NAMESPACE); } diff --git a/cstore_reader.c b/cstore_reader.c index 78c7fe00e..ddef3395a 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -82,7 +82,6 @@ static void DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex TupleDesc tupleDescriptor); static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm); -static int64 FILESize(FILE *file); static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size); static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount); @@ -99,20 +98,14 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; - TableFooter *tableFooter = NULL; + TableMetadata *tableMetadata = NULL; FILE *tableFile = NULL; MemoryContext stripeReadContext = NULL; uint32 columnCount = 0; bool *projectedColumnMask = NULL; ColumnBlockData **blockDataArray = NULL; - StringInfo tableFooterFilename = makeStringInfo(); - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - tableFooter = CStoreReadFooter(tableFooterFilename); - - pfree(tableFooterFilename->data); - pfree(tableFooterFilename); + tableMetadata = ReadTableMetadata(relationId); tableFile = AllocateFile(filename, PG_BINARY_R); if (tableFile == NULL) @@ -134,12 +127,12 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, columnCount = tupleDescriptor->natts; projectedColumnMask = ProjectedColumnMask(columnCount, 
projectedColumnList); blockDataArray = CreateEmptyBlockDataArray(columnCount, projectedColumnMask, - tableFooter->blockRowCount); + tableMetadata->blockRowCount); readState = palloc0(sizeof(TableReadState)); readState->relationId = relationId; readState->tableFile = tableFile; - readState->tableFooter = tableFooter; + readState->tableMetadata = tableMetadata; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; readState->stripeBuffers = NULL; @@ -154,76 +147,6 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, } -/* - * CStoreReadFooter reads the cstore file footer from the given file. First, the - * function reads the last byte of the file as the postscript size. Then, the - * function reads the postscript. Last, the function reads and deserializes the - * footer. - */ -TableFooter * -CStoreReadFooter(StringInfo tableFooterFilename) -{ - TableFooter *tableFooter = NULL; - FILE *tableFooterFile = NULL; - uint64 footerOffset = 0; - uint64 footerLength = 0; - StringInfo postscriptBuffer = NULL; - StringInfo postscriptSizeBuffer = NULL; - uint64 postscriptSizeOffset = 0; - uint8 postscriptSize = 0; - uint64 footerFileSize = 0; - uint64 postscriptOffset = 0; - StringInfo footerBuffer = NULL; - int freeResult = 0; - - tableFooterFile = AllocateFile(tableFooterFilename->data, PG_BINARY_R); - if (tableFooterFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for reading: %m", - tableFooterFilename->data), - errhint("Try copying in data to the table."))); - } - - footerFileSize = FILESize(tableFooterFile); - if (footerFileSize < CSTORE_POSTSCRIPT_SIZE_LENGTH) - { - ereport(ERROR, (errmsg("invalid cstore file"))); - } - - postscriptSizeOffset = footerFileSize - CSTORE_POSTSCRIPT_SIZE_LENGTH; - postscriptSizeBuffer = ReadFromFile(tableFooterFile, postscriptSizeOffset, - CSTORE_POSTSCRIPT_SIZE_LENGTH); - memcpy(&postscriptSize, 
postscriptSizeBuffer->data, CSTORE_POSTSCRIPT_SIZE_LENGTH); - if (postscriptSize + CSTORE_POSTSCRIPT_SIZE_LENGTH > footerFileSize) - { - ereport(ERROR, (errmsg("invalid postscript size"))); - } - - postscriptOffset = footerFileSize - (CSTORE_POSTSCRIPT_SIZE_LENGTH + postscriptSize); - postscriptBuffer = ReadFromFile(tableFooterFile, postscriptOffset, postscriptSize); - - DeserializePostScript(postscriptBuffer, &footerLength); - if (footerLength + postscriptSize + CSTORE_POSTSCRIPT_SIZE_LENGTH > footerFileSize) - { - ereport(ERROR, (errmsg("invalid footer size"))); - } - - footerOffset = postscriptOffset - footerLength; - footerBuffer = ReadFromFile(tableFooterFile, footerOffset, footerLength); - tableFooter = DeserializeTableFooter(footerBuffer); - - freeResult = FreeFile(tableFooterFile); - if (freeResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not close file: %m"))); - } - - return tableFooter; -} - - /* * CStoreReadNextRow tries to read a row from the cstore file. On success, it sets * column values and nulls, and returns true. 
If there are no more rows to read, @@ -234,7 +157,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { uint32 blockIndex = 0; uint32 blockRowIndex = 0; - TableFooter *tableFooter = readState->tableFooter; + TableMetadata *tableMetadata = readState->tableMetadata; MemoryContext oldContext = NULL; /* @@ -247,7 +170,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { StripeBuffers *stripeBuffers = NULL; StripeMetadata *stripeMetadata = NULL; - List *stripeMetadataList = tableFooter->stripeMetadataList; + List *stripeMetadataList = tableMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); StripeFooter *stripeFooter = NULL; @@ -284,8 +207,8 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu } } - blockIndex = readState->stripeReadRowCount / tableFooter->blockRowCount; - blockRowIndex = readState->stripeReadRowCount % tableFooter->blockRowCount; + blockIndex = readState->stripeReadRowCount / tableMetadata->blockRowCount; + blockRowIndex = readState->stripeReadRowCount % tableMetadata->blockRowCount; if (blockIndex != readState->deserializedBlockIndex) { @@ -294,14 +217,14 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu uint32 stripeRowCount = 0; stripeRowCount = readState->stripeBuffers->rowCount; - lastBlockIndex = stripeRowCount / tableFooter->blockRowCount; + lastBlockIndex = stripeRowCount / tableMetadata->blockRowCount; if (blockIndex == lastBlockIndex) { - blockRowCount = stripeRowCount % tableFooter->blockRowCount; + blockRowCount = stripeRowCount % tableMetadata->blockRowCount; } else { - blockRowCount = tableFooter->blockRowCount; + blockRowCount = tableMetadata->blockRowCount; } oldContext = MemoryContextSwitchTo(readState->stripeReadContext); @@ -341,9 +264,9 @@ CStoreEndRead(TableReadState *readState) MemoryContextDelete(readState->stripeReadContext); FreeFile(readState->tableFile); - 
list_free_deep(readState->tableFooter->stripeMetadataList); + list_free_deep(readState->tableMetadata->stripeMetadataList); FreeColumnBlockDataArray(readState->blockDataArray, columnCount); - pfree(readState->tableFooter); + pfree(readState->tableMetadata); pfree(readState); } @@ -405,19 +328,12 @@ FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) uint64 CStoreTableRowCount(Oid relid, const char *filename) { - TableFooter *tableFooter = NULL; + TableMetadata *tableMetadata = NULL; FILE *tableFile; ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - StringInfo tableFooterFilename = makeStringInfo(); - - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - tableFooter = CStoreReadFooter(tableFooterFilename); - - pfree(tableFooterFilename->data); - pfree(tableFooterFilename); + tableMetadata = ReadTableMetadata(relid); tableFile = AllocateFile(filename, PG_BINARY_R); if (tableFile == NULL) @@ -426,7 +342,7 @@ CStoreTableRowCount(Oid relid, const char *filename) errmsg("could not open file \"%s\" for reading: %m", filename))); } - foreach(stripeMetadataCell, tableFooter->stripeMetadataList) + foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); totalRowCount += StripeRowCount(relid, tableFile, stripeMetadata); @@ -1263,32 +1179,6 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor } -/* Returns the size of the given file handle. 
*/ -static int64 -FILESize(FILE *file) -{ - int64 fileSize = 0; - int fseekResult = 0; - - errno = 0; - fseekResult = fseeko(file, 0, SEEK_END); - if (fseekResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek in file: %m"))); - } - - fileSize = ftello(file); - if (fileSize == -1) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not get position in file: %m"))); - } - - return fileSize; -} - - /* Reads the given segment from the given file. */ static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size) diff --git a/cstore_writer.c b/cstore_writer.c index 318d8d518..240c13fc2 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -27,7 +27,6 @@ #include "cstore_metadata_serialization.h" #include "cstore_version_compat.h" -static void CStoreWriteFooter(StringInfo footerFileName, TableFooter *tableFooter); static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount); @@ -50,7 +49,7 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, int columnTypeLength, Oid columnCollation, FmgrInfo *comparisonFunction); static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); -static void AppendStripeMetadata(TableFooter *tableFooter, +static void AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata); static void WriteToFile(FILE *file, void *data, uint32 dataLength); static void SyncAndCloseFile(FILE *file); @@ -72,61 +71,37 @@ CStoreBeginWrite(Oid relationId, { TableWriteState *writeState = NULL; FILE *tableFile = NULL; - StringInfo tableFooterFilename = NULL; - TableFooter *tableFooter = NULL; + TableMetadata *tableMetadata = NULL; FmgrInfo **comparisonFunctionArray = NULL; MemoryContext stripeWriteContext = NULL; uint64 currentFileOffset = 0; uint32 columnCount = 0; uint32 columnIndex = 0; - struct stat statBuffer; - int statResult = 0; bool *columnMaskArray = NULL; ColumnBlockData 
**blockData = NULL; uint64 currentStripeId = 0; - tableFooterFilename = makeStringInfo(); - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - statResult = stat(tableFooterFilename->data, &statBuffer); - if (statResult < 0) + tableFile = AllocateFile(filename, "a+"); + if (tableFile == NULL) { - tableFile = AllocateFile(filename, "w"); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - filename))); - } - - tableFooter = palloc0(sizeof(TableFooter)); - tableFooter->blockRowCount = blockRowCount; - tableFooter->stripeMetadataList = NIL; + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + filename))); } - else - { - tableFile = AllocateFile(filename, "r+"); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - filename))); - } - tableFooter = CStoreReadFooter(tableFooterFilename); - } + tableMetadata = ReadTableMetadata(relationId); /* * If stripeMetadataList is not empty, jump to the position right after * the last position. 
*/ - if (tableFooter->stripeMetadataList != NIL) + if (tableMetadata->stripeMetadataList != NIL) { StripeMetadata *lastStripe = NULL; uint64 lastStripeSize = 0; int fseekResult = 0; - lastStripe = llast(tableFooter->stripeMetadataList); + lastStripe = llast(tableMetadata->stripeMetadataList); lastStripeSize += lastStripe->skipListLength; lastStripeSize += lastStripe->dataLength; lastStripeSize += lastStripe->footerLength; @@ -180,8 +155,7 @@ CStoreBeginWrite(Oid relationId, writeState = palloc0(sizeof(TableWriteState)); writeState->relationId = relationId; writeState->tableFile = tableFile; - writeState->tableFooterFilename = tableFooterFilename; - writeState->tableFooter = tableFooter; + writeState->tableMetadata = tableMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; writeState->tupleDescriptor = tupleDescriptor; @@ -215,8 +189,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; - TableFooter *tableFooter = writeState->tableFooter; - const uint32 blockRowCount = tableFooter->blockRowCount; + TableMetadata *tableMetadata = writeState->tableMetadata; + const uint32 blockRowCount = tableMetadata->blockRowCount; ColumnBlockData **blockDataArray = writeState->blockDataArray; MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); @@ -304,7 +278,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul * doesn't free it. 
*/ MemoryContextSwitchTo(oldContext); - AppendStripeMetadata(tableFooter, stripeMetadata); + InsertStripeMetadataRow(writeState->relationId, &stripeMetadata); + AppendStripeMetadata(tableMetadata, stripeMetadata); } else { @@ -322,9 +297,6 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul void CStoreEndWrite(TableWriteState *writeState) { - StringInfo tableFooterFilename = NULL; - StringInfo tempTableFooterFileName = NULL; - int renameResult = 0; int columnCount = writeState->tupleDescriptor->natts; StripeBuffers *stripeBuffers = writeState->stripeBuffers; @@ -336,85 +308,20 @@ CStoreEndWrite(TableWriteState *writeState) MemoryContextReset(writeState->stripeWriteContext); MemoryContextSwitchTo(oldContext); - AppendStripeMetadata(writeState->tableFooter, stripeMetadata); + InsertStripeMetadataRow(writeState->relationId, &stripeMetadata); + AppendStripeMetadata(writeState->tableMetadata, stripeMetadata); } SyncAndCloseFile(writeState->tableFile); - tableFooterFilename = writeState->tableFooterFilename; - tempTableFooterFileName = makeStringInfo(); - appendStringInfo(tempTableFooterFileName, "%s%s", tableFooterFilename->data, - CSTORE_TEMP_FILE_SUFFIX); - - CStoreWriteFooter(tempTableFooterFileName, writeState->tableFooter); - - renameResult = rename(tempTableFooterFileName->data, tableFooterFilename->data); - if (renameResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not rename file \"%s\" to \"%s\": %m", - tempTableFooterFileName->data, - tableFooterFilename->data))); - } - - pfree(tempTableFooterFileName->data); - pfree(tempTableFooterFileName); - MemoryContextDelete(writeState->stripeWriteContext); - list_free_deep(writeState->tableFooter->stripeMetadataList); - pfree(writeState->tableFooter); - pfree(writeState->tableFooterFilename->data); - pfree(writeState->tableFooterFilename); + list_free_deep(writeState->tableMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); 
FreeColumnBlockDataArray(writeState->blockDataArray, columnCount); pfree(writeState); } -/* - * CStoreWriteFooter writes the given footer to given file. First, the function - * serializes and writes the footer to the file. Then, the function serializes - * and writes the postscript. Then, the function writes the postscript size as - * the last byte of the file. Last, the function syncs and closes the footer file. - */ -static void -CStoreWriteFooter(StringInfo tableFooterFilename, TableFooter *tableFooter) -{ - FILE *tableFooterFile = NULL; - StringInfo tableFooterBuffer = NULL; - StringInfo postscriptBuffer = NULL; - uint8 postscriptSize = 0; - - tableFooterFile = AllocateFile(tableFooterFilename->data, PG_BINARY_W); - if (tableFooterFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - tableFooterFilename->data))); - } - - /* write the footer */ - tableFooterBuffer = SerializeTableFooter(tableFooter); - WriteToFile(tableFooterFile, tableFooterBuffer->data, tableFooterBuffer->len); - - /* write the postscript */ - postscriptBuffer = SerializePostScript(tableFooterBuffer->len); - WriteToFile(tableFooterFile, postscriptBuffer->data, postscriptBuffer->len); - - /* write the 1-byte postscript size */ - Assert(postscriptBuffer->len < CSTORE_POSTSCRIPT_SIZE_MAX); - postscriptSize = postscriptBuffer->len; - WriteToFile(tableFooterFile, &postscriptSize, CSTORE_POSTSCRIPT_SIZE_LENGTH); - - SyncAndCloseFile(tableFooterFile); - - pfree(tableFooterBuffer->data); - pfree(tableFooterBuffer); - pfree(postscriptBuffer->data); - pfree(postscriptBuffer); -} - - /* * CreateEmptyStripeBuffers allocates an empty StripeBuffers structure with the given * column count. 
@@ -501,7 +408,7 @@ FlushStripe(TableWriteState *writeState) StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; - TableFooter *tableFooter = writeState->tableFooter; + TableMetadata *tableMetadata = writeState->tableMetadata; FILE *tableFile = writeState->tableFile; StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; @@ -509,7 +416,7 @@ FlushStripe(TableWriteState *writeState) TupleDesc tupleDescriptor = writeState->tupleDescriptor; uint32 columnCount = tupleDescriptor->natts; uint32 blockCount = stripeSkipList->blockCount; - uint32 blockRowCount = tableFooter->blockRowCount; + uint32 blockRowCount = tableMetadata->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; @@ -918,13 +825,13 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) * table footer's stripeMetadataList. */ static void -AppendStripeMetadata(TableFooter *tableFooter, StripeMetadata stripeMetadata) +AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata) { StripeMetadata *stripeMetadataCopy = palloc0(sizeof(StripeMetadata)); memcpy(stripeMetadataCopy, &stripeMetadata, sizeof(StripeMetadata)); - tableFooter->stripeMetadataList = lappend(tableFooter->stripeMetadataList, - stripeMetadataCopy); + tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, + stripeMetadataCopy); } diff --git a/expected/truncate.out b/expected/truncate.out index e16a6ea9f..14119c804 100644 --- a/expected/truncate.out +++ b/expected/truncate.out @@ -72,7 +72,7 @@ SELECT count(*) FROM cstore_truncate_test_compressed; SELECT cstore_table_size('cstore_truncate_test_compressed'); cstore_table_size ------------------- - 26 + 0 (1 row) -- make sure data files still present @@ -82,7 +82,7 @@ SELECT count(*) FROM ( ) AS q1) AS q2; count ------- - 6 + 3 (1 row) INSERT INTO 
cstore_truncate_test select a, a from generate_series(1, 10) a; From 10fd94a9e3090fef1628fcc10e7fa32cd909edef Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 8 Sep 2020 19:03:01 -0700 Subject: [PATCH 09/91] Address feedback --- cstore_fdw--1.7.sql | 34 ++++++++--------- cstore_metadata_tables.c | 79 ++++++++++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 37 deletions(-) diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 86589ca90..726085b17 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -59,36 +59,36 @@ CREATE EVENT TRIGGER cstore_drop_event EXECUTE PROCEDURE cstore_drop_trigger(); CREATE TABLE cstore_tables ( - relid oid, - block_row_count int, - version_major bigint, - version_minor bigint, + relid oid NOT NULL, + block_row_count int NOT NULL, + version_major bigint NOT NULL, + version_minor bigint NOT NULL, PRIMARY KEY (relid) ) WITH (user_catalog_table = true); ALTER TABLE cstore_tables SET SCHEMA pg_catalog; CREATE TABLE cstore_stripes ( - relid oid, - stripe bigint, - file_offset bigint, - skiplist_length bigint, - data_length bigint, + relid oid NOT NULL, + stripe bigint NOT NULL, + file_offset bigint NOT NULL, + skiplist_length bigint NOT NULL, + data_length bigint NOT NULL, PRIMARY KEY (relid, stripe), - FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE + FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); ALTER TABLE cstore_stripes SET SCHEMA pg_catalog; CREATE TABLE cstore_stripe_attr ( - relid oid, - stripe bigint, - attr int, - exists_size bigint, - value_size bigint, - skiplist_size bigint, + relid oid NOT NULL, + stripe bigint NOT NULL, + attr int NOT NULL, + exists_size bigint NOT NULL, + value_size bigint NOT NULL, + skiplist_size bigint NOT NULL, PRIMARY KEY (relid, stripe, attr), - FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE + FOREIGN KEY (relid, stripe) 
REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); ALTER TABLE cstore_stripe_attr SET SCHEMA pg_catalog; diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index f5168de1e..5c381a029 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -50,6 +50,8 @@ static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, uint64 skiplistSize); static int TableBlockRowCount(Oid relid); static void DeleteTableMetadataRowIfExists(Oid relid); +static void InsertTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple); +static void DeleteTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple); static EState * create_estate_for_relation(Relation rel); /* constants for cstore_stripe_attr */ @@ -103,7 +105,7 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) tuple = heap_form_tuple(tupleDescriptor, values, nulls); - CatalogTupleInsert(cstoreTable, tuple); + InsertTupleAndEnforceConstraints(cstoreTable, tuple); CommandCounterIncrement(); @@ -132,7 +134,7 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); - CatalogTupleInsert(cstoreStripes, tuple); + InsertTupleAndEnforceConstraints(cstoreStripes, tuple); CommandCounterIncrement(); @@ -264,23 +266,7 @@ DeleteTableMetadataRowIfExists(Oid relid) heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { - EState *estate = create_estate_for_relation(cstoreTables); - ResultRelInfo *resultRelInfo = estate->es_result_relation_info; - - ItemPointer tid = &(heapTuple->t_self); - simple_table_tuple_delete(cstoreTables, tid, estate->es_snapshot); - - /* - * Execute AFTER ROW DELETE Triggers to enforce foreign key - * constraints. 
- */ - ExecARDeleteTriggers(estate, resultRelInfo, - tid, NULL, NULL); - - AfterTriggerEndQuery(estate); - ExecCleanUpTriggerState(estate); - ExecResetTupleTable(estate->es_tupleTable, false); - FreeExecutorState(estate); + DeleteTupleAndEnforceConstraints(cstoreTables, heapTuple); } systable_endscan_ordered(scanDescriptor); @@ -289,6 +275,59 @@ DeleteTableMetadataRowIfExists(Oid relid) } +/* + * InsertTupleAndEnforceConstraints inserts a tuple into a relation and + * makes sure constraints (e.g. FK constraints, NOT NULL, ...) are enforced. + */ +static void +InsertTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple) +{ + EState *estate = NULL; + TupleTableSlot *slot = NULL; + + estate = create_estate_for_relation(rel); + slot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel), &TTSOpsHeapTuple); + ExecStoreHeapTuple(heapTuple, slot, false); + + ExecOpenIndices(estate->es_result_relation_info, false); + + /* ExecSimpleRelationInsert executes any constraints */ + ExecSimpleRelationInsert(estate, slot); + + ExecCloseIndices(estate->es_result_relation_info); + + AfterTriggerEndQuery(estate); + ExecCleanUpTriggerState(estate); + ExecResetTupleTable(estate->es_tupleTable, false); + FreeExecutorState(estate); +} + + + +/* + * DeleteTupleAndEnforceConstraints deletes a tuple from a relation and + * makes sure constraints (e.g. FK constraints) are enforced. 
+ */ +static void +DeleteTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple) +{ + EState *estate = create_estate_for_relation(rel); + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + + ItemPointer tid = &(heapTuple->t_self); + simple_table_tuple_delete(rel, tid, estate->es_snapshot); + + /* execute AFTER ROW DELETE Triggers to enforce constraints */ + ExecARDeleteTriggers(estate, resultRelInfo, + tid, NULL, NULL); + + AfterTriggerEndQuery(estate); + ExecCleanUpTriggerState(estate); + ExecResetTupleTable(estate->es_tupleTable, false); + FreeExecutorState(estate); +} + + /* * Based on a similar function from * postgres/src/backend/replication/logical/worker.c. @@ -370,7 +409,7 @@ InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); - CatalogTupleInsert(cstoreStripeAttrs, tuple); + InsertTupleAndEnforceConstraints(cstoreStripeAttrs, tuple); CommandCounterIncrement(); From 35a52a6fe16e2fa761b1df43c096b8af333731ac Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Wed, 9 Sep 2020 11:04:27 -0700 Subject: [PATCH 10/91] Use cstore namespace instead of pg_catalog. --- cstore_fdw--1.7.sql | 18 +++++++----------- cstore_metadata_tables.c | 21 +++++++++++++++------ 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 726085b17..7a0c9c7b8 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -3,6 +3,8 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION cstore_fdw" to load this file. 
\quit +CREATE SCHEMA cstore; + CREATE FUNCTION cstore_fdw_handler() RETURNS fdw_handler AS 'MODULE_PATHNAME' @@ -58,7 +60,7 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); -CREATE TABLE cstore_tables ( +CREATE TABLE cstore.cstore_tables ( relid oid NOT NULL, block_row_count int NOT NULL, version_major bigint NOT NULL, @@ -66,21 +68,17 @@ CREATE TABLE cstore_tables ( PRIMARY KEY (relid) ) WITH (user_catalog_table = true); -ALTER TABLE cstore_tables SET SCHEMA pg_catalog; - -CREATE TABLE cstore_stripes ( +CREATE TABLE cstore.cstore_stripes ( relid oid NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, skiplist_length bigint NOT NULL, data_length bigint NOT NULL, PRIMARY KEY (relid, stripe), - FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid) REFERENCES cstore.cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -ALTER TABLE cstore_stripes SET SCHEMA pg_catalog; - -CREATE TABLE cstore_stripe_attr ( +CREATE TABLE cstore.cstore_stripe_attr ( relid oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, @@ -88,7 +86,5 @@ CREATE TABLE cstore_stripe_attr ( value_size bigint NOT NULL, skiplist_size bigint NOT NULL, PRIMARY KEY (relid, stripe, attr), - FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid, stripe) REFERENCES cstore.cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); - -ALTER TABLE cstore_stripe_attr SET SCHEMA pg_catalog; diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 5c381a029..39e852c55 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -22,6 +22,7 @@ #include "catalog/pg_namespace.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" +#include "catalog/namespace.h" #include "commands/defrem.h" #include 
"commands/trigger.h" #include "executor/executor.h" @@ -45,6 +46,7 @@ static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); static Oid CStoreTablesRelationId(void); static Oid CStoreTablesIndexRelationId(void); +static Oid CStoreNamespaceId(void); static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, uint64 existsSize, uint64 valuesSize, uint64 skiplistSize); @@ -494,7 +496,7 @@ ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) static Oid CStoreStripeAttrRelationId(void) { - return get_relname_relid("cstore_stripe_attr", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripe_attr", CStoreNamespaceId()); } @@ -505,7 +507,7 @@ CStoreStripeAttrRelationId(void) static Oid CStoreStripeAttrIndexRelationId(void) { - return get_relname_relid("cstore_stripe_attr_pkey", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripe_attr_pkey", CStoreNamespaceId()); } @@ -516,7 +518,7 @@ CStoreStripeAttrIndexRelationId(void) static Oid CStoreStripesRelationId(void) { - return get_relname_relid("cstore_stripes", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripes", CStoreNamespaceId()); } @@ -527,7 +529,7 @@ CStoreStripesRelationId(void) static Oid CStoreStripesIndexRelationId(void) { - return get_relname_relid("cstore_stripes_pkey", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripes_pkey", CStoreNamespaceId()); } @@ -538,7 +540,7 @@ CStoreStripesIndexRelationId(void) static Oid CStoreTablesRelationId(void) { - return get_relname_relid("cstore_tables", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_tables", CStoreNamespaceId()); } @@ -549,5 +551,12 @@ CStoreTablesRelationId(void) static Oid CStoreTablesIndexRelationId(void) { - return get_relname_relid("cstore_tables_pkey", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_tables_pkey", CStoreNamespaceId()); +} + + +static Oid +CStoreNamespaceId(void) +{ + return get_namespace_oid("cstore", 
false); } From 0d4e249c97ba7f65a6dcacb4fc8527624b877d4b Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Wed, 9 Sep 2020 14:17:30 -0700 Subject: [PATCH 11/91] Reuse the same state for multiple inserts --- cstore_metadata_tables.c | 304 ++++++++++++++++++++------------------- 1 file changed, 157 insertions(+), 147 deletions(-) diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 39e852c55..3843e4cd6 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -40,6 +40,12 @@ #include "cstore_metadata_serialization.h" +typedef struct +{ + Relation rel; + EState *estate; +} ModifyState; + static Oid CStoreStripeAttrRelationId(void); static Oid CStoreStripeAttrIndexRelationId(void); static Oid CStoreStripesRelationId(void); @@ -47,13 +53,13 @@ static Oid CStoreStripesIndexRelationId(void); static Oid CStoreTablesRelationId(void); static Oid CStoreTablesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, - uint64 existsSize, uint64 valuesSize, - uint64 skiplistSize); static int TableBlockRowCount(Oid relid); static void DeleteTableMetadataRowIfExists(Oid relid); -static void InsertTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple); -static void DeleteTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple); +static ModifyState * StartModifyRelation(Relation rel); +static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, + bool *nulls); +static void DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple); +static void FinishModifyRelation(ModifyState *state); static EState * create_estate_for_relation(Relation rel); /* constants for cstore_stripe_attr */ @@ -86,10 +92,9 @@ static EState * create_estate_for_relation(Relation rel); void InitCStoreTableMetadata(Oid relid, int blockRowCount) { - Oid cstoreTableOid = InvalidOid; - Relation cstoreTable = NULL; - TupleDesc tupleDescriptor = NULL; - HeapTuple 
tuple = NULL; + Oid cstoreTablesOid = InvalidOid; + Relation cstoreTables = NULL; + ModifyState *modifyState = NULL; bool nulls[Natts_cstore_tables] = { 0 }; Datum values[Natts_cstore_tables] = { @@ -101,17 +106,16 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) DeleteTableMetadataRowIfExists(relid); - cstoreTableOid = CStoreTablesRelationId(); - cstoreTable = heap_open(cstoreTableOid, RowExclusiveLock); - tupleDescriptor = RelationGetDescr(cstoreTable); + cstoreTablesOid = CStoreTablesRelationId(); + cstoreTables = heap_open(cstoreTablesOid, RowExclusiveLock); - tuple = heap_form_tuple(tupleDescriptor, values, nulls); - - InsertTupleAndEnforceConstraints(cstoreTable, tuple); + modifyState = StartModifyRelation(cstoreTables); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + FinishModifyRelation(modifyState); CommandCounterIncrement(); - heap_close(cstoreTable, NoLock); + heap_close(cstoreTables, NoLock); } @@ -132,11 +136,10 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) Oid cstoreStripesOid = CStoreStripesRelationId(); Relation cstoreStripes = heap_open(cstoreStripesOid, RowExclusiveLock); - TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripes); - HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); - - InsertTupleAndEnforceConstraints(cstoreStripes, tuple); + ModifyState *modifyState = StartModifyRelation(cstoreStripes); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + FinishModifyRelation(modifyState); CommandCounterIncrement(); @@ -268,7 +271,9 @@ DeleteTableMetadataRowIfExists(Oid relid) heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { - DeleteTupleAndEnforceConstraints(cstoreTables, heapTuple); + ModifyState *modifyState = StartModifyRelation(cstoreTables); + DeleteTupleAndEnforceConstraints(modifyState, heapTuple); + FinishModifyRelation(modifyState); } systable_endscan_ordered(scanDescriptor); @@ -277,144 +282,33 @@ 
DeleteTableMetadataRowIfExists(Oid relid) } -/* - * InsertTupleAndEnforceConstraints inserts a tuple into a relation and - * makes sure constraints (e.g. FK constraints, NOT NULL, ...) are enforced. - */ -static void -InsertTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple) -{ - EState *estate = NULL; - TupleTableSlot *slot = NULL; - - estate = create_estate_for_relation(rel); - slot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel), &TTSOpsHeapTuple); - ExecStoreHeapTuple(heapTuple, slot, false); - - ExecOpenIndices(estate->es_result_relation_info, false); - - /* ExecSimpleRelationInsert executes any constraints */ - ExecSimpleRelationInsert(estate, slot); - - ExecCloseIndices(estate->es_result_relation_info); - - AfterTriggerEndQuery(estate); - ExecCleanUpTriggerState(estate); - ExecResetTupleTable(estate->es_tupleTable, false); - FreeExecutorState(estate); -} - - - -/* - * DeleteTupleAndEnforceConstraints deletes a tuple from a relation and - * makes sure constraints (e.g. FK constraints) are enforced. - */ -static void -DeleteTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple) -{ - EState *estate = create_estate_for_relation(rel); - ResultRelInfo *resultRelInfo = estate->es_result_relation_info; - - ItemPointer tid = &(heapTuple->t_self); - simple_table_tuple_delete(rel, tid, estate->es_snapshot); - - /* execute AFTER ROW DELETE Triggers to enforce constraints */ - ExecARDeleteTriggers(estate, resultRelInfo, - tid, NULL, NULL); - - AfterTriggerEndQuery(estate); - ExecCleanUpTriggerState(estate); - ExecResetTupleTable(estate->es_tupleTable, false); - FreeExecutorState(estate); -} - - -/* - * Based on a similar function from - * postgres/src/backend/replication/logical/worker.c. - * - * Executor state preparation for evaluation of constraint expressions, - * indexes and triggers. 
- * - * This is based on similar code in copy.c - */ -static EState * -create_estate_for_relation(Relation rel) -{ - EState *estate; - ResultRelInfo *resultRelInfo; - RangeTblEntry *rte; - - estate = CreateExecutorState(); - - rte = makeNode(RangeTblEntry); - rte->rtekind = RTE_RELATION; - rte->relid = RelationGetRelid(rel); - rte->relkind = rel->rd_rel->relkind; - rte->rellockmode = AccessShareLock; - ExecInitRangeTable(estate, list_make1(rte)); - - resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); - - estate->es_result_relations = resultRelInfo; - estate->es_num_result_relations = 1; - estate->es_result_relation_info = resultRelInfo; - - estate->es_output_cid = GetCurrentCommandId(true); - - /* Prepare to catch AFTER triggers. */ - AfterTriggerBeginQuery(); - - return estate; -} - - /* * SaveStripeFooter stores give StripeFooter as cstore_stripe_attr records. */ void SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer) { - for (AttrNumber attr = 1; attr <= footer->columnCount; attr++) - { - InsertStripeAttrRow(relid, stripe, attr, - footer->existsSizeArray[attr - 1], - footer->valueSizeArray[attr - 1], - footer->skipListSizeArray[attr - 1]); - } -} - - -/* - * InsertStripeAttrRow adds a row to cstore_stripe_attr. 
- */ -static void -InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, - uint64 existsSize, uint64 valuesSize, - uint64 skiplistSize) -{ - bool nulls[Natts_cstore_stripe_attr] = { 0 }; - Datum values[Natts_cstore_stripe_attr] = { - ObjectIdGetDatum(relid), - Int64GetDatum(stripe), - Int16GetDatum(attr), - Int64GetDatum(existsSize), - Int64GetDatum(valuesSize), - Int64GetDatum(skiplistSize) - }; - Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, RowExclusiveLock); - TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripeAttrs); - HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + ModifyState *modifyState = StartModifyRelation(cstoreStripeAttrs); - InsertTupleAndEnforceConstraints(cstoreStripeAttrs, tuple); + for (AttrNumber attr = 1; attr <= footer->columnCount; attr++) + { + bool nulls[Natts_cstore_stripe_attr] = { 0 }; + Datum values[Natts_cstore_stripe_attr] = { + ObjectIdGetDatum(relid), + Int64GetDatum(stripe), + Int16GetDatum(attr), + Int64GetDatum(footer->existsSizeArray[attr - 1]), + Int64GetDatum(footer->valueSizeArray[attr - 1]), + Int64GetDatum(footer->skipListSizeArray[attr - 1]) + }; - CommandCounterIncrement(); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + } + FinishModifyRelation(modifyState); heap_close(cstoreStripeAttrs, NoLock); } @@ -489,6 +383,118 @@ ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) } +/* + * StartModifyRelation allocates resources for modifications. + */ +static ModifyState * +StartModifyRelation(Relation rel) +{ + ModifyState *modifyState = NULL; + EState *estate = create_estate_for_relation(rel); + + /* ExecSimpleRelationInsert, ... 
require caller to open indexes */ + ExecOpenIndices(estate->es_result_relation_info, false); + + modifyState = palloc(sizeof(ModifyState)); + modifyState->rel = rel; + modifyState->estate = estate; + + return modifyState; +} + + +/* + * InsertTupleAndEnforceConstraints inserts a tuple into a relation and makes + * sure constraints are enforced and indexes are updated. + */ +static void +InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls) +{ + TupleDesc tupleDescriptor = RelationGetDescr(state->rel); + HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor, + &TTSOpsHeapTuple); + ExecStoreHeapTuple(tuple, slot, false); + + /* use ExecSimpleRelationInsert to enforce constraints */ + ExecSimpleRelationInsert(state->estate, slot); +} + + +/* + * DeleteTupleAndEnforceConstraints deletes a tuple from a relation and + * makes sure constraints (e.g. FK constraints) are enforced. + */ +static void +DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple) +{ + EState *estate = state->estate; + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + + ItemPointer tid = &(heapTuple->t_self); + simple_table_tuple_delete(state->rel, tid, estate->es_snapshot); + + /* execute AFTER ROW DELETE Triggers to enforce constraints */ + ExecARDeleteTriggers(estate, resultRelInfo, tid, NULL, NULL); +} + + +/* + * FinishModifyRelation cleans up resources after modifications are done. + */ +static void +FinishModifyRelation(ModifyState *state) +{ + ExecCloseIndices(state->estate->es_result_relation_info); + + AfterTriggerEndQuery(state->estate); + ExecCleanUpTriggerState(state->estate); + ExecResetTupleTable(state->estate->es_tupleTable, false); + FreeExecutorState(state->estate); +} + + +/* + * Based on a similar function from + * postgres/src/backend/replication/logical/worker.c. 
+ * + * Executor state preparation for evaluation of constraint expressions, + * indexes and triggers. + * + * This is based on similar code in copy.c + */ +static EState * +create_estate_for_relation(Relation rel) +{ + EState *estate; + ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + + estate = CreateExecutorState(); + + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = RelationGetRelid(rel); + rte->relkind = rel->rd_rel->relkind; + rte->rellockmode = AccessShareLock; + ExecInitRangeTable(estate, list_make1(rte)); + + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); + + estate->es_result_relations = resultRelInfo; + estate->es_num_result_relations = 1; + estate->es_result_relation_info = resultRelInfo; + + estate->es_output_cid = GetCurrentCommandId(true); + + /* Prepare to catch AFTER triggers. */ + AfterTriggerBeginQuery(); + + return estate; +} + + /* * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr. * TODO: should we cache this similar to citus? @@ -555,6 +561,10 @@ CStoreTablesIndexRelationId(void) } +/* + * CStoreNamespaceId returns namespace id of the schema we store cstore + * related tables. 
+ */ static Oid CStoreNamespaceId(void) { From e9045227cd2c31acf568af623de4bb27fd39eb73 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 9 Sep 2020 12:44:41 -0700 Subject: [PATCH 12/91] create relfilenode for FDW --- cstore_fdw.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 146 insertions(+), 15 deletions(-) diff --git a/cstore_fdw.c b/cstore_fdw.c index 8ce3a7296..073a68130 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -21,9 +21,13 @@ #include "access/heapam.h" #include "access/reloptions.h" #include "access/tuptoaster.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/pg_foreign_table.h" #include "catalog/pg_namespace.h" +#include "catalog/storage.h" #include "commands/copy.h" #include "commands/dbcommands.h" #include "commands/defrem.h" @@ -50,18 +54,20 @@ #include "parser/parser.h" #include "parser/parse_coerce.h" #include "parser/parse_type.h" +#include "storage/smgr.h" #include "tcop/utility.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" +#if PG_VERSION_NUM < 120000 +#include "utils/rel.h" +#endif #if PG_VERSION_NUM >= 120000 #include "utils/snapmgr.h" #else #include "utils/tqual.h" #endif -#if PG_VERSION_NUM < 120000 -#include "utils/rel.h" -#endif +#include "utils/syscache.h" #include "cstore.h" #include "cstore_fdw.h" @@ -124,6 +130,7 @@ static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); static List * DroppedCStoreFilenameList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); +static void InitializeRelFileNode(Relation relation); static void TruncateCStoreTables(List *cstoreRelationList); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); @@ -183,6 +190,9 @@ static void CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relatio static 
bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); #endif +static void cstore_fdw_initrel(Relation rel); +static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); +static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode); PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); PG_FUNCTION_INFO_V1(cstore_table_size); @@ -261,7 +271,7 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) { Oid relationId = RangeVarGetRelid(createStatement->base.relation, AccessShareLock, false); - Relation relation = heap_open(relationId, AccessExclusiveLock); + Relation relation = cstore_fdw_open(relationId, AccessExclusiveLock); /* * Make sure database directory exists before creating a table. @@ -368,7 +378,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, foreach(fileListCell, droppedTables) { char *fileName = lfirst(fileListCell); - + //TODO: relation storage is not dropped DeleteCStoreTableFiles(fileName); } } @@ -562,7 +572,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) * Open and lock the relation. We acquire ShareUpdateExclusiveLock to allow * concurrent reads, but block concurrent writes. 
*/ - relation = heap_openrv(copyStatement->relation, ShareUpdateExclusiveLock); + relation = cstore_fdw_openrv(copyStatement->relation, ShareUpdateExclusiveLock); relationId = RelationGetRelid(relation); /* allocate column values and nulls arrays */ @@ -850,7 +860,7 @@ OpenRelationsForTruncate(List *cstoreTableList) foreach(relationCell, cstoreTableList) { RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); - Relation relation = heap_openrv(rangeVar, AccessExclusiveLock); + Relation relation = cstore_fdw_openrv(rangeVar, AccessExclusiveLock); Oid relationId = relation->rd_id; AclResult aclresult = pg_class_aclcheck(relationId, GetUserId(), ACL_TRUNCATE); @@ -889,11 +899,76 @@ TruncateCStoreTables(List *cstoreRelationList) Assert(CStoreTable(relationId)); cstoreOptions = CStoreGetOptions(relationId); + if (OidIsValid(relation->rd_rel->relfilenode)) + { + RelationOpenSmgr(relation); + RelationDropStorage(relation); + } DeleteCStoreTableFiles(cstoreOptions->filename); InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } } +/* + * Version 11 and earlier already create a relfilenode for foreign + * tables. Version 12 and later do not, so we need to create one manually. + */ +static void +InitializeRelFileNode(Relation relation) +{ +#if PG_VERSION_NUM >= 120000 + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; + + /* + * Get a writable copy of the pg_class tuple for the given relation. 
+ */ + pg_class = heap_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "could not find tuple for relation %u", + RelationGetRelid(relation)); + classform = (Form_pg_class) GETSTRUCT(tuple); + + if (!OidIsValid(classform->relfilenode)) + { + Oid tablespace; + Oid filenode = relation->rd_id; + char persistence = relation->rd_rel->relpersistence; + RelFileNode newrnode; + SMgrRelation srel; + + if (OidIsValid(relation->rd_rel->reltablespace)) + tablespace = relation->rd_rel->reltablespace; + else + tablespace = MyDatabaseTableSpace; + + newrnode.spcNode = tablespace; + newrnode.dbNode = MyDatabaseId; + newrnode.relNode = filenode; + + srel = RelationCreateStorage(newrnode, persistence); + smgrclose(srel); + + classform->relfilenode = filenode; + classform->relpages = 0; /* it's empty until further notice */ + classform->reltuples = 0; + classform->relallvisible = 0; + classform->relfrozenxid = InvalidTransactionId; + classform->relminmxid = InvalidTransactionId; + + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + CommandCounterIncrement(); + } + + heap_freetuple(tuple); + heap_close(pg_class, RowExclusiveLock); +#endif +} + /* * CStoreTable checks if the given table name belongs to a foreign columnar store @@ -1179,6 +1254,7 @@ cstore_clean_table_resources(PG_FUNCTION_ARGS) struct stat fileStat; int statResult = -1; + //TODO: relation storage is not dropped appendStringInfo(filePath, "%s/%s/%d/%d", DataDir, CSTORE_FDW_NAME, (int) MyDatabaseId, (int) relationId); @@ -1402,7 +1478,7 @@ static char * CStoreDefaultFilePath(Oid foreignTableId) { StringInfo cstoreFilePath = NULL; - Relation relation = relation_open(foreignTableId, AccessShareLock); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); RelFileNode relationFileNode = relation->rd_node; Oid databaseOid = relationFileNode.dbNode; Oid relationFileOid = 
relationFileNode.relNode; @@ -1453,7 +1529,7 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId { Path *foreignScanPath = NULL; CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - Relation relation = heap_open(foreignTableId, AccessShareLock); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); /* * We skip reading columns that are not in query. Here we assume that all @@ -1659,7 +1735,7 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) List *restrictInfoList = baserel->baserestrictinfo; ListCell *restrictInfoCell = NULL; const AttrNumber wholeRow = 0; - Relation relation = heap_open(foreignTableId, AccessShareLock); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); TupleDesc tupleDescriptor = RelationGetDescr(relation); /* first add the columns used in joins and projections */ @@ -1750,10 +1826,13 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) { - Oid foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); - CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); + Relation relation = scanState->ss.ss_currentRelation; + CStoreOptions *cstoreOptions; + Oid foreignTableId; - ExplainPropertyText("CStore File", cstoreOptions->filename, explainState); + cstore_fdw_initrel(relation); + foreignTableId = RelationGetRelid(relation); + cstoreOptions = CStoreGetOptions(foreignTableId); /* supress file size if we're not showing cost details */ if (explainState->costs) @@ -1784,6 +1863,8 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) List *foreignPrivateList = NIL; List *whereClauseList = NIL; + cstore_fdw_initrel(currentRelation); + /* if Explain with no Analyze, do nothing */ if (executorFlags & EXEC_FLAG_EXPLAIN_ONLY) { @@ -1869,9 +1950,12 @@ CStoreAnalyzeForeignTable(Relation relation, BlockNumber *totalPageCount) { Oid 
foreignTableId = RelationGetRelid(relation); - CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); + CStoreOptions *cstoreOptions; struct stat statBuffer; + cstore_fdw_initrel(relation); + cstoreOptions = CStoreGetOptions(foreignTableId); + int statResult = stat(cstoreOptions->filename, &statBuffer); if (statResult < 0) { @@ -1924,6 +2008,7 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, TupleDesc tupleDescriptor = RelationGetDescr(relation); uint32 columnCount = tupleDescriptor->natts; + cstore_fdw_initrel(relation); /* create list of columns of the relation */ uint32 columnIndex = 0; @@ -2147,7 +2232,7 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela Relation relation = NULL; foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); - relation = heap_open(foreignTableOid, ShareUpdateExclusiveLock); + relation = cstore_fdw_open(foreignTableOid, ShareUpdateExclusiveLock); cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); @@ -2246,3 +2331,49 @@ CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, #endif + +/* + * Versions 12 and later do not initialize rd_node even if the relation has a + * valid relfilenode, so we need to initialize it each time a cstore FDW + * relation is opened. + */ +static void +cstore_fdw_initrel(Relation rel) +{ +#if PG_VERSION_NUM >= 120000 + if (rel->rd_rel->relfilenode == InvalidOid) + InitializeRelFileNode(rel); + + /* + * Copied code from RelationInitPhysicalAddr(), which doesn't + * work on foreign tables. 
+ */ + if (OidIsValid(rel->rd_rel->reltablespace)) + rel->rd_node.spcNode = rel->rd_rel->reltablespace; + else + rel->rd_node.spcNode = MyDatabaseTableSpace; + + rel->rd_node.dbNode = MyDatabaseId; + rel->rd_node.relNode = rel->rd_rel->relfilenode; +#endif +} + +static Relation +cstore_fdw_open(Oid relationId, LOCKMODE lockmode) +{ + Relation rel = heap_open(relationId, lockmode); + + cstore_fdw_initrel(rel); + + return rel; +} + +static Relation +cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode) +{ + Relation rel = heap_openrv(relation, lockmode); + + cstore_fdw_initrel(rel); + + return rel; +} From b18c9c8060365ffb2487934d1a2c693a95152fd8 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 14:21:56 -0700 Subject: [PATCH 13/91] drop storage for DROP command --- cstore_fdw.c | 75 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/cstore_fdw.c b/cstore_fdw.c index 073a68130..5ad465807 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -127,7 +127,7 @@ static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString); static uint64 CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString); static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); -static List * DroppedCStoreFilenameList(DropStmt *dropStatement); +static List * DroppedCStoreRelidList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void InitializeRelFileNode(Relation relation); @@ -369,17 +369,43 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else { - ListCell *fileListCell = NULL; - List *droppedTables = DroppedCStoreFilenameList((DropStmt *) parseTree); + List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); + List *dropFiles = NIL; + ListCell *lc = NULL; + + /* drop smgr storage */ + foreach(lc, dropRelids) + { + Oid relid = 
lfirst_oid(lc); + Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); + CStoreOptions *cstoreOptions = CStoreGetOptions(relid); + char *defaultfilename = CStoreDefaultFilePath(relid); + + RelationOpenSmgr(relation); + RelationDropStorage(relation); + heap_close(relation, AccessExclusiveLock); + + /* + * Skip files that are placed in default location, they are handled + * by sql drop trigger. Both paths are generated by code, use + * of strcmp is safe here. + */ + if (strcmp(defaultfilename, cstoreOptions->filename) == 0) + { + continue; + } + + dropFiles = lappend(dropFiles, cstoreOptions->filename); + } CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - foreach(fileListCell, droppedTables) + /* drop files */ + foreach(lc, dropFiles) { - char *fileName = lfirst(fileListCell); - //TODO: relation storage is not dropped - DeleteCStoreTableFiles(fileName); + char *filename = lfirst(lc); + DeleteCStoreTableFiles(filename); } } } @@ -783,13 +809,13 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) /* - * DropppedCStoreFilenameList extracts and returns the list of cstore file names + * DropppedCStoreRelidList extracts and returns the list of cstore relids * from DROP table statement */ static List * -DroppedCStoreFilenameList(DropStmt *dropStatement) +DroppedCStoreRelidList(DropStmt *dropStatement) { - List *droppedCStoreFileList = NIL; + List *droppedCStoreRelidList = NIL; if (dropStatement->removeType == OBJECT_FOREIGN_TABLE) { @@ -802,26 +828,13 @@ DroppedCStoreFilenameList(DropStmt *dropStatement) Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); if (CStoreTable(relationId)) { - CStoreOptions *cstoreOptions = CStoreGetOptions(relationId); - char *defaultfilename = CStoreDefaultFilePath(relationId); - - /* - * Skip files that are placed in default location, they are handled - * by sql drop trigger. Both paths are generated by code, use - * of strcmp is safe here. 
- */ - if (strcmp(defaultfilename, cstoreOptions->filename) == 0) - { - continue; - } - - droppedCStoreFileList = lappend(droppedCStoreFileList, - cstoreOptions->filename); + droppedCStoreRelidList = lappend_oid(droppedCStoreRelidList, + relationId); } } } - return droppedCStoreFileList; + return droppedCStoreRelidList; } @@ -1254,7 +1267,15 @@ cstore_clean_table_resources(PG_FUNCTION_ARGS) struct stat fileStat; int statResult = -1; - //TODO: relation storage is not dropped + /* + * TODO: Event triggers do not offer the relfilenode of the + * dropped table, and by the time the sql_drop event trigger + * is called, the object is already gone so we can't look it + * up. Therefore, we can't drop the Smgr storage here, which + * means that cascaded drops of cstore foreign tables will + * leak storage. + */ + appendStringInfo(filePath, "%s/%s/%d/%d", DataDir, CSTORE_FDW_NAME, (int) MyDatabaseId, (int) relationId); From a2f7eadeb9fba32b46158eec6f6837b1bf1b2ac3 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 16:02:00 -0700 Subject: [PATCH 14/91] lock while initializing relfilenode --- cstore_fdw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cstore_fdw.c b/cstore_fdw.c index 5ad465807..07b47d590 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -948,12 +948,21 @@ InitializeRelFileNode(Relation relation) if (!OidIsValid(classform->relfilenode)) { + Relation tmprel; Oid tablespace; Oid filenode = relation->rd_id; char persistence = relation->rd_rel->relpersistence; RelFileNode newrnode; SMgrRelation srel; + /* + * Upgrade to AccessExclusiveLock, and hold until the end of the + * transaction. This shouldn't happen during a read, but it's hard to + * prove that because it happens lazily. 
+ */ + tmprel = heap_open(relation->rd_id, AccessExclusiveLock); + heap_close(tmprel, NoLock); + if (OidIsValid(relation->rd_rel->reltablespace)) tablespace = relation->rd_rel->reltablespace; else From dee408248cd0ec5830df0ccbd456127a96dc65be Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 9 Sep 2020 12:44:41 -0700 Subject: [PATCH 15/91] Replace file access with Smgr --- cstore.c | 132 +---------------- cstore.h | 32 ++-- cstore_fdw.c | 268 +++++++--------------------------- cstore_reader.c | 115 ++++++--------- cstore_writer.c | 166 ++++++++------------- expected/drop.out | 57 -------- expected/truncate.out | 31 ---- input/block_filtering.source | 6 +- input/copyto.source | 3 +- input/create.source | 16 +- input/data_types.source | 18 +-- output/block_filtering.source | 6 +- output/copyto.source | 3 +- output/create.source | 18 +-- output/data_types.source | 18 +-- sql/drop.sql | 28 ---- sql/truncate.sql | 19 --- 17 files changed, 209 insertions(+), 727 deletions(-) diff --git a/cstore.c b/cstore.c index 658c15745..f04fc4fc6 100644 --- a/cstore.c +++ b/cstore.c @@ -21,9 +21,6 @@ #include "cstore.h" -static void CreateDirectory(StringInfo directoryName); -static bool DirectoryExists(StringInfo directoryName); - /* ParseCompressionType converts a string to a compression type. */ CompressionType ParseCompressionType(const char *compressionTypeString) @@ -44,80 +41,6 @@ ParseCompressionType(const char *compressionTypeString) } -/* CreateDirectory creates a new directory with the given directory name. */ -static void -CreateDirectory(StringInfo directoryName) -{ - int makeOK = mkdir(directoryName->data, S_IRWXU); - if (makeOK != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not create directory \"%s\": %m", - directoryName->data))); - } -} - - -/* DirectoryExists checks if a directory exists for the given directory name. 
*/ -static bool -DirectoryExists(StringInfo directoryName) -{ - bool directoryExists = true; - struct stat directoryStat; - - int statOK = stat(directoryName->data, &directoryStat); - if (statOK == 0) - { - /* file already exists; check that it is a directory */ - if (!S_ISDIR(directoryStat.st_mode)) - { - ereport(ERROR, (errmsg("\"%s\" is not a directory", directoryName->data), - errhint("You need to remove or rename the file \"%s\".", - directoryName->data))); - } - } - else - { - if (errno == ENOENT) - { - directoryExists = false; - } - else - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat directory \"%s\": %m", - directoryName->data))); - } - } - - return directoryExists; -} - - -/* - * RemoveCStoreDatabaseDirectory removes CStore directory previously - * created for this database. - * However it does not remove 'cstore_fdw' directory even if there - * are no other databases left. - */ -void -RemoveCStoreDatabaseDirectory(Oid databaseOid) -{ - StringInfo cstoreDirectoryPath = makeStringInfo(); - StringInfo cstoreDatabaseDirectoryPath = makeStringInfo(); - - appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); - - appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, - CSTORE_FDW_NAME, databaseOid); - - if (DirectoryExists(cstoreDatabaseDirectoryPath)) - { - rmtree(cstoreDatabaseDirectoryPath->data, true); - } -} - - /* * InitializeCStoreTableFile creates data and footer file for a cstore table. * The function assumes data and footer files do not exist, therefore @@ -136,62 +59,9 @@ InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *csto * Initialize state to write to the cstore file. This creates an * empty data file and a valid footer file for the table. 
*/ - writeState = CStoreBeginWrite(relationId, cstoreOptions->filename, + writeState = CStoreBeginWrite(relationId, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); CStoreEndWrite(writeState); } - - -/* - * CreateCStoreDatabaseDirectory creates the directory (and parent directories, - * if needed) used to store automatically managed cstore_fdw files. The path to - * the directory is $PGDATA/cstore_fdw/{databaseOid}. - */ -void -CreateCStoreDatabaseDirectory(Oid databaseOid) -{ - bool cstoreDirectoryExists = false; - bool databaseDirectoryExists = false; - StringInfo cstoreDatabaseDirectoryPath = NULL; - - StringInfo cstoreDirectoryPath = makeStringInfo(); - appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); - - cstoreDirectoryExists = DirectoryExists(cstoreDirectoryPath); - if (!cstoreDirectoryExists) - { - CreateDirectory(cstoreDirectoryPath); - } - - cstoreDatabaseDirectoryPath = makeStringInfo(); - appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, - CSTORE_FDW_NAME, databaseOid); - - databaseDirectoryExists = DirectoryExists(cstoreDatabaseDirectoryPath); - if (!databaseDirectoryExists) - { - CreateDirectory(cstoreDatabaseDirectoryPath); - } -} - - -/* - * DeleteCStoreTableFiles deletes the data and footer files for a cstore table - * whose data filename is given. 
- */ -void -DeleteCStoreTableFiles(char *filename) -{ - int dataFileRemoved = 0; - - /* delete the data file */ - dataFileRemoved = unlink(filename); - if (dataFileRemoved != 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not delete file \"%s\": %m", - filename))); - } -} diff --git a/cstore.h b/cstore.h index 20cac7e05..9679fea9c 100644 --- a/cstore.h +++ b/cstore.h @@ -16,10 +16,10 @@ #include "fmgr.h" #include "lib/stringinfo.h" +#include "storage/bufpage.h" #include "utils/relcache.h" /* Defines for valid option names */ -#define OPTION_NAME_FILENAME "filename" #define OPTION_NAME_COMPRESSION_TYPE "compression" #define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" #define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" @@ -68,7 +68,6 @@ typedef enum */ typedef struct CStoreOptions { - char *filename; CompressionType compressionType; uint64 stripeRowCount; uint32 blockRowCount; @@ -203,10 +202,9 @@ typedef struct TableReadState { Oid relationId; - FILE *tableFile; TableMetadata *tableMetadata; TupleDesc tupleDescriptor; - + Relation relation; /* * List of Var pointers for columns in the query. 
We use this both for * getting vector of projected columns, and also when we want to build @@ -228,7 +226,6 @@ typedef struct TableReadState typedef struct TableWriteState { Oid relationId; - FILE *tableFile; TableMetadata *tableMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; @@ -257,11 +254,9 @@ extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); extern void CreateCStoreDatabaseDirectory(Oid databaseOid); extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); -extern void DeleteCStoreTableFiles(char *filename); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(Oid relationId, - const char *filename, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, @@ -271,7 +266,7 @@ extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, extern void CStoreEndWrite(TableWriteState *state); /* Function declarations for reading from a cstore file */ -extern TableReadState * CStoreBeginRead(Oid relationId, const char *filename, +extern TableReadState * CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, List *projectedColumnList, List *qualConditions); extern bool CStoreReadFinished(TableReadState *state); @@ -286,7 +281,7 @@ extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *co uint32 blockRowCount); extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount); -extern uint64 CStoreTableRowCount(Oid relid, const char *filename); +extern uint64 CStoreTableRowCount(Relation relation); extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); @@ -294,8 +289,27 @@ extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressio /* cstore_metadata_tables.c */ extern void SaveStripeFooter(Oid relid, 
uint64 stripe, StripeFooter *footer); extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount); + extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); extern TableMetadata * ReadTableMetadata(Oid relid); +typedef struct SmgrAddr +{ + BlockNumber blockno; + uint32 offset; +} SmgrAddr; + +static inline SmgrAddr +logical_to_smgr(uint64 logicalOffset) +{ + uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; + SmgrAddr addr; + + addr.blockno = logicalOffset / bytes_per_page; + addr.offset = logicalOffset % bytes_per_page; + + return addr; +} + #endif /* CSTORE_H */ diff --git a/cstore_fdw.c b/cstore_fdw.c index 07b47d590..cd8dcf4ef 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -100,7 +100,6 @@ static const uint32 ValidOptionCount = 4; static const CStoreValidOption ValidOptionArray[] = { /* foreign table options */ - { OPTION_NAME_FILENAME, ForeignTableRelationId }, { OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId }, { OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId }, { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } @@ -130,7 +129,7 @@ static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); static List * DroppedCStoreRelidList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); -static void InitializeRelFileNode(Relation relation); +static void InitializeRelFileNode(Relation relation, bool force); static void TruncateCStoreTables(List *cstoreRelationList); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); @@ -140,10 +139,9 @@ static StringInfo OptionNamesString(Oid currentContextId); static HeapTuple GetSlotHeapTuple(TupleTableSlot *tts); static CStoreOptions * CStoreGetOptions(Oid foreignTableId); static char * CStoreGetOptionValue(Oid foreignTableId, const char *optionName); -static 
void ValidateForeignTableOptions(char *filename, char *compressionTypeString, +static void ValidateForeignTableOptions(char *compressionTypeString, char *stripeRowCountString, char *blockRowCountString); -static char * CStoreDefaultFilePath(Oid foreignTableId); static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId); static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, @@ -158,8 +156,8 @@ static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel Oid foreignTableId, ForeignPath *bestPath, List *targetList, List *scanClauses); #endif -static double TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename); -static BlockNumber PageCount(const char *filename); +static double TupleCountEstimate(Relation relation, RelOptInfo *baserel); +static BlockNumber PageCount(Relation relation); static List * ColumnList(RelOptInfo *baserel, Oid foreignTableId); static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState); @@ -250,17 +248,7 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) triggerData = (EventTriggerData *) fcinfo->context; parseTree = triggerData->parsetree; - if (nodeTag(parseTree) == T_CreateForeignServerStmt) - { - CreateForeignServerStmt *serverStatement = (CreateForeignServerStmt *) parseTree; - - char *foreignWrapperName = serverStatement->fdwname; - if (strncmp(foreignWrapperName, CSTORE_FDW_NAME, NAMEDATALEN) == 0) - { - CreateCStoreDatabaseDirectory(MyDatabaseId); - } - } - else if (nodeTag(parseTree) == T_CreateForeignTableStmt) + if (nodeTag(parseTree) == T_CreateForeignTableStmt) { CreateForeignTableStmt *createStatement = (CreateForeignTableStmt *) parseTree; char *serverName = createStatement->servername; @@ -280,8 +268,6 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) * We have no chance to hook into server creation to create data * directory for it during database creation time. 
*/ - CreateCStoreDatabaseDirectory(MyDatabaseId); - InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); heap_close(relation, AccessExclusiveLock); } @@ -361,16 +347,10 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - - if (removeCStoreDirectory) - { - RemoveCStoreDatabaseDirectory(MyDatabaseId); - } } else { List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); - List *dropFiles = NIL; ListCell *lc = NULL; /* drop smgr storage */ @@ -378,35 +358,14 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, { Oid relid = lfirst_oid(lc); Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); - CStoreOptions *cstoreOptions = CStoreGetOptions(relid); - char *defaultfilename = CStoreDefaultFilePath(relid); RelationOpenSmgr(relation); RelationDropStorage(relation); heap_close(relation, AccessExclusiveLock); - - /* - * Skip files that are placed in default location, they are handled - * by sql drop trigger. Both paths are generated by code, use - * of strcmp is safe here. 
- */ - if (strcmp(defaultfilename, cstoreOptions->filename) == 0) - { - continue; - } - - dropFiles = lappend(dropFiles, cstoreOptions->filename); } CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - - /* drop files */ - foreach(lc, dropFiles) - { - char *filename = lfirst(lc); - DeleteCStoreTableFiles(filename); - } } } else if (nodeTag(parseTree) == T_TruncateStmt) @@ -449,18 +408,9 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else if (nodeTag(parseTree) == T_DropdbStmt) { - DropdbStmt *dropDdStmt = (DropdbStmt *) parseTree; - bool missingOk = true; - Oid databaseOid = get_database_oid(dropDdStmt->dbname, missingOk); - /* let postgres handle error checking and dropping of the database */ CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - - if (databaseOid != InvalidOid) - { - RemoveCStoreDatabaseDirectory(databaseOid); - } } /* handle other utility statements */ @@ -642,11 +592,11 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) /* init state to write to the cstore file */ writeState = CStoreBeginWrite(relationId, - cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); + writeState->relation = relation; while (nextRowFound) { @@ -912,12 +862,7 @@ TruncateCStoreTables(List *cstoreRelationList) Assert(CStoreTable(relationId)); cstoreOptions = CStoreGetOptions(relationId); - if (OidIsValid(relation->rd_rel->relfilenode)) - { - RelationOpenSmgr(relation); - RelationDropStorage(relation); - } - DeleteCStoreTableFiles(cstoreOptions->filename); + InitializeRelFileNode(relation, true); InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } } @@ -927,12 +872,12 @@ TruncateCStoreTables(List *cstoreRelationList) * tables. Version 12 and later do not, so we need to create one manually. 
*/ static void -InitializeRelFileNode(Relation relation) +InitializeRelFileNode(Relation relation, bool force) { #if PG_VERSION_NUM >= 120000 - Relation pg_class; - HeapTuple tuple; - Form_pg_class classform; + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; /* * Get a writable copy of the pg_class tuple for the given relation. @@ -946,12 +891,12 @@ InitializeRelFileNode(Relation relation) RelationGetRelid(relation)); classform = (Form_pg_class) GETSTRUCT(tuple); - if (!OidIsValid(classform->relfilenode)) + if (!OidIsValid(classform->relfilenode) || force) { + char persistence = relation->rd_rel->relpersistence; Relation tmprel; Oid tablespace; - Oid filenode = relation->rd_id; - char persistence = relation->rd_rel->relpersistence; + Oid filenode; RelFileNode newrnode; SMgrRelation srel; @@ -968,6 +913,8 @@ InitializeRelFileNode(Relation relation) else tablespace = MyDatabaseTableSpace; + filenode = GetNewRelFileNode(tablespace, NULL, persistence); + newrnode.spcNode = tablespace; newrnode.dbNode = MyDatabaseId; newrnode.relNode = filenode; @@ -1120,32 +1067,20 @@ Datum cstore_table_size(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); - - int64 tableSize = 0; - CStoreOptions *cstoreOptions = NULL; - char *dataFilename = NULL; - int dataFileStatResult = 0; - struct stat dataFileStatBuffer; - bool cstoreTable = CStoreTable(relationId); + Relation relation; + BlockNumber nblocks; + if (!cstoreTable) { ereport(ERROR, (errmsg("relation is not a cstore table"))); } - cstoreOptions = CStoreGetOptions(relationId); - dataFilename = cstoreOptions->filename; - - dataFileStatResult = stat(dataFilename, &dataFileStatBuffer); - if (dataFileStatResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", dataFilename))); - } - - tableSize += dataFileStatBuffer.st_size; - - PG_RETURN_INT64(tableSize); + relation = cstore_fdw_open(relationId, AccessShareLock); + RelationOpenSmgr(relation); + nblocks = 
smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + heap_close(relation, AccessShareLock); + PG_RETURN_INT64(nblocks * BLCKSZ); } @@ -1197,7 +1132,6 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) Oid optionContextId = PG_GETARG_OID(1); List *optionList = untransformRelOptions(optionArray); ListCell *optionCell = NULL; - char *filename = NULL; char *compressionTypeString = NULL; char *stripeRowCountString = NULL; char *blockRowCountString = NULL; @@ -1232,11 +1166,7 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) optionNamesString->data))); } - if (strncmp(optionName, OPTION_NAME_FILENAME, NAMEDATALEN) == 0) - { - filename = defGetString(optionDef); - } - else if (strncmp(optionName, OPTION_NAME_COMPRESSION_TYPE, NAMEDATALEN) == 0) + if (strncmp(optionName, OPTION_NAME_COMPRESSION_TYPE, NAMEDATALEN) == 0) { compressionTypeString = defGetString(optionDef); } @@ -1252,7 +1182,7 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) if (optionContextId == ForeignTableRelationId) { - ValidateForeignTableOptions(filename, compressionTypeString, + ValidateForeignTableOptions(compressionTypeString, stripeRowCountString, blockRowCountString); } @@ -1271,11 +1201,6 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) Datum cstore_clean_table_resources(PG_FUNCTION_ARGS) { - Oid relationId = PG_GETARG_OID(0); - StringInfo filePath = makeStringInfo(); - struct stat fileStat; - int statResult = -1; - /* * TODO: Event triggers do not offer the relfilenode of the * dropped table, and by the time the sql_drop event trigger @@ -1285,19 +1210,6 @@ cstore_clean_table_resources(PG_FUNCTION_ARGS) * leak storage. */ - appendStringInfo(filePath, "%s/%s/%d/%d", DataDir, CSTORE_FDW_NAME, - (int) MyDatabaseId, (int) relationId); - - /* - * Check to see if the file exist first. This is the only way to - * find out if the table being dropped is a cstore table. 
- */ - statResult = stat(filePath->data, &fileStat); - if (statResult == 0) - { - DeleteCStoreTableFiles(filePath->data); - } - PG_RETURN_VOID(); } @@ -1359,7 +1271,6 @@ static CStoreOptions * CStoreGetOptions(Oid foreignTableId) { CStoreOptions *cstoreOptions = NULL; - char *filename = NULL; CompressionType compressionType = DEFAULT_COMPRESSION_TYPE; int32 stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; int32 blockRowCount = DEFAULT_BLOCK_ROW_COUNT; @@ -1367,7 +1278,6 @@ CStoreGetOptions(Oid foreignTableId) char *stripeRowCountString = NULL; char *blockRowCountString = NULL; - filename = CStoreGetOptionValue(foreignTableId, OPTION_NAME_FILENAME); compressionTypeString = CStoreGetOptionValue(foreignTableId, OPTION_NAME_COMPRESSION_TYPE); stripeRowCountString = CStoreGetOptionValue(foreignTableId, @@ -1375,7 +1285,7 @@ CStoreGetOptions(Oid foreignTableId) blockRowCountString = CStoreGetOptionValue(foreignTableId, OPTION_NAME_BLOCK_ROW_COUNT); - ValidateForeignTableOptions(filename, compressionTypeString, + ValidateForeignTableOptions(compressionTypeString, stripeRowCountString, blockRowCountString); /* parse provided options */ @@ -1392,14 +1302,7 @@ CStoreGetOptions(Oid foreignTableId) blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0); } - /* set default filename if it is not provided */ - if (filename == NULL) - { - filename = CStoreDefaultFilePath(foreignTableId); - } - cstoreOptions = palloc0(sizeof(CStoreOptions)); - cstoreOptions->filename = filename; cstoreOptions->compressionType = compressionType; cstoreOptions->stripeRowCount = stripeRowCount; cstoreOptions->blockRowCount = blockRowCount; @@ -1450,12 +1353,9 @@ CStoreGetOptionValue(Oid foreignTableId, const char *optionName) * considered invalid. 
*/ static void -ValidateForeignTableOptions(char *filename, char *compressionTypeString, +ValidateForeignTableOptions(char *compressionTypeString, char *stripeRowCountString, char *blockRowCountString) { - /* we currently do not have any checks for filename */ - (void) filename; - /* check if the provided compression type is valid */ if (compressionTypeString != NULL) { @@ -1500,36 +1400,6 @@ ValidateForeignTableOptions(char *filename, char *compressionTypeString, } -/* - * CStoreDefaultFilePath constructs the default file path to use for a cstore_fdw - * table. The path is of the form $PGDATA/cstore_fdw/{databaseOid}/{relfilenode}. - */ -static char * -CStoreDefaultFilePath(Oid foreignTableId) -{ - StringInfo cstoreFilePath = NULL; - Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); - RelFileNode relationFileNode = relation->rd_node; - Oid databaseOid = relationFileNode.dbNode; - Oid relationFileOid = relationFileNode.relNode; - - relation_close(relation, AccessShareLock); - - /* PG12 onward does not create relfilenode for foreign tables */ - if (databaseOid == InvalidOid) - { - databaseOid = MyDatabaseId; - relationFileOid = foreignTableId; - } - - cstoreFilePath = makeStringInfo(); - appendStringInfo(cstoreFilePath, "%s/%s/%u/%u", DataDir, CSTORE_FDW_NAME, - databaseOid, relationFileOid); - - return cstoreFilePath->data; -} - - /* * CStoreGetForeignRelSize obtains relation size estimates for a foreign table and * puts its estimate for row count into baserel->rows. 
@@ -1537,14 +1407,14 @@ CStoreDefaultFilePath(Oid foreignTableId) static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { - CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, - cstoreOptions->filename); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); + double tupleCountEstimate = TupleCountEstimate(relation, baserel); double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, 0, JOIN_INNER, NULL); double outputRowCount = clamp_row_est(tupleCountEstimate * rowSelectivity); baserel->rows = outputRowCount; + heap_close(relation, AccessShareLock); } @@ -1558,7 +1428,6 @@ static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { Path *foreignScanPath = NULL; - CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); /* @@ -1579,15 +1448,14 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId */ List *queryColumnList = ColumnList(baserel, foreignTableId); uint32 queryColumnCount = list_length(queryColumnList); - BlockNumber relationPageCount = PageCount(cstoreOptions->filename); + BlockNumber relationPageCount = PageCount(relation); uint32 relationColumnCount = RelationGetNumberOfAttributes(relation); double queryColumnRatio = (double) queryColumnCount / relationColumnCount; double queryPageCount = relationPageCount * queryColumnRatio; double totalDiskAccessCost = seq_page_cost * queryPageCount; - double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, - cstoreOptions->filename); + double tupleCountEstimate = TupleCountEstimate(relation, baserel); /* * We estimate costs almost the same way as cost_seqscan(), thus assuming @@ -1692,7 +1560,7 @@ CStoreGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreignTableI * file. 
*/ static double -TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename) +TupleCountEstimate(Relation relation, RelOptInfo *baserel) { double tupleCountEstimate = 0.0; @@ -1705,13 +1573,13 @@ TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename) * that by the current file size. */ double tupleDensity = baserel->tuples / (double) baserel->pages; - BlockNumber pageCount = PageCount(filename); + BlockNumber pageCount = PageCount(relation); tupleCountEstimate = clamp_row_est(tupleDensity * (double) pageCount); } else { - tupleCountEstimate = (double) CStoreTableRowCount(relid, filename); + tupleCountEstimate = (double) CStoreTableRowCount(relation); } return tupleCountEstimate; @@ -1720,25 +1588,14 @@ TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename) /* PageCount calculates and returns the number of pages in a file. */ static BlockNumber -PageCount(const char *filename) +PageCount(Relation relation) { - BlockNumber pageCount = 0; - struct stat statBuffer; + BlockNumber nblocks; - /* if file doesn't exist at plan time, use default estimate for its size */ - int statResult = stat(filename, &statBuffer); - if (statResult < 0) - { - statBuffer.st_size = 10 * BLCKSZ; - } + RelationOpenSmgr(relation); + nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); - pageCount = (statBuffer.st_size + (BLCKSZ - 1)) / BLCKSZ; - if (pageCount < 1) - { - pageCount = 1; - } - - return pageCount; + return (nblocks > 0) ? 
nblocks : 1; } @@ -1856,25 +1713,18 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) { - Relation relation = scanState->ss.ss_currentRelation; - CStoreOptions *cstoreOptions; - Oid foreignTableId; + Relation relation = scanState->ss.ss_currentRelation; cstore_fdw_initrel(relation); - foreignTableId = RelationGetRelid(relation); - cstoreOptions = CStoreGetOptions(foreignTableId); /* supress file size if we're not showing cost details */ if (explainState->costs) { - struct stat statBuffer; - - int statResult = stat(cstoreOptions->filename, &statBuffer); - if (statResult == 0) - { - ExplainPropertyLong("CStore File Size", (long) statBuffer.st_size, - explainState); - } + long nblocks; + RelationOpenSmgr(relation); + nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + ExplainPropertyLong("CStore File Size", (long) (nblocks * BLCKSZ), + explainState); } } @@ -1909,8 +1759,9 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) whereClauseList = foreignScan->scan.plan.qual; columnList = (List *) linitial(foreignPrivateList); - readState = CStoreBeginRead(foreignTableId, cstoreOptions->filename, + readState = CStoreBeginRead(foreignTableId, tupleDescriptor, columnList, whereClauseList); + readState->relation = cstore_fdw_open(foreignTableId, AccessShareLock); scanState->fdw_state = (void *) readState; } @@ -1956,6 +1807,7 @@ CStoreEndForeignScan(ForeignScanState *scanState) TableReadState *readState = (TableReadState *) scanState->fdw_state; if (readState != NULL) { + heap_close(readState->relation, AccessShareLock); CStoreEndRead(readState); } } @@ -1979,22 +1831,9 @@ CStoreAnalyzeForeignTable(Relation relation, AcquireSampleRowsFunc *acquireSampleRowsFunc, BlockNumber *totalPageCount) { - Oid foreignTableId = RelationGetRelid(relation); - CStoreOptions *cstoreOptions; - struct stat statBuffer; - cstore_fdw_initrel(relation); - cstoreOptions = 
CStoreGetOptions(foreignTableId); - - int statResult = stat(cstoreOptions->filename, &statBuffer); - if (statResult < 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - cstoreOptions->filename))); - } - - (*totalPageCount) = PageCount(cstoreOptions->filename); + RelationOpenSmgr(relation); + (*totalPageCount) = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); (*acquireSampleRowsFunc) = CStoreAcquireSampleRows; return true; @@ -2267,7 +2106,6 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); writeState = CStoreBeginWrite(foreignTableOid, - cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, @@ -2372,7 +2210,7 @@ cstore_fdw_initrel(Relation rel) { #if PG_VERSION_NUM >= 120000 if (rel->rd_rel->relfilenode == InvalidOid) - InitializeRelFileNode(rel); + InitializeRelFileNode(rel, false); /* * Copied code from RelationInitPhysicalAddr(), which doesn't diff --git a/cstore_reader.c b/cstore_reader.c index ddef3395a..4cbe2a44f 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -38,7 +38,7 @@ #include "cstore_version_compat.h" /* static function declarations */ -static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, +static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, TupleDesc tupleDescriptor, @@ -48,12 +48,12 @@ static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColum uint64 blockIndex, uint64 blockRowIndex, ColumnBlockData **blockDataArray, Datum *columnValues, bool *columnNulls); -static ColumnBuffers * LoadColumnBuffers(FILE *tableFile, +static ColumnBuffers * LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, Form_pg_attribute attributeForm); -static 
StripeSkipList * LoadStripeSkipList(FILE *tableFile, +static StripeSkipList * LoadStripeSkipList(Relation relation, StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, uint32 columnCount, @@ -82,10 +82,10 @@ static void DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex TupleDesc tupleDescriptor); static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm); -static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size); +static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount); -static uint64 StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata); +static uint64 StripeRowCount(Relation relation, StripeMetadata *stripeMetadata); static int RelationColumnCount(Oid relid); @@ -94,12 +94,11 @@ static int RelationColumnCount(Oid relid); * read handle that's used during reading rows and finishing the read operation. */ TableReadState * -CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, +CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; TableMetadata *tableMetadata = NULL; - FILE *tableFile = NULL; MemoryContext stripeReadContext = NULL; uint32 columnCount = 0; bool *projectedColumnMask = NULL; @@ -107,14 +106,6 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, tableMetadata = ReadTableMetadata(relationId); - tableFile = AllocateFile(filename, PG_BINARY_R); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for reading: %m", - filename))); - } - /* * We allocate all stripe specific data in the stripeReadContext, and reset * this memory context before loading a new stripe. 
This is to avoid memory @@ -131,7 +122,6 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, readState = palloc0(sizeof(TableReadState)); readState->relationId = relationId; - readState->tableFile = tableFile; readState->tableMetadata = tableMetadata; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; @@ -187,7 +177,8 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu stripeFooter = ReadStripeFooter(readState->relationId, stripeMetadata->id, readState->tupleDescriptor->natts); - stripeBuffers = LoadFilteredStripeBuffers(readState->tableFile, stripeMetadata, + stripeBuffers = LoadFilteredStripeBuffers(readState->relation, + stripeMetadata, stripeFooter, readState->tupleDescriptor, readState->projectedColumnList, @@ -263,7 +254,6 @@ CStoreEndRead(TableReadState *readState) int columnCount = readState->tupleDescriptor->natts; MemoryContextDelete(readState->stripeReadContext); - FreeFile(readState->tableFile); list_free_deep(readState->tableMetadata->stripeMetadataList); FreeColumnBlockDataArray(readState->blockDataArray, columnCount); pfree(readState->tableMetadata); @@ -326,30 +316,20 @@ FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) /* CStoreTableRowCount returns the exact row count of a table using skiplists */ uint64 -CStoreTableRowCount(Oid relid, const char *filename) +CStoreTableRowCount(Relation relation) { TableMetadata *tableMetadata = NULL; - FILE *tableFile; ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - tableMetadata = ReadTableMetadata(relid); - - tableFile = AllocateFile(filename, PG_BINARY_R); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for reading: %m", filename))); - } + tableMetadata = ReadTableMetadata(relation->rd_id); foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) { StripeMetadata *stripeMetadata = 
(StripeMetadata *) lfirst(stripeMetadataCell); - totalRowCount += StripeRowCount(relid, tableFile, stripeMetadata); + totalRowCount += StripeRowCount(relation, stripeMetadata); } - FreeFile(tableFile); - return totalRowCount; } @@ -359,15 +339,15 @@ CStoreTableRowCount(Oid relid, const char *filename) * skip list, and returns number of rows for given stripe. */ static uint64 -StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata) +StripeRowCount(Relation relation, StripeMetadata *stripeMetadata) { uint64 rowCount = 0; StringInfo firstColumnSkipListBuffer = NULL; - StripeFooter *stripeFooter = ReadStripeFooter(relid, stripeMetadata->id, - RelationColumnCount(relid)); + StripeFooter *stripeFooter = ReadStripeFooter(relation->rd_id, stripeMetadata->id, + RelationColumnCount(relation->rd_id)); - firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, + firstColumnSkipListBuffer = ReadFromSmgr(relation, stripeMetadata->fileOffset, stripeFooter->skipListSizeArray[0]); rowCount = DeserializeRowCount(firstColumnSkipListBuffer); @@ -381,7 +361,7 @@ StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata) * and only loads columns that are projected in the query. 
*/ static StripeBuffers * -LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, +LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { @@ -393,7 +373,7 @@ LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - StripeSkipList *stripeSkipList = LoadStripeSkipList(tableFile, stripeMetadata, + StripeSkipList *stripeSkipList = LoadStripeSkipList(relation, stripeMetadata, stripeFooter, columnCount, projectedColumnMask, tupleDescriptor); @@ -423,7 +403,7 @@ LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); uint32 blockCount = selectedBlockSkipList->blockCount; - ColumnBuffers *columnBuffers = LoadColumnBuffers(tableFile, blockSkipNode, + ColumnBuffers *columnBuffers = LoadColumnBuffers(relation, blockSkipNode, blockCount, existsFileOffset, valueFileOffset, @@ -482,7 +462,7 @@ ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, * and lengths are retrieved from the column block skip node array. 
*/ static ColumnBuffers * -LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, +LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, Form_pg_attribute attributeForm) { @@ -505,7 +485,7 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, { ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; uint64 existsOffset = existsFileOffset + blockSkipNode->existsBlockOffset; - StringInfo rawExistsBuffer = ReadFromFile(tableFile, existsOffset, + StringInfo rawExistsBuffer = ReadFromSmgr(relation, existsOffset, blockSkipNode->existsLength); blockBuffersArray[blockIndex]->existsBuffer = rawExistsBuffer; @@ -517,7 +497,7 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; CompressionType compressionType = blockSkipNode->valueCompressionType; uint64 valueOffset = valueFileOffset + blockSkipNode->valueBlockOffset; - StringInfo rawValueBuffer = ReadFromFile(tableFile, valueOffset, + StringInfo rawValueBuffer = ReadFromSmgr(relation, valueOffset, blockSkipNode->valueLength); blockBuffersArray[blockIndex]->valueBuffer = rawValueBuffer; @@ -533,7 +513,8 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, /* Reads the skip list for the given stripe. 
*/ static StripeSkipList * -LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, +LoadStripeSkipList(Relation relation, + StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, uint32 columnCount, bool *projectedColumnMask, TupleDesc tupleDescriptor) @@ -547,7 +528,7 @@ LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, uint32 stripeColumnCount = stripeFooter->columnCount; /* deserialize block count */ - firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, + firstColumnSkipListBuffer = ReadFromSmgr(relation, stripeMetadata->fileOffset, stripeFooter->skipListSizeArray[0]); stripeBlockCount = DeserializeBlockCount(firstColumnSkipListBuffer); @@ -570,7 +551,7 @@ LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); StringInfo columnSkipListBuffer = - ReadFromFile(tableFile, currentColumnSkipListFileOffset, + ReadFromSmgr(relation, currentColumnSkipListFileOffset, columnSkipListSize); ColumnBlockSkipNode *columnSkipList = DeserializeColumnSkipList(columnSkipListBuffer, attributeForm->attbyval, @@ -1178,49 +1159,37 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor return defaultValue; } - -/* Reads the given segment from the given file. 
*/ static StringInfo -ReadFromFile(FILE *file, uint64 offset, uint32 size) +ReadFromSmgr(Relation rel, uint64 offset, uint32 size) { - int fseekResult = 0; - int freadResult = 0; - int fileError = 0; + StringInfo resultBuffer = makeStringInfo(); + uint64 read = 0; - StringInfo resultBuffer = makeStringInfo(); enlargeStringInfo(resultBuffer, size); resultBuffer->len = size; - if (size == 0) + while (read < size) { - return resultBuffer; - } + Buffer buffer; + Page page; + PageHeader phdr; + uint32 to_read; + SmgrAddr addr = logical_to_smgr(offset + read); + uint32 pageoffset = addr.offset + SizeOfPageHeaderData; - errno = 0; - fseekResult = fseeko(file, offset, SEEK_SET); - if (fseekResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek in file: %m"))); - } + buffer = ReadBuffer(rel, addr.blockno); + page = BufferGetPage(buffer); + phdr = (PageHeader)page; - freadResult = fread(resultBuffer->data, size, 1, file); - if (freadResult != 1) - { - ereport(ERROR, (errmsg("could not read enough data from file"))); - } - - fileError = ferror(file); - if (fileError != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read file: %m"))); + to_read = Min(size - read, phdr->pd_upper - pageoffset); + memcpy(resultBuffer->data + read, page + pageoffset, to_read); + ReleaseBuffer(buffer); + read += to_read; } return resultBuffer; } - /* * ResetUncompressedBlockData iterates over deserialized column block data * and sets valueBuffer field to empty buffer. 
This field is allocated in stripe diff --git a/cstore_writer.c b/cstore_writer.c index 240c13fc2..76e3aa070 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -16,12 +16,12 @@ #include "postgres.h" -#include - #include "access/nbtree.h" #include "catalog/pg_am.h" #include "storage/fd.h" +#include "storage/smgr.h" #include "utils/memutils.h" +#include "utils/rel.h" #include "cstore.h" #include "cstore_metadata_serialization.h" @@ -51,8 +51,6 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); static void AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata); -static void WriteToFile(FILE *file, void *data, uint32 dataLength); -static void SyncAndCloseFile(FILE *file); static StringInfo CopyStringInfo(StringInfo sourceString); @@ -65,12 +63,11 @@ static StringInfo CopyStringInfo(StringInfo sourceString); */ TableWriteState * CStoreBeginWrite(Oid relationId, - const char *filename, CompressionType compressionType, + CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) { TableWriteState *writeState = NULL; - FILE *tableFile = NULL; TableMetadata *tableMetadata = NULL; FmgrInfo **comparisonFunctionArray = NULL; MemoryContext stripeWriteContext = NULL; @@ -81,14 +78,6 @@ CStoreBeginWrite(Oid relationId, ColumnBlockData **blockData = NULL; uint64 currentStripeId = 0; - tableFile = AllocateFile(filename, "a+"); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - filename))); - } - tableMetadata = ReadTableMetadata(relationId); /* @@ -99,7 +88,6 @@ CStoreBeginWrite(Oid relationId, { StripeMetadata *lastStripe = NULL; uint64 lastStripeSize = 0; - int fseekResult = 0; lastStripe = llast(tableMetadata->stripeMetadataList); lastStripeSize += lastStripe->skipListLength; @@ -108,14 +96,6 @@ CStoreBeginWrite(Oid 
relationId, currentFileOffset = lastStripe->fileOffset + lastStripeSize; currentStripeId = lastStripe->id + 1; - - errno = 0; - fseekResult = fseeko(tableFile, currentFileOffset, SEEK_SET); - if (fseekResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek in file \"%s\": %m", filename))); - } } /* get comparison function pointers for each of the columns */ @@ -154,7 +134,6 @@ CStoreBeginWrite(Oid relationId, writeState = palloc0(sizeof(TableWriteState)); writeState->relationId = relationId; - writeState->tableFile = tableFile; writeState->tableMetadata = tableMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; @@ -312,8 +291,6 @@ CStoreEndWrite(TableWriteState *writeState) AppendStripeMetadata(writeState->tableMetadata, stripeMetadata); } - SyncAndCloseFile(writeState->tableFile); - MemoryContextDelete(writeState->stripeWriteContext); list_free_deep(writeState->tableMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); @@ -391,6 +368,56 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, return stripeSkipList; } +static void +WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) +{ + uint64 logicalOffset = writeState->currentFileOffset; + uint64 remaining = dataLength; + Relation rel = writeState->relation; + Buffer buffer; + + while (remaining > 0) + { + SmgrAddr addr = logical_to_smgr(logicalOffset); + BlockNumber nblocks; + Page page; + PageHeader phdr; + uint64 to_write; + + RelationOpenSmgr(rel); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + + while (addr.blockno >= nblocks) + { + Buffer buffer = ReadBuffer(rel, P_NEW); + ReleaseBuffer(buffer); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + } + + RelationCloseSmgr(rel); + + buffer = ReadBuffer(rel, addr.blockno); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(buffer); + phdr = (PageHeader) page; + if (PageIsNew(page)) + 
PageInit(page, BLCKSZ, 0); + + /* always appending */ + Assert(phdr->pd_lower == addr.offset + SizeOfPageHeaderData); + + to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); + memcpy(page + phdr->pd_lower, data, to_write); + phdr->pd_lower += to_write; + + UnlockReleaseBuffer(buffer); + + data += to_write; + remaining -= to_write; + logicalOffset += to_write; + } +} /* * FlushStripe flushes current stripe data into the file. The function first ensures @@ -409,7 +436,6 @@ FlushStripe(TableWriteState *writeState) uint32 columnIndex = 0; uint32 blockIndex = 0; TableMetadata *tableMetadata = writeState->tableMetadata; - FILE *tableFile = writeState->tableFile; StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray; @@ -419,6 +445,7 @@ FlushStripe(TableWriteState *writeState) uint32 blockRowCount = tableMetadata->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; + uint64 initialFileOffset = writeState->currentFileOffset; /* * check if the last block needs serialization , the last block was not serialized @@ -479,7 +506,8 @@ FlushStripe(TableWriteState *writeState) for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { StringInfo skipListBuffer = skipListBufferArray[columnIndex]; - WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len); + WriteToSmgr(writeState, skipListBuffer->data, skipListBuffer->len); + writeState->currentFileOffset += skipListBuffer->len; } /* then, we flush the data buffers */ @@ -494,7 +522,8 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo existsBuffer = blockBuffers->existsBuffer; - WriteToFile(tableFile, existsBuffer->data, existsBuffer->len); + WriteToSmgr(writeState, existsBuffer->data, existsBuffer->len); + 
writeState->currentFileOffset += existsBuffer->len; } for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) @@ -503,7 +532,8 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = blockBuffers->valueBuffer; - WriteToFile(tableFile, valueBuffer->data, valueBuffer->len); + WriteToSmgr(writeState, valueBuffer->data, valueBuffer->len); + writeState->currentFileOffset += valueBuffer->len; } } @@ -520,16 +550,12 @@ FlushStripe(TableWriteState *writeState) dataLength += stripeFooter->valueSizeArray[columnIndex]; } - stripeMetadata.fileOffset = writeState->currentFileOffset; + stripeMetadata.fileOffset = initialFileOffset; stripeMetadata.skipListLength = skipListLength; stripeMetadata.dataLength = dataLength; stripeMetadata.footerLength = 0; stripeMetadata.id = writeState->currentStripeId; - /* advance current file offset */ - writeState->currentFileOffset += skipListLength; - writeState->currentFileOffset += dataLength; - return stripeMetadata; } @@ -834,76 +860,6 @@ AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata stripeMetadataCopy); } - -/* Writes the given data to the given file pointer and checks for errors. */ -static void -WriteToFile(FILE *file, void *data, uint32 dataLength) -{ - int writeResult = 0; - int errorResult = 0; - - if (dataLength == 0) - { - return; - } - - errno = 0; - writeResult = fwrite(data, dataLength, 1, file); - if (writeResult != 1) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write file: %m"))); - } - - errorResult = ferror(file); - if (errorResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("error in file: %m"))); - } -} - - -/* Flushes, syncs, and closes the given file pointer and checks for errors. 
*/ -static void -SyncAndCloseFile(FILE *file) -{ - int flushResult = 0; - int syncResult = 0; - int errorResult = 0; - int freeResult = 0; - - errno = 0; - flushResult = fflush(file); - if (flushResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not flush file: %m"))); - } - - syncResult = pg_fsync(fileno(file)); - if (syncResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not sync file: %m"))); - } - - errorResult = ferror(file); - if (errorResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("error in file: %m"))); - } - - freeResult = FreeFile(file); - if (freeResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not close file: %m"))); - } -} - - /* * CopyStringInfo creates a deep copy of given source string allocating only needed * amount of memory. diff --git a/expected/drop.out b/expected/drop.out index dc5678da7..926f69337 100644 --- a/expected/drop.out +++ b/expected/drop.out @@ -12,17 +12,6 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset --- Check that files for the automatically managed table exist in the --- cstore_fdw/{databaseoid} directory. -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 2 -(1 row) - -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; @@ -31,17 +20,6 @@ CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to foreign table test_schema.test_table --- Check that the files have been deleted and the directory is empty after the --- DROP table command. 
-SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 0 -(1 row) - SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop @@ -49,49 +27,14 @@ CREATE EXTENSION cstore_fdw; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; --- should see 2 files, data and footer file for single table -SELECT count(*) FROM pg_ls_dir('cstore_fdw/' || :databaseoid); - count -------- - 2 -(1 row) - --- should see 2 directories 1 for each database, excluding postgres database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - count -------- - 2 -(1 row) - DROP EXTENSION cstore_fdw CASCADE; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to server cstore_server drop cascades to foreign table test_table --- should only see 1 directory here -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - count -------- - 1 -(1 row) - -- test database drop CREATE EXTENSION cstore_fdw; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; --- should see 2 directories 1 for each database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - count -------- - 2 -(1 row) - \c :datname DROP DATABASE db_to_drop; --- should only see 1 directory for the default database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - count -------- - 1 -(1 row) - diff --git a/expected/truncate.out b/expected/truncate.out index 14119c804..c92c15559 100644 --- a/expected/truncate.out +++ 
b/expected/truncate.out @@ -9,17 +9,6 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; t (1 row) --- Check that files for the automatically managed table exist in the --- cstore_fdw/{databaseoid} directory. -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 0 -(1 row) - -- CREATE a cstore_fdw table, fill with some data -- CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; @@ -75,16 +64,6 @@ SELECT cstore_table_size('cstore_truncate_test_compressed'); 0 (1 row) --- make sure data files still present -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 3 -(1 row) - INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; @@ -250,13 +229,3 @@ SELECT count(*) FROM truncate_schema.truncate_tbl; DROP SCHEMA truncate_schema CASCADE; NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl DROP USER truncate_user; --- verify files are removed -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 0 -(1 row) - diff --git a/input/block_filtering.source b/input/block_filtering.source index 4451262d4..dc3170f0d 100644 --- a/input/block_filtering.source +++ b/input/block_filtering.source @@ -30,8 +30,7 @@ $$ LANGUAGE PLPGSQL; -- Create and load data CREATE FOREIGN TABLE test_block_filtering (a int) SERVER 
cstore_server - OPTIONS(filename '@abs_srcdir@/data/block_filtering.cstore', - block_row_count '1000', stripe_row_count '2000'); + OPTIONS(block_row_count '1000', stripe_row_count '2000'); COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; @@ -60,8 +59,7 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET -- Verify that we are fine with collations which use a different alphabet order CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/collation_block_filtering.cstore'); + SERVER cstore_server; COPY collation_block_filtering_test FROM STDIN; A Å diff --git a/input/copyto.source b/input/copyto.source index 96403a3f4..a4b753a8d 100644 --- a/input/copyto.source +++ b/input/copyto.source @@ -3,8 +3,7 @@ -- CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/test_contestant.cstore'); + SERVER cstore_server; -- load table data from file COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; diff --git a/input/create.source b/input/create.source index fbd27dc50..ba52137c1 100644 --- a/input/create.source +++ b/input/create.source @@ -12,30 +12,24 @@ CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests CREATE FOREIGN TABLE test_validator_invalid_option () SERVER cstore_server - OPTIONS(filename 'data.cstore', bad_option_name '1'); -- ERROR + OPTIONS(bad_option_name '1'); -- ERROR CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () SERVER cstore_server - OPTIONS(filename 'data.cstore', stripe_row_count '0'); -- ERROR + OPTIONS(stripe_row_count '0'); -- ERROR CREATE FOREIGN TABLE test_validator_invalid_block_row_count () SERVER cstore_server - OPTIONS(filename 'data.cstore', block_row_count '0'); -- ERROR + OPTIONS(block_row_count 
'0'); -- ERROR CREATE FOREIGN TABLE test_validator_invalid_compression_type () SERVER cstore_server - OPTIONS(filename 'data.cstore', compression 'invalid_compression'); -- ERROR - --- Invalid file path test -CREATE FOREIGN TABLE test_invalid_file_path () - SERVER cstore_server - OPTIONS(filename 'bad_directory_path/bad_file_path'); --ERROR + OPTIONS(compression 'invalid_compression'); -- ERROR -- Create uncompressed table CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/contestant.cstore'); + SERVER cstore_server; -- Create compressed table with automatically determined file path diff --git a/input/data_types.source b/input/data_types.source index c3398c67b..ec83c4d8c 100644 --- a/input/data_types.source +++ b/input/data_types.source @@ -11,8 +11,7 @@ SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/array_types.cstore'); + text_array text[]) SERVER cstore_server; COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; @@ -22,8 +21,7 @@ SELECT * FROM test_array_types; -- Test date/time types CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/datetime_types.cstore'); + interval interval) SERVER cstore_server; COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; @@ -35,8 +33,7 @@ CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server - OPTIONS(filename 
'@abs_srcdir@/data/enum_and_composite_types.cstore'); + composite composite_type) SERVER cstore_server; COPY test_enum_and_composite_types FROM '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; @@ -46,8 +43,7 @@ SELECT * FROM test_enum_and_composite_types; -- Test range types CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/range_types.cstore'); + numrange numrange, tsrange tsrange) SERVER cstore_server; COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; @@ -56,8 +52,7 @@ SELECT * FROM test_range_types; -- Test other types CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/other_types.cstore'); + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; @@ -66,8 +61,7 @@ SELECT * FROM test_other_types; -- Test null values CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/null_values.cstore'); + SERVER cstore_server; COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; diff --git a/output/block_filtering.source b/output/block_filtering.source index 21e1eb772..2f664a78a 100644 --- a/output/block_filtering.source +++ b/output/block_filtering.source @@ -26,8 +26,7 @@ $$ LANGUAGE PLPGSQL; -- Create and load data CREATE FOREIGN TABLE test_block_filtering (a int) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/block_filtering.cstore', - block_row_count '1000', stripe_row_count '2000'); + OPTIONS(block_row_count '1000', stripe_row_count '2000'); COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; -- Verify that filtered_row_count is 
less than 1000 for the following queries SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); @@ -107,8 +106,7 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET -- Verify that we are fine with collations which use a different alphabet order CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/collation_block_filtering.cstore'); + SERVER cstore_server; COPY collation_block_filtering_test FROM STDIN; SELECT * FROM collation_block_filtering_test WHERE A > 'B'; a diff --git a/output/copyto.source b/output/copyto.source index 6024dd205..a8d841f18 100644 --- a/output/copyto.source +++ b/output/copyto.source @@ -3,8 +3,7 @@ -- CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/test_contestant.cstore'); + SERVER cstore_server; -- load table data from file COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- export using COPY table TO ... 
diff --git a/output/create.source b/output/create.source index 937afa2a0..961c0494d 100644 --- a/output/create.source +++ b/output/create.source @@ -7,34 +7,28 @@ CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests CREATE FOREIGN TABLE test_validator_invalid_option () SERVER cstore_server - OPTIONS(filename 'data.cstore', bad_option_name '1'); -- ERROR + OPTIONS(bad_option_name '1'); -- ERROR ERROR: invalid option "bad_option_name" -HINT: Valid options in this context are: filename, compression, stripe_row_count, block_row_count +HINT: Valid options in this context are: compression, stripe_row_count, block_row_count CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () SERVER cstore_server - OPTIONS(filename 'data.cstore', stripe_row_count '0'); -- ERROR + OPTIONS(stripe_row_count '0'); -- ERROR ERROR: invalid stripe row count HINT: Stripe row count must be an integer between 1000 and 10000000 CREATE FOREIGN TABLE test_validator_invalid_block_row_count () SERVER cstore_server - OPTIONS(filename 'data.cstore', block_row_count '0'); -- ERROR + OPTIONS(block_row_count '0'); -- ERROR ERROR: invalid block row count HINT: Block row count must be an integer between 1000 and 100000 CREATE FOREIGN TABLE test_validator_invalid_compression_type () SERVER cstore_server - OPTIONS(filename 'data.cstore', compression 'invalid_compression'); -- ERROR + OPTIONS(compression 'invalid_compression'); -- ERROR ERROR: invalid compression type HINT: Valid options are: none, pglz --- Invalid file path test -CREATE FOREIGN TABLE test_invalid_file_path () - SERVER cstore_server - OPTIONS(filename 'bad_directory_path/bad_file_path'); --ERROR -ERROR: could not open file "bad_directory_path/bad_file_path" for writing: No such file or directory -- Create uncompressed table CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(filename 
'@abs_srcdir@/data/contestant.cstore'); + SERVER cstore_server; -- Create compressed table with automatically determined file path CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) diff --git a/output/data_types.source b/output/data_types.source index efa03a663..23fdcfa29 100644 --- a/output/data_types.source +++ b/output/data_types.source @@ -7,8 +7,7 @@ SET timezone to 'GMT'; SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/array_types.cstore'); + text_array text[]) SERVER cstore_server; COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; SELECT * FROM test_array_types; int_array | bigint_array | text_array @@ -21,8 +20,7 @@ SELECT * FROM test_array_types; -- Test date/time types CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/datetime_types.cstore'); + interval interval) SERVER cstore_server; COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; SELECT * FROM test_datetime_types; timestamp | timestamp_with_timezone | date | time | interval @@ -35,8 +33,7 @@ SELECT * FROM test_datetime_types; CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/enum_and_composite_types.cstore'); + composite composite_type) SERVER cstore_server; COPY test_enum_and_composite_types FROM '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; SELECT * FROM test_enum_and_composite_types; @@ -48,8 +45,7 @@ SELECT * FROM 
test_enum_and_composite_types; -- Test range types CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/range_types.cstore'); + numrange numrange, tsrange tsrange) SERVER cstore_server; COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; SELECT * FROM test_range_types; int4range | int8range | numrange | tsrange @@ -60,8 +56,7 @@ SELECT * FROM test_range_types; -- Test other types CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/other_types.cstore'); + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; SELECT * FROM test_other_types; bool | bytea | money | inet | bitstring | uuid | json @@ -72,8 +67,7 @@ SELECT * FROM test_other_types; -- Test null values CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/null_values.cstore'); + SERVER cstore_server; COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; SELECT * FROM test_null_values; a | b | c diff --git a/sql/drop.sql b/sql/drop.sql index a0852a279..c64b5c99b 100644 --- a/sql/drop.sql +++ b/sql/drop.sql @@ -15,13 +15,6 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset --- Check that files for the automatically managed table exist in the --- cstore_fdw/{databaseoid} directory. 
-SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; @@ -31,13 +24,6 @@ CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; DROP SCHEMA test_schema CASCADE; --- Check that the files have been deleted and the directory is empty after the --- DROP table command. -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - SELECT current_database() datname \gset CREATE DATABASE db_to_drop; @@ -47,17 +33,9 @@ CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; --- should see 2 files, data and footer file for single table -SELECT count(*) FROM pg_ls_dir('cstore_fdw/' || :databaseoid); - --- should see 2 directories 1 for each database, excluding postgres database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; DROP EXTENSION cstore_fdw CASCADE; --- should only see 1 directory here -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - -- test database drop CREATE EXTENSION cstore_fdw; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; @@ -65,12 +43,6 @@ SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; --- should see 2 directories 1 for each database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - \c :datname DROP DATABASE db_to_drop; - --- should only see 1 directory for the default database -SELECT 
count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; diff --git a/sql/truncate.sql b/sql/truncate.sql index 0aac2bd34..a1849045e 100644 --- a/sql/truncate.sql +++ b/sql/truncate.sql @@ -6,13 +6,6 @@ SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; --- Check that files for the automatically managed table exist in the --- cstore_fdw/{databaseoid} directory. -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - -- CREATE a cstore_fdw table, fill with some data -- CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; @@ -39,12 +32,6 @@ SELECT count(*) FROM cstore_truncate_test_compressed; SELECT cstore_table_size('cstore_truncate_test_compressed'); --- make sure data files still present -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; @@ -127,9 +114,3 @@ SELECT count(*) FROM truncate_schema.truncate_tbl; -- cleanup DROP SCHEMA truncate_schema CASCADE; DROP USER truncate_user; - --- verify files are removed -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; From 573555747f7c0637167d81dcea0f1cd551794040 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 16:28:57 -0700 Subject: [PATCH 16/91] address review comments --- cstore.h | 6 +++++- 
cstore_reader.c | 5 ++--- cstore_writer.c | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cstore.h b/cstore.h index 9679fea9c..d0f959032 100644 --- a/cstore.h +++ b/cstore.h @@ -300,6 +300,10 @@ typedef struct SmgrAddr uint32 offset; } SmgrAddr; +/* + * Map logical offsets (as tracked in the metadata) to a physical page and + * offset where the data is kept. + */ static inline SmgrAddr logical_to_smgr(uint64 logicalOffset) { @@ -307,7 +311,7 @@ logical_to_smgr(uint64 logicalOffset) SmgrAddr addr; addr.blockno = logicalOffset / bytes_per_page; - addr.offset = logicalOffset % bytes_per_page; + addr.offset = SizeOfPageHeaderData + (logicalOffset % bytes_per_page); return addr; } diff --git a/cstore_reader.c b/cstore_reader.c index 4cbe2a44f..654d74697 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -1175,14 +1175,13 @@ ReadFromSmgr(Relation rel, uint64 offset, uint32 size) PageHeader phdr; uint32 to_read; SmgrAddr addr = logical_to_smgr(offset + read); - uint32 pageoffset = addr.offset + SizeOfPageHeaderData; buffer = ReadBuffer(rel, addr.blockno); page = BufferGetPage(buffer); phdr = (PageHeader)page; - to_read = Min(size - read, phdr->pd_upper - pageoffset); - memcpy(resultBuffer->data + read, page + pageoffset, to_read); + to_read = Min(size - read, phdr->pd_upper - addr.offset); + memcpy(resultBuffer->data + read, page + addr.offset, to_read); ReleaseBuffer(buffer); read += to_read; } diff --git a/cstore_writer.c b/cstore_writer.c index 76e3aa070..319136dc9 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -405,12 +405,13 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) PageInit(page, BLCKSZ, 0); /* always appending */ - Assert(phdr->pd_lower == addr.offset + SizeOfPageHeaderData); + Assert(phdr->pd_lower == addr.offset); to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); memcpy(page + phdr->pd_lower, data, to_write); phdr->pd_lower += to_write; + MarkBufferDirty(buffer); 
UnlockReleaseBuffer(buffer); data += to_write; From fb110446bebdc73af8f8dd8b8b73f28551844f4c Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 14 Sep 2020 13:13:36 -0700 Subject: [PATCH 17/91] Fix compilation in pg 11 --- cstore_metadata_tables.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 3843e4cd6..5b6151ef6 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -14,9 +14,8 @@ #include #include "access/heapam.h" +#include "access/htup_details.h" #include "access/nbtree.h" -#include "access/table.h" -#include "access/tableam.h" #include "access/xact.h" #include "catalog/indexing.h" #include "catalog/pg_namespace.h" @@ -30,7 +29,6 @@ #include "miscadmin.h" #include "nodes/execnodes.h" #include "lib/stringinfo.h" -#include "optimizer/optimizer.h" #include "port.h" #include "storage/fd.h" #include "utils/fmgroids.h" @@ -263,7 +261,7 @@ DeleteTableMetadataRowIfExists(Oid relid) BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = table_open(cstoreTablesOid, AccessShareLock); + cstoreTables = heap_open(cstoreTablesOid, AccessShareLock); index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); @@ -278,7 +276,7 @@ DeleteTableMetadataRowIfExists(Oid relid) systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); - table_close(cstoreTables, NoLock); + heap_close(cstoreTables, NoLock); } @@ -412,9 +410,15 @@ InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls) { TupleDesc tupleDescriptor = RelationGetDescr(state->rel); HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + +#if PG_VERSION_NUM >= 120000 TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor, &TTSOpsHeapTuple); ExecStoreHeapTuple(tuple, slot, false); 
+#else + TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor); + ExecStoreTuple(tuple, slot, InvalidBuffer, false); +#endif /* use ExecSimpleRelationInsert to enforce constraints */ ExecSimpleRelationInsert(state->estate, slot); @@ -432,7 +436,7 @@ DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple) ResultRelInfo *resultRelInfo = estate->es_result_relation_info; ItemPointer tid = &(heapTuple->t_self); - simple_table_tuple_delete(state->rel, tid, estate->es_snapshot); + simple_heap_delete(state->rel, tid); /* execute AFTER ROW DELETE Triggers to enforce constraints */ ExecARDeleteTriggers(estate, resultRelInfo, tid, NULL, NULL); @@ -476,8 +480,10 @@ create_estate_for_relation(Relation rel) rte->rtekind = RTE_RELATION; rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; +#if PG_VERSION_NUM >= 120000 rte->rellockmode = AccessShareLock; ExecInitRangeTable(estate, list_make1(rte)); +#endif resultRelInfo = makeNode(ResultRelInfo); InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); @@ -488,6 +494,12 @@ create_estate_for_relation(Relation rel) estate->es_output_cid = GetCurrentCommandId(true); +#if PG_VERSION_NUM < 120000 + /* Triggers might need a slot */ + if (resultRelInfo->ri_TrigDesc) + estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL); +#endif + /* Prepare to catch AFTER triggers. 
*/ AfterTriggerBeginQuery(); From 2737686fd0d3c3a490cbf0c87ae56b69b403dde6 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 14 Sep 2020 14:54:22 -0700 Subject: [PATCH 18/91] Move skipnodes to metadata tables --- Makefile | 9 +- cstore.h | 10 +- cstore.proto | 24 --- cstore_fdw--1.7.sql | 29 ++- cstore_metadata_serialization.c | 302 -------------------------------- cstore_metadata_serialization.h | 31 ---- cstore_metadata_tables.c | 300 +++++++++++++++++++++++++++++-- cstore_reader.c | 146 +-------------- cstore_writer.c | 81 ++------- 9 files changed, 342 insertions(+), 590 deletions(-) delete mode 100644 cstore.proto delete mode 100644 cstore_metadata_serialization.c delete mode 100644 cstore_metadata_serialization.h diff --git a/Makefile b/Makefile index 10d7fcc14..b8277f3a2 100644 --- a/Makefile +++ b/Makefile @@ -6,10 +6,8 @@ MODULE_big = cstore_fdw PG_CPPFLAGS = -std=c11 -SHLIB_LINK = -lprotobuf-c -OBJS = cstore.pb-c.o cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_metadata_serialization.o cstore_compression.o mod.o \ - cstore_metadata_tables.o +OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ + cstore_compression.o mod.o cstore_metadata_tables.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ @@ -51,9 +49,6 @@ ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) $(error PostgreSQL 9.3 to 12 is required to compile this extension) endif -cstore.pb-c.c: cstore.proto - protoc-c --c_out=. 
cstore.proto - installcheck: remove_cstore_files remove_cstore_files: diff --git a/cstore.h b/cstore.h index d0f959032..c7f849cc9 100644 --- a/cstore.h +++ b/cstore.h @@ -81,9 +81,9 @@ typedef struct CStoreOptions typedef struct StripeMetadata { uint64 fileOffset; - uint64 skipListLength; uint64 dataLength; - uint64 footerLength; + uint32 blockCount; + uint64 rowCount; uint64 id; } StripeMetadata; @@ -191,7 +191,6 @@ typedef struct StripeBuffers typedef struct StripeFooter { uint32 columnCount; - uint64 *skipListSizeArray; uint64 *existsSizeArray; uint64 *valueSizeArray; } StripeFooter; @@ -293,6 +292,11 @@ extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationCol extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); extern TableMetadata * ReadTableMetadata(Oid relid); +extern void SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, + TupleDesc tupleDescriptor); +extern StripeSkipList * ReadStripeSkipList(Oid relid, uint64 stripe, + TupleDesc tupleDescriptor, + uint32 blockCount); typedef struct SmgrAddr { diff --git a/cstore.proto b/cstore.proto deleted file mode 100644 index a7525b633..000000000 --- a/cstore.proto +++ /dev/null @@ -1,24 +0,0 @@ -syntax = "proto2"; - -package protobuf; - -enum CompressionType { - // Values should match with the corresponding struct in cstore_fdw.h - NONE = 0; - PG_LZ = 1; -}; - -message ColumnBlockSkipNode { - optional uint64 rowCount = 1; - optional bytes minimumValue = 2; - optional bytes maximumValue = 3; - optional uint64 valueBlockOffset = 4; - optional uint64 valueLength = 5; - optional CompressionType valueCompressionType = 6; - optional uint64 existsBlockOffset = 7; - optional uint64 existsLength = 8; -} - -message ColumnBlockSkipList { - repeated ColumnBlockSkipNode blockSkipNodeArray = 1; -} diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 7a0c9c7b8..1c19fda50 100644 --- 
a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -68,23 +68,48 @@ CREATE TABLE cstore.cstore_tables ( PRIMARY KEY (relid) ) WITH (user_catalog_table = true); +COMMENT ON TABLE cstore.cstore_tables IS 'CStore table wide metadata'; + CREATE TABLE cstore.cstore_stripes ( relid oid NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, - skiplist_length bigint NOT NULL, data_length bigint NOT NULL, + block_count int NOT NULL, + row_count bigint NOT NULL, PRIMARY KEY (relid, stripe), FOREIGN KEY (relid) REFERENCES cstore.cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); +COMMENT ON TABLE cstore.cstore_tables IS 'CStore per stripe metadata'; + CREATE TABLE cstore.cstore_stripe_attr ( relid oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, exists_size bigint NOT NULL, value_size bigint NOT NULL, - skiplist_size bigint NOT NULL, PRIMARY KEY (relid, stripe, attr), FOREIGN KEY (relid, stripe) REFERENCES cstore.cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); + +COMMENT ON TABLE cstore.cstore_tables IS 'CStore per stripe/column combination metadata'; + +CREATE TABLE cstore.cstore_skipnodes ( + relid oid NOT NULL, + stripe bigint NOT NULL, + attr int NOT NULL, + block int NOT NULL, + row_count bigint NOT NULL, + minimum_value bytea, + maximum_value bytea, + value_stream_offset bigint NOT NULL, + value_stream_length bigint NOT NULL, + exists_stream_offset bigint NOT NULL, + exists_stream_length bigint NOT NULL, + value_compression_type int NOT NULL, + PRIMARY KEY (relid, stripe, attr, block), + FOREIGN KEY (relid, stripe, attr) REFERENCES cstore.cstore_stripe_attr(relid, stripe, attr) ON DELETE CASCADE INITIALLY DEFERRED +) WITH (user_catalog_table = true); + +COMMENT ON TABLE cstore.cstore_tables IS 'CStore per block metadata'; diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c deleted file mode 100644 index 2b06d4a15..000000000 --- 
a/cstore_metadata_serialization.c +++ /dev/null @@ -1,302 +0,0 @@ -/*------------------------------------------------------------------------- - * - * cstore_metadata_serialization.c - * - * This file contains function definitions for serializing/deserializing cstore - * metadata. - * - * Copyright (c) 2016, Citus Data, Inc. - * - * $Id$ - * - *------------------------------------------------------------------------- - */ - - -#include "postgres.h" - -#include "access/tupmacs.h" - -#include "cstore.h" -#include "cstore_metadata_serialization.h" -#include "cstore.pb-c.h" - -/* local functions forward declarations */ -static ProtobufCBinaryData DatumToProtobufBinary(Datum datum, bool typeByValue, - int typeLength); -static Datum ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, - bool typeByValue, int typeLength); - - -/* - * SerializeColumnSkipList serializes a column skip list, where the colum skip - * list includes all block skip nodes for that column. The function then returns - * the result as a string info. 
- */ -StringInfo -SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, - bool typeByValue, int typeLength) -{ - StringInfo blockSkipListBuffer = NULL; - Protobuf__ColumnBlockSkipList protobufBlockSkipList = - PROTOBUF__COLUMN_BLOCK_SKIP_LIST__INIT; - Protobuf__ColumnBlockSkipNode **protobufBlockSkipNodeArray = NULL; - uint32 blockIndex = 0; - uint8 *blockSkipListData = NULL; - uint32 blockSkipListSize = 0; - - protobufBlockSkipNodeArray = palloc0(blockCount * - sizeof(Protobuf__ColumnBlockSkipNode *)); - for (blockIndex = 0; blockIndex < blockCount; blockIndex++) - { - ColumnBlockSkipNode blockSkipNode = blockSkipNodeArray[blockIndex]; - Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL; - ProtobufCBinaryData binaryMinimumValue = { 0, 0 }; - ProtobufCBinaryData binaryMaximumValue = { 0, 0 }; - - if (blockSkipNode.hasMinMax) - { - binaryMinimumValue = DatumToProtobufBinary(blockSkipNode.minimumValue, - typeByValue, typeLength); - binaryMaximumValue = DatumToProtobufBinary(blockSkipNode.maximumValue, - typeByValue, typeLength); - } - - protobufBlockSkipNode = palloc0(sizeof(Protobuf__ColumnBlockSkipNode)); - protobuf__column_block_skip_node__init(protobufBlockSkipNode); - protobufBlockSkipNode->has_rowcount = true; - protobufBlockSkipNode->rowcount = blockSkipNode.rowCount; - protobufBlockSkipNode->has_minimumvalue = blockSkipNode.hasMinMax; - protobufBlockSkipNode->minimumvalue = binaryMinimumValue; - protobufBlockSkipNode->has_maximumvalue = blockSkipNode.hasMinMax; - protobufBlockSkipNode->maximumvalue = binaryMaximumValue; - protobufBlockSkipNode->has_valueblockoffset = true; - protobufBlockSkipNode->valueblockoffset = blockSkipNode.valueBlockOffset; - protobufBlockSkipNode->has_valuelength = true; - protobufBlockSkipNode->valuelength = blockSkipNode.valueLength; - protobufBlockSkipNode->has_existsblockoffset = true; - protobufBlockSkipNode->existsblockoffset = blockSkipNode.existsBlockOffset; - 
protobufBlockSkipNode->has_existslength = true; - protobufBlockSkipNode->existslength = blockSkipNode.existsLength; - protobufBlockSkipNode->has_valuecompressiontype = true; - protobufBlockSkipNode->valuecompressiontype = - (Protobuf__CompressionType) blockSkipNode.valueCompressionType; - - protobufBlockSkipNodeArray[blockIndex] = protobufBlockSkipNode; - } - - protobufBlockSkipList.n_blockskipnodearray = blockCount; - protobufBlockSkipList.blockskipnodearray = protobufBlockSkipNodeArray; - - blockSkipListSize = - protobuf__column_block_skip_list__get_packed_size(&protobufBlockSkipList); - blockSkipListData = palloc0(blockSkipListSize); - protobuf__column_block_skip_list__pack(&protobufBlockSkipList, blockSkipListData); - - blockSkipListBuffer = palloc0(sizeof(StringInfoData)); - blockSkipListBuffer->len = blockSkipListSize; - blockSkipListBuffer->maxlen = blockSkipListSize; - blockSkipListBuffer->data = (char *) blockSkipListData; - - return blockSkipListBuffer; -} - - -/* - * DeserializeBlockCount deserializes the given column skip list buffer and - * returns the number of blocks in column skip list. - */ -uint32 -DeserializeBlockCount(StringInfo buffer) -{ - uint32 blockCount = 0; - Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; - - protobufBlockSkipList = - protobuf__column_block_skip_list__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufBlockSkipList == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid skip list buffer"))); - } - - blockCount = protobufBlockSkipList->n_blockskipnodearray; - - protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); - - return blockCount; -} - - -/* - * DeserializeRowCount deserializes the given column skip list buffer and - * returns the total number of rows in block skip list. 
- */ -uint32 -DeserializeRowCount(StringInfo buffer) -{ - uint32 rowCount = 0; - Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; - uint32 blockIndex = 0; - uint32 blockCount = 0; - - protobufBlockSkipList = - protobuf__column_block_skip_list__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufBlockSkipList == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid skip list buffer"))); - } - - blockCount = (uint32) protobufBlockSkipList->n_blockskipnodearray; - for (blockIndex = 0; blockIndex < blockCount; blockIndex++) - { - Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = - protobufBlockSkipList->blockskipnodearray[blockIndex]; - rowCount += protobufBlockSkipNode->rowcount; - } - - protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); - - return rowCount; -} - - -/* - * DeserializeColumnSkipList deserializes the given buffer and returns the result as - * a ColumnBlockSkipNode array. If the number of unpacked block skip nodes are not - * equal to the given block count function errors out. 
- */ -ColumnBlockSkipNode * -DeserializeColumnSkipList(StringInfo buffer, bool typeByValue, int typeLength, - uint32 blockCount) -{ - ColumnBlockSkipNode *blockSkipNodeArray = NULL; - uint32 blockIndex = 0; - Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; - - protobufBlockSkipList = - protobuf__column_block_skip_list__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufBlockSkipList == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid skip list buffer"))); - } - - if (protobufBlockSkipList->n_blockskipnodearray != blockCount) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("block skip node count and block count don't match"))); - } - - blockSkipNodeArray = palloc0(blockCount * sizeof(ColumnBlockSkipNode)); - - for (blockIndex = 0; blockIndex < blockCount; blockIndex++) - { - Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL; - ColumnBlockSkipNode *blockSkipNode = NULL; - bool hasMinMax = false; - Datum minimumValue = 0; - Datum maximumValue = 0; - - protobufBlockSkipNode = protobufBlockSkipList->blockskipnodearray[blockIndex]; - if (!protobufBlockSkipNode->has_rowcount || - !protobufBlockSkipNode->has_existsblockoffset || - !protobufBlockSkipNode->has_valueblockoffset || - !protobufBlockSkipNode->has_existslength || - !protobufBlockSkipNode->has_valuelength || - !protobufBlockSkipNode->has_valuecompressiontype) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("missing required block skip node metadata"))); - } - - if (protobufBlockSkipNode->has_minimumvalue != - protobufBlockSkipNode->has_maximumvalue) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("has minimum and has maximum fields " - "don't match"))); - } - - hasMinMax = protobufBlockSkipNode->has_minimumvalue; - if (hasMinMax) - { - minimumValue = ProtobufBinaryToDatum(protobufBlockSkipNode->minimumvalue, - typeByValue, typeLength); - maximumValue = 
ProtobufBinaryToDatum(protobufBlockSkipNode->maximumvalue, - typeByValue, typeLength); - } - - blockSkipNode = &blockSkipNodeArray[blockIndex]; - blockSkipNode->rowCount = protobufBlockSkipNode->rowcount; - blockSkipNode->hasMinMax = hasMinMax; - blockSkipNode->minimumValue = minimumValue; - blockSkipNode->maximumValue = maximumValue; - blockSkipNode->existsBlockOffset = protobufBlockSkipNode->existsblockoffset; - blockSkipNode->valueBlockOffset = protobufBlockSkipNode->valueblockoffset; - blockSkipNode->existsLength = protobufBlockSkipNode->existslength; - blockSkipNode->valueLength = protobufBlockSkipNode->valuelength; - blockSkipNode->valueCompressionType = - (CompressionType) protobufBlockSkipNode->valuecompressiontype; - } - - protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); - - return blockSkipNodeArray; -} - - -/* Converts a datum to a ProtobufCBinaryData. */ -static ProtobufCBinaryData -DatumToProtobufBinary(Datum datum, bool datumTypeByValue, int datumTypeLength) -{ - ProtobufCBinaryData protobufBinary = { 0, 0 }; - - int datumLength = att_addlength_datum(0, datumTypeLength, datum); - char *datumBuffer = palloc0(datumLength); - - if (datumTypeLength > 0) - { - if (datumTypeByValue) - { - store_att_byval(datumBuffer, datum, datumTypeLength); - } - else - { - memcpy(datumBuffer, DatumGetPointer(datum), datumTypeLength); - } - } - else - { - memcpy(datumBuffer, DatumGetPointer(datum), datumLength); - } - - protobufBinary.data = (uint8 *) datumBuffer; - protobufBinary.len = datumLength; - - return protobufBinary; -} - - -/* Converts the given ProtobufCBinaryData to a Datum. */ -static Datum -ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, bool datumTypeByValue, - int datumTypeLength) -{ - Datum datum = 0; - - /* - * We copy the protobuf data so the result of this function lives even - * after the unpacked protobuf struct is freed. 
- */ - char *binaryDataCopy = palloc0(protobufBinary.len); - memcpy(binaryDataCopy, protobufBinary.data, protobufBinary.len); - - datum = fetch_att(binaryDataCopy, datumTypeByValue, datumTypeLength); - - return datum; -} diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h deleted file mode 100644 index efd27000a..000000000 --- a/cstore_metadata_serialization.h +++ /dev/null @@ -1,31 +0,0 @@ -/*------------------------------------------------------------------------- - * - * cstore_metadata_serialization.h - * - * Type and function declarations to serialize/deserialize cstore metadata. - * - * Copyright (c) 2016, Citus Data, Inc. - * - * $Id$ - * - *------------------------------------------------------------------------- - */ - -#ifndef CSTORE_SERIALIZATION_H -#define CSTORE_SERIALIZATION_H - -/* Function declarations for metadata serialization */ -extern StringInfo SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, - uint32 blockCount, bool typeByValue, - int typeLength); - -/* Function declarations for metadata deserialization */ -extern void DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength); -extern uint32 DeserializeBlockCount(StringInfo buffer); -extern uint32 DeserializeRowCount(StringInfo buffer); -extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, - bool typeByValue, int typeLength, - uint32 blockCount); - - -#endif /* CSTORE_SERIALIZATION_H */ diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 5b6151ef6..af0eb96c4 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -31,13 +31,12 @@ #include "lib/stringinfo.h" #include "port.h" #include "storage/fd.h" +#include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/rel.h" -#include "cstore_metadata_serialization.h" - typedef struct { Relation rel; @@ -50,6 +49,8 @@ static Oid CStoreStripesRelationId(void); static Oid 
CStoreStripesIndexRelationId(void); static Oid CStoreTablesRelationId(void); static Oid CStoreTablesIndexRelationId(void); +static Oid CStoreSkipNodesRelationId(void); +static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); static int TableBlockRowCount(Oid relid); static void DeleteTableMetadataRowIfExists(Oid relid); @@ -59,15 +60,16 @@ static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, static void DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple); static void FinishModifyRelation(ModifyState *state); static EState * create_estate_for_relation(Relation rel); +static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); +static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_stripe_attr */ -#define Natts_cstore_stripe_attr 6 +#define Natts_cstore_stripe_attr 5 #define Anum_cstore_stripe_attr_relid 1 #define Anum_cstore_stripe_attr_stripe 2 #define Anum_cstore_stripe_attr_attr 3 #define Anum_cstore_stripe_attr_exists_size 4 #define Anum_cstore_stripe_attr_value_size 5 -#define Anum_cstore_stripe_attr_skiplist_size 6 /* constants for cstore_table */ #define Natts_cstore_tables 4 @@ -77,12 +79,29 @@ static EState * create_estate_for_relation(Relation rel); #define Anum_cstore_tables_version_minor 4 /* constants for cstore_stripe */ -#define Natts_cstore_stripes 5 +#define Natts_cstore_stripes 6 #define Anum_cstore_stripes_relid 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 -#define Anum_cstore_stripes_skiplist_length 4 -#define Anum_cstore_stripes_data_length 5 +#define Anum_cstore_stripes_data_length 4 +#define Anum_cstore_stripes_block_count 5 +#define Anum_cstore_stripes_row_count 6 + +/* constants for cstore_skipnodes */ +#define Natts_cstore_skipnodes 12 +#define Anum_cstore_skipnodes_relid 1 +#define Anum_cstore_skipnodes_stripe 2 +#define Anum_cstore_skipnodes_attr 3 +#define 
Anum_cstore_skipnodes_block 4 +#define Anum_cstore_skipnodes_row_count 5 +#define Anum_cstore_skipnodes_minimum_value 6 +#define Anum_cstore_skipnodes_maximum_value 7 +#define Anum_cstore_skipnodes_value_stream_offset 8 +#define Anum_cstore_skipnodes_value_stream_length 9 +#define Anum_cstore_skipnodes_exists_stream_offset 10 +#define Anum_cstore_skipnodes_exists_stream_length 11 +#define Anum_cstore_skipnodes_value_compression_type 12 + /* * InitCStoreTableMetadata adds a record for the given relation in cstore_table. @@ -117,6 +136,185 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) } +/* + * SaveStripeSkipList saves StripeSkipList for a given stripe as rows + * of cstore_skipnodes. + */ +void +SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, + TupleDesc tupleDescriptor) +{ + uint32 columnIndex = 0; + uint32 blockIndex = 0; + Oid cstoreSkipNodesOid = InvalidOid; + Relation cstoreSkipNodes = NULL; + ModifyState *modifyState = NULL; + uint32 columnCount = stripeSkipList->columnCount; + + cstoreSkipNodesOid = CStoreSkipNodesRelationId(); + cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock); + modifyState = StartModifyRelation(cstoreSkipNodes); + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockSkipNode *skipNode = + &stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; + + Datum values[Natts_cstore_skipnodes] = { + ObjectIdGetDatum(relid), + Int64GetDatum(stripe), + Int32GetDatum(columnIndex + 1), + Int32GetDatum(blockIndex), + Int64GetDatum(skipNode->rowCount), + 0, /* to be filled below */ + 0, /* to be filled below */ + Int64GetDatum(skipNode->valueBlockOffset), + Int64GetDatum(skipNode->valueLength), + Int64GetDatum(skipNode->existsBlockOffset), + Int64GetDatum(skipNode->existsLength), + Int32GetDatum(skipNode->valueCompressionType) + }; + + bool nulls[Natts_cstore_skipnodes] = { false }; 
+ + if (skipNode->hasMinMax) + { + values[Anum_cstore_skipnodes_minimum_value - 1] = + PointerGetDatum(DatumToBytea(skipNode->minimumValue, + &tupleDescriptor->attrs[columnIndex])); + values[Anum_cstore_skipnodes_maximum_value - 1] = + PointerGetDatum(DatumToBytea(skipNode->maximumValue, + &tupleDescriptor->attrs[columnIndex])); + } + else + { + nulls[Anum_cstore_skipnodes_minimum_value - 1] = true; + nulls[Anum_cstore_skipnodes_maximum_value - 1] = true; + } + + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + } + } + + FinishModifyRelation(modifyState); + heap_close(cstoreSkipNodes, NoLock); + + CommandCounterIncrement(); +} + + +/* + * ReadStripeSkipList fetches StripeSkipList for a given stripe. + */ +StripeSkipList * +ReadStripeSkipList(Oid relid, uint64 stripe, TupleDesc tupleDescriptor, + uint32 blockCount) +{ + StripeSkipList *skipList = NULL; + uint32 columnIndex = 0; + Oid cstoreSkipNodesOid = InvalidOid; + Relation cstoreSkipNodes = NULL; + Relation index = NULL; + HeapTuple heapTuple = NULL; + uint32 columnCount = tupleDescriptor->natts; + ScanKeyData scanKey[2]; + SysScanDesc scanDescriptor = NULL; + + cstoreSkipNodesOid = CStoreSkipNodesRelationId(); + cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); + index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); + + ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); + + scanDescriptor = systable_beginscan_ordered(cstoreSkipNodes, index, NULL, 2, scanKey); + + skipList = palloc0(sizeof(StripeSkipList)); + skipList->blockCount = blockCount; + skipList->columnCount = columnCount; + skipList->blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + skipList->blockSkipNodeArray[columnIndex] = + 
palloc0(blockCount * sizeof(ColumnBlockSkipNode)); + } + + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + uint32 attr = 0; + uint32 blockIndex = 0; + ColumnBlockSkipNode *skipNode = NULL; + + Datum datumArray[Natts_cstore_skipnodes]; + bool isNullArray[Natts_cstore_skipnodes]; + + heap_deform_tuple(heapTuple, RelationGetDescr(cstoreSkipNodes), datumArray, + isNullArray); + + attr = DatumGetInt32(datumArray[Anum_cstore_skipnodes_attr - 1]); + blockIndex = DatumGetInt32(datumArray[Anum_cstore_skipnodes_block - 1]); + + if (attr <= 0 || attr > columnCount) + { + ereport(ERROR, (errmsg("invalid stripe skipnode entry"), + errdetail("Attribute number out of range: %u", attr))); + } + + if (blockIndex < 0 || blockIndex >= blockCount) + { + ereport(ERROR, (errmsg("invalid stripe skipnode entry"), + errdetail("Block number out of range: %u", blockIndex))); + } + + columnIndex = attr - 1; + + skipNode = &skipList->blockSkipNodeArray[columnIndex][blockIndex]; + skipNode->rowCount = DatumGetInt64(datumArray[Anum_cstore_skipnodes_row_count - + 1]); + skipNode->valueBlockOffset = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_value_stream_offset - 1]); + skipNode->valueLength = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_value_stream_length - 1]); + skipNode->existsBlockOffset = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_exists_stream_offset - 1]); + skipNode->existsLength = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_exists_stream_length - 1]); + skipNode->valueCompressionType = + DatumGetInt32(datumArray[Anum_cstore_skipnodes_value_compression_type - 1]); + + if (isNullArray[Anum_cstore_skipnodes_minimum_value - 1] || + isNullArray[Anum_cstore_skipnodes_maximum_value - 1]) + { + skipNode->hasMinMax = false; + } + else + { + bytea *minValue = DatumGetByteaP( + datumArray[Anum_cstore_skipnodes_minimum_value - 1]); + bytea *maxValue = DatumGetByteaP( + datumArray[Anum_cstore_skipnodes_maximum_value - 1]); + + skipNode->minimumValue 
= + ByteaToDatum(minValue, &tupleDescriptor->attrs[columnIndex]); + skipNode->maximumValue = + ByteaToDatum(maxValue, &tupleDescriptor->attrs[columnIndex]); + + skipNode->hasMinMax = true; + } + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreSkipNodes, NoLock); + + return skipList; +} + + /* * InsertStripeMetadataRow adds a row to cstore_stripes. */ @@ -128,8 +326,9 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) ObjectIdGetDatum(relid), Int64GetDatum(stripe->id), Int64GetDatum(stripe->fileOffset), - Int64GetDatum(stripe->skipListLength), - Int64GetDatum(stripe->dataLength) + Int64GetDatum(stripe->dataLength), + Int32GetDatum(stripe->blockCount), + Int64GetDatum(stripe->rowCount) }; Oid cstoreStripesOid = CStoreStripesRelationId(); @@ -187,8 +386,10 @@ ReadTableMetadata(Oid relid) datumArray[Anum_cstore_stripes_file_offset - 1]); stripeMetadata->dataLength = DatumGetInt64( datumArray[Anum_cstore_stripes_data_length - 1]); - stripeMetadata->skipListLength = DatumGetInt64( - datumArray[Anum_cstore_stripes_skiplist_length - 1]); + stripeMetadata->blockCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_block_count - 1]); + stripeMetadata->rowCount = DatumGetInt64( + datumArray[Anum_cstore_stripes_row_count - 1]); tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, stripeMetadata); @@ -299,8 +500,7 @@ SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer) Int64GetDatum(stripe), Int16GetDatum(attr), Int64GetDatum(footer->existsSizeArray[attr - 1]), - Int64GetDatum(footer->valueSizeArray[attr - 1]), - Int64GetDatum(footer->skipListSizeArray[attr - 1]) + Int64GetDatum(footer->valueSizeArray[attr - 1]) }; InsertTupleAndEnforceConstraints(modifyState, values, nulls); @@ -339,7 +539,6 @@ ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) footer = palloc0(sizeof(StripeFooter)); footer->existsSizeArray = palloc0(relationColumnCount * sizeof(int64)); 
footer->valueSizeArray = palloc0(relationColumnCount * sizeof(int64)); - footer->skipListSizeArray = palloc0(relationColumnCount * sizeof(int64)); /* * Stripe can have less columns than the relation if ALTER TABLE happens @@ -369,8 +568,6 @@ ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) DatumGetInt64(datumArray[Anum_cstore_stripe_attr_exists_size - 1]); footer->valueSizeArray[attr - 1] = DatumGetInt64(datumArray[Anum_cstore_stripe_attr_value_size - 1]); - footer->skipListSizeArray[attr - 1] = - DatumGetInt64(datumArray[Anum_cstore_stripe_attr_skiplist_size - 1]); } systable_endscan_ordered(scanDescriptor); @@ -507,6 +704,55 @@ create_estate_for_relation(Relation rel) } +/* + * DatumToBytea serializes a datum into a bytea value. + */ +static bytea * +DatumToBytea(Datum value, Form_pg_attribute attrForm) +{ + int datumLength = att_addlength_datum(0, attrForm->attlen, value); + bytea *result = palloc0(datumLength + VARHDRSZ); + + SET_VARSIZE(result, datumLength + VARHDRSZ); + + if (attrForm->attlen > 0) + { + if (attrForm->attbyval) + { + store_att_byval(VARDATA(result), value, attrForm->attlen); + } + else + { + memcpy(VARDATA(result), DatumGetPointer(value), attrForm->attlen); + } + } + else + { + memcpy(VARDATA(result), DatumGetPointer(value), datumLength); + } + + return result; +} + + +/* + * ByteaToDatum deserializes a value which was previously serialized using + * DatumToBytea. + */ +static Datum +ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm) +{ + /* + * We copy the data so the result of this function lives even + * after the byteaDatum is freed. + */ + char *binaryDataCopy = palloc0(VARSIZE_ANY_EXHDR(bytes)); + memcpy(binaryDataCopy, VARDATA_ANY(bytes), VARSIZE_ANY_EXHDR(bytes)); + + return fetch_att(binaryDataCopy, attrForm->attbyval, attrForm->attlen); +} + + /* * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr. * TODO: should we cache this similar to citus? 
@@ -573,6 +819,28 @@ CStoreTablesIndexRelationId(void) } +/* + * CStoreSkipNodesRelationId returns relation id of cstore_skipnodes. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreSkipNodesRelationId(void) +{ + return get_relname_relid("cstore_skipnodes", CStoreNamespaceId()); +} + + +/* + * CStoreSkipNodesIndexRelationId returns relation id of cstore_skipnodes_pkey. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreSkipNodesIndexRelationId(void) +{ + return get_relname_relid("cstore_skipnodes_pkey", CStoreNamespaceId()); +} + + /* * CStoreNamespaceId returns namespace id of the schema we store cstore * related tables. diff --git a/cstore_reader.c b/cstore_reader.c index 654d74697..929c65c04 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -34,7 +34,6 @@ #include "utils/rel.h" #include "cstore.h" -#include "cstore_metadata_serialization.h" #include "cstore_version_compat.h" /* static function declarations */ @@ -53,12 +52,6 @@ static ColumnBuffers * LoadColumnBuffers(Relation relation, uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, Form_pg_attribute attributeForm); -static StripeSkipList * LoadStripeSkipList(Relation relation, - StripeMetadata *stripeMetadata, - StripeFooter *stripeFooter, - uint32 columnCount, - bool *projectedColumnMask, - TupleDesc tupleDescriptor); static bool * SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, List *whereClauseList); static List * BuildRestrictInfoList(List *whereClauseList); @@ -85,8 +78,6 @@ static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount); -static uint64 StripeRowCount(Relation relation, StripeMetadata *stripeMetadata); -static int RelationColumnCount(Oid relid); /* @@ -327,34 +318,13 @@ CStoreTableRowCount(Relation relation) 
foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); - totalRowCount += StripeRowCount(relation, stripeMetadata); + totalRowCount += stripeMetadata->rowCount; } return totalRowCount; } -/* - * StripeRowCount reads serialized stripe footer, the first column's - * skip list, and returns number of rows for given stripe. - */ -static uint64 -StripeRowCount(Relation relation, StripeMetadata *stripeMetadata) -{ - uint64 rowCount = 0; - StringInfo firstColumnSkipListBuffer = NULL; - - StripeFooter *stripeFooter = ReadStripeFooter(relation->rd_id, stripeMetadata->id, - RelationColumnCount(relation->rd_id)); - - firstColumnSkipListBuffer = ReadFromSmgr(relation, stripeMetadata->fileOffset, - stripeFooter->skipListSizeArray[0]); - rowCount = DeserializeRowCount(firstColumnSkipListBuffer); - - return rowCount; -} - - /* * LoadFilteredStripeBuffers reads serialized stripe data from the given file. * The function skips over blocks whose rows are refuted by restriction qualifiers, @@ -373,10 +343,10 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - StripeSkipList *stripeSkipList = LoadStripeSkipList(relation, stripeMetadata, - stripeFooter, columnCount, - projectedColumnMask, - tupleDescriptor); + StripeSkipList *stripeSkipList = ReadStripeSkipList(RelationGetRelid(relation), + stripeMetadata->id, + tupleDescriptor, + stripeMetadata->blockCount); bool *selectedBlockMask = SelectedBlockMask(stripeSkipList, projectedColumnList, whereClauseList); @@ -387,7 +357,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, /* load column data for projected columns */ columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); - currentColumnFileOffset = stripeMetadata->fileOffset + stripeMetadata->skipListLength; + currentColumnFileOffset = 
stripeMetadata->fileOffset; for (columnIndex = 0; columnIndex < stripeFooter->columnCount; columnIndex++) { @@ -511,98 +481,6 @@ LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, } -/* Reads the skip list for the given stripe. */ -static StripeSkipList * -LoadStripeSkipList(Relation relation, - StripeMetadata *stripeMetadata, - StripeFooter *stripeFooter, uint32 columnCount, - bool *projectedColumnMask, - TupleDesc tupleDescriptor) -{ - StripeSkipList *stripeSkipList = NULL; - ColumnBlockSkipNode **blockSkipNodeArray = NULL; - StringInfo firstColumnSkipListBuffer = NULL; - uint64 currentColumnSkipListFileOffset = 0; - uint32 columnIndex = 0; - uint32 stripeBlockCount = 0; - uint32 stripeColumnCount = stripeFooter->columnCount; - - /* deserialize block count */ - firstColumnSkipListBuffer = ReadFromSmgr(relation, stripeMetadata->fileOffset, - stripeFooter->skipListSizeArray[0]); - stripeBlockCount = DeserializeBlockCount(firstColumnSkipListBuffer); - - /* deserialize column skip lists */ - blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); - currentColumnSkipListFileOffset = stripeMetadata->fileOffset; - - for (columnIndex = 0; columnIndex < stripeColumnCount; columnIndex++) - { - uint64 columnSkipListSize = stripeFooter->skipListSizeArray[columnIndex]; - bool firstColumn = columnIndex == 0; - - /* - * Only selected columns' column skip lists are read. However, the first - * column's skip list is read regardless of being selected. It is used by - * StripeSkipListRowCount later. 
- */ - if (projectedColumnMask[columnIndex] || firstColumn) - { - Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); - - StringInfo columnSkipListBuffer = - ReadFromSmgr(relation, currentColumnSkipListFileOffset, - columnSkipListSize); - ColumnBlockSkipNode *columnSkipList = - DeserializeColumnSkipList(columnSkipListBuffer, attributeForm->attbyval, - attributeForm->attlen, stripeBlockCount); - blockSkipNodeArray[columnIndex] = columnSkipList; - } - - currentColumnSkipListFileOffset += columnSkipListSize; - } - - /* table contains additional columns added after this stripe is created */ - for (columnIndex = stripeColumnCount; columnIndex < columnCount; columnIndex++) - { - ColumnBlockSkipNode *columnSkipList = NULL; - uint32 blockIndex = 0; - bool firstColumn = columnIndex == 0; - - /* no need to create ColumnBlockSkipList if the column is not selected */ - if (!projectedColumnMask[columnIndex] && !firstColumn) - { - blockSkipNodeArray[columnIndex] = NULL; - continue; - } - - /* create empty ColumnBlockSkipNode for missing columns*/ - columnSkipList = palloc0(stripeBlockCount * sizeof(ColumnBlockSkipNode)); - - for (blockIndex = 0; blockIndex < stripeBlockCount; blockIndex++) - { - columnSkipList[blockIndex].rowCount = 0; - columnSkipList[blockIndex].hasMinMax = false; - columnSkipList[blockIndex].minimumValue = 0; - columnSkipList[blockIndex].maximumValue = 0; - columnSkipList[blockIndex].existsBlockOffset = 0; - columnSkipList[blockIndex].valueBlockOffset = 0; - columnSkipList[blockIndex].existsLength = 0; - columnSkipList[blockIndex].valueLength = 0; - columnSkipList[blockIndex].valueCompressionType = COMPRESSION_NONE; - } - blockSkipNodeArray[columnIndex] = columnSkipList; - } - - stripeSkipList = palloc0(sizeof(StripeSkipList)); - stripeSkipList->blockSkipNodeArray = blockSkipNodeArray; - stripeSkipList->columnCount = columnCount; - stripeSkipList->blockCount = stripeBlockCount; - - return stripeSkipList; -} - - /* * 
SelectedBlockMask walks over each column's blocks and checks if a block can * be filtered without reading its data. The filtering happens when all rows in @@ -1207,15 +1085,3 @@ ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount) } } } - - -static int -RelationColumnCount(Oid relid) -{ - Relation rel = RelationIdGetRelation(relid); - TupleDesc tupleDesc = RelationGetDescr(rel); - int columnCount = tupleDesc->natts; - RelationClose(rel); - - return columnCount; -} diff --git a/cstore_writer.c b/cstore_writer.c index 319136dc9..8a5f498e2 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -24,7 +24,6 @@ #include "utils/rel.h" #include "cstore.h" -#include "cstore_metadata_serialization.h" #include "cstore_version_compat.h" static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, @@ -34,10 +33,7 @@ static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount); static StripeMetadata FlushStripe(TableWriteState *writeState); -static StringInfo * CreateSkipListBufferArray(StripeSkipList *stripeSkipList, - TupleDesc tupleDescriptor); -static StripeFooter * CreateStripeFooter(StripeSkipList *stripeSkipList, - StringInfo *skipListBufferArray); +static StripeFooter * CreateStripeFooter(StripeSkipList *stripeSkipList); static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, int datumTypeLength, @@ -90,9 +86,7 @@ CStoreBeginWrite(Oid relationId, uint64 lastStripeSize = 0; lastStripe = llast(tableMetadata->stripeMetadataList); - lastStripeSize += lastStripe->skipListLength; lastStripeSize += lastStripe->dataLength; - lastStripeSize += lastStripe->footerLength; currentFileOffset = lastStripe->fileOffset + lastStripeSize; currentStripeId = lastStripe->id + 1; @@ -429,10 +423,8 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) static 
StripeMetadata FlushStripe(TableWriteState *writeState) { - StripeMetadata stripeMetadata = { 0, 0, 0, 0 }; - uint64 skipListLength = 0; + StripeMetadata stripeMetadata = { 0 }; uint64 dataLength = 0; - StringInfo *skipListBufferArray = NULL; StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; @@ -486,32 +478,21 @@ FlushStripe(TableWriteState *writeState) } /* create skip list and footer buffers */ - skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor); - stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray); + SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, + stripeSkipList, tupleDescriptor); + stripeFooter = CreateStripeFooter(stripeSkipList); /* - * Each stripe has three sections: - * (1) Skip list, which contains statistics for each column block, and can - * be used to skip reading row blocks that are refuted by WHERE clause list, - * (2) Data section, in which we store data for each column continuously. + * Each stripe has only one section: + * Data section, in which we store data for each column continuously. * We store data for each for each column in blocks. For each block, we * store two buffers: "exists" buffer, and "value" buffer. "exists" buffer * tells which values are not NULL. "value" buffer contains values for * present values. For each column, we first store all "exists" buffers, * and then all "value" buffers. - * (3) Stripe footer, which contains the skip list buffer size, exists buffer - * size, and value buffer size for each of the columns. - * - * We start by flushing the skip list buffers. 
*/ - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - StringInfo skipListBuffer = skipListBufferArray[columnIndex]; - WriteToSmgr(writeState, skipListBuffer->data, skipListBuffer->len); - writeState->currentFileOffset += skipListBuffer->len; - } - /* then, we flush the data buffers */ + /* flush the data buffers */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; @@ -546,60 +527,32 @@ FlushStripe(TableWriteState *writeState) /* set stripe metadata */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { - skipListLength += stripeFooter->skipListSizeArray[columnIndex]; dataLength += stripeFooter->existsSizeArray[columnIndex]; dataLength += stripeFooter->valueSizeArray[columnIndex]; } + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + stripeMetadata.rowCount += + stripeSkipList->blockSkipNodeArray[0][blockIndex].rowCount; + } + stripeMetadata.fileOffset = initialFileOffset; - stripeMetadata.skipListLength = skipListLength; stripeMetadata.dataLength = dataLength; - stripeMetadata.footerLength = 0; stripeMetadata.id = writeState->currentStripeId; + stripeMetadata.blockCount = blockCount; return stripeMetadata; } -/* - * CreateSkipListBufferArray serializes the skip list for each column of the - * given stripe and returns the result as an array. 
- */ -static StringInfo * -CreateSkipListBufferArray(StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor) -{ - StringInfo *skipListBufferArray = NULL; - uint32 columnIndex = 0; - uint32 columnCount = stripeSkipList->columnCount; - - skipListBufferArray = palloc0(columnCount * sizeof(StringInfo)); - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - StringInfo skipListBuffer = NULL; - ColumnBlockSkipNode *blockSkipNodeArray = - stripeSkipList->blockSkipNodeArray[columnIndex]; - Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); - - skipListBuffer = SerializeColumnSkipList(blockSkipNodeArray, - stripeSkipList->blockCount, - attributeForm->attbyval, - attributeForm->attlen); - - skipListBufferArray[columnIndex] = skipListBuffer; - } - - return skipListBufferArray; -} - - /* Creates and returns the footer for given stripe. */ static StripeFooter * -CreateStripeFooter(StripeSkipList *stripeSkipList, StringInfo *skipListBufferArray) +CreateStripeFooter(StripeSkipList *stripeSkipList) { StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 columnCount = stripeSkipList->columnCount; - uint64 *skipListSizeArray = palloc0(columnCount * sizeof(uint64)); uint64 *existsSizeArray = palloc0(columnCount * sizeof(uint64)); uint64 *valueSizeArray = palloc0(columnCount * sizeof(uint64)); @@ -614,12 +567,10 @@ CreateStripeFooter(StripeSkipList *stripeSkipList, StringInfo *skipListBufferArr existsSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].existsLength; valueSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].valueLength; } - skipListSizeArray[columnIndex] = skipListBufferArray[columnIndex]->len; } stripeFooter = palloc0(sizeof(StripeFooter)); stripeFooter->columnCount = columnCount; - stripeFooter->skipListSizeArray = skipListSizeArray; stripeFooter->existsSizeArray = existsSizeArray; stripeFooter->valueSizeArray = valueSizeArray; From 139da88ad97d8fb7aa4772a5bf774dbd1bd3994b Mon Sep 17 00:00:00 2001 
From: Hadi Moshayedi Date: Mon, 14 Sep 2020 15:08:50 -0700 Subject: [PATCH 19/91] Remove some unnecessary code & fix compiler warnings --- cstore_fdw.c | 62 +++++++++++----------------------------------------- 1 file changed, 13 insertions(+), 49 deletions(-) diff --git a/cstore_fdw.c b/cstore_fdw.c index cd8dcf4ef..63951c5f3 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -96,7 +96,7 @@ typedef struct CStoreValidOption #define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" /* Array of options that are valid for cstore_fdw */ -static const uint32 ValidOptionCount = 4; +static const uint32 ValidOptionCount = 3; static const CStoreValidOption ValidOptionArray[] = { /* foreign table options */ @@ -319,54 +319,22 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else if (nodeTag(parseTree) == T_DropStmt) { - DropStmt *dropStmt = (DropStmt *) parseTree; + List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); + ListCell *lc = NULL; - if (dropStmt->removeType == OBJECT_EXTENSION) + /* drop smgr storage */ + foreach(lc, dropRelids) { - bool removeCStoreDirectory = false; - ListCell *objectCell = NULL; + Oid relid = lfirst_oid(lc); + Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); - foreach(objectCell, dropStmt->objects) - { - Node *object = (Node *) lfirst(objectCell); - char *objectName = NULL; - -#if PG_VERSION_NUM >= 100000 - Assert(IsA(object, String)); - objectName = strVal(object); -#else - Assert(IsA(object, List)); - objectName = strVal(linitial((List *) object)); -#endif - - if (strncmp(CSTORE_FDW_NAME, objectName, NAMEDATALEN) == 0) - { - removeCStoreDirectory = true; - } - } - - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + RelationOpenSmgr(relation); + RelationDropStorage(relation); + heap_close(relation, AccessExclusiveLock); } - else - { - List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); - ListCell *lc = NULL; - /* drop smgr storage */ 
- foreach(lc, dropRelids) - { - Oid relid = lfirst_oid(lc); - Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); - - RelationOpenSmgr(relation); - RelationDropStorage(relation); - heap_close(relation, AccessExclusiveLock); - } - - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); - } + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); } else if (nodeTag(parseTree) == T_TruncateStmt) { @@ -857,11 +825,9 @@ TruncateCStoreTables(List *cstoreRelationList) { Relation relation = (Relation) lfirst(relationCell); Oid relationId = relation->rd_id; - CStoreOptions *cstoreOptions = NULL; Assert(CStoreTable(relationId)); - cstoreOptions = CStoreGetOptions(relationId); InitializeRelFileNode(relation, true); InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } @@ -1735,7 +1701,6 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) { TableReadState *readState = NULL; Oid foreignTableId = InvalidOid; - CStoreOptions *cstoreOptions = NULL; Relation currentRelation = scanState->ss.ss_currentRelation; TupleDesc tupleDescriptor = RelationGetDescr(currentRelation); List *columnList = NIL; @@ -1752,7 +1717,6 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) } foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); - cstoreOptions = CStoreGetOptions(foreignTableId); foreignScan = (ForeignScan *) scanState->ss.ps.plan; foreignPrivateList = (List *) foreignScan->fdw_private; @@ -1873,6 +1837,7 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, ForeignScan *foreignScan = NULL; char *relationName = NULL; int executorFlags = 0; + uint32 columnIndex = 0; TupleDesc tupleDescriptor = RelationGetDescr(relation); uint32 columnCount = tupleDescriptor->natts; @@ -1880,7 +1845,6 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, cstore_fdw_initrel(relation); /* create list of 
columns of the relation */ - uint32 columnIndex = 0; for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); From a94bbcc7ef1e3d533b0f237b4e37c1c1abaf5927 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Fri, 11 Sep 2020 18:12:47 +0200 Subject: [PATCH 20/91] write wal entries when writing to the buffers --- cstore_writer.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/cstore_writer.c b/cstore_writer.c index 8a5f498e2..5e44812bd 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -18,6 +18,7 @@ #include "access/nbtree.h" #include "catalog/pg_am.h" +#include "miscadmin.h" #include "storage/fd.h" #include "storage/smgr.h" #include "utils/memutils.h" @@ -401,11 +402,30 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) /* always appending */ Assert(phdr->pd_lower == addr.offset); + START_CRIT_SECTION(); + to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); memcpy(page + phdr->pd_lower, data, to_write); phdr->pd_lower += to_write; MarkBufferDirty(buffer); + + if (RelationNeedsWAL(rel)) + { + XLogBeginInsert(); + + /* + * Since cstore will mostly write whole pages we force the transmission of the + * whole image in the buffer + */ + XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE); + + XLogRecPtr recptr = XLogInsert(RM_GENERIC_ID, 0); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + UnlockReleaseBuffer(buffer); data += to_write; From d69bff7621e83edccf336e734031048341c5a804 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 15 Sep 2020 10:05:27 -0700 Subject: [PATCH 21/91] Use schema config in control file --- cstore_fdw--1.7.sql | 28 +++++++++++++--------------- cstore_fdw.control | 3 ++- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 1c19fda50..19801f1f8 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -3,8 +3,6 @@ -- complain if 
script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION cstore_fdw" to load this file. \quit -CREATE SCHEMA cstore; - CREATE FUNCTION cstore_fdw_handler() RETURNS fdw_handler AS 'MODULE_PATHNAME' @@ -28,7 +26,7 @@ CREATE EVENT TRIGGER cstore_ddl_event_end ON ddl_command_end EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); -CREATE FUNCTION cstore_table_size(relation regclass) +CREATE FUNCTION public.cstore_table_size(relation regclass) RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C STRICT; @@ -50,7 +48,7 @@ BEGIN CONTINUE; END IF; - PERFORM public.cstore_clean_table_resources(v_obj.objid); + PERFORM cstore.cstore_clean_table_resources(v_obj.objid); END LOOP; END; @@ -60,7 +58,7 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); -CREATE TABLE cstore.cstore_tables ( +CREATE TABLE cstore_tables ( relid oid NOT NULL, block_row_count int NOT NULL, version_major bigint NOT NULL, @@ -68,9 +66,9 @@ CREATE TABLE cstore.cstore_tables ( PRIMARY KEY (relid) ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore.cstore_tables IS 'CStore table wide metadata'; +COMMENT ON TABLE cstore_tables IS 'CStore table wide metadata'; -CREATE TABLE cstore.cstore_stripes ( +CREATE TABLE cstore_stripes ( relid oid NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, @@ -78,24 +76,24 @@ CREATE TABLE cstore.cstore_stripes ( block_count int NOT NULL, row_count bigint NOT NULL, PRIMARY KEY (relid, stripe), - FOREIGN KEY (relid) REFERENCES cstore.cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore.cstore_tables IS 'CStore per stripe metadata'; +COMMENT ON TABLE cstore_tables IS 'CStore per stripe metadata'; -CREATE TABLE cstore.cstore_stripe_attr ( +CREATE TABLE cstore_stripe_attr ( relid oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, 
exists_size bigint NOT NULL, value_size bigint NOT NULL, PRIMARY KEY (relid, stripe, attr), - FOREIGN KEY (relid, stripe) REFERENCES cstore.cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore.cstore_tables IS 'CStore per stripe/column combination metadata'; +COMMENT ON TABLE cstore_tables IS 'CStore per stripe/column combination metadata'; -CREATE TABLE cstore.cstore_skipnodes ( +CREATE TABLE cstore_skipnodes ( relid oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, @@ -109,7 +107,7 @@ CREATE TABLE cstore.cstore_skipnodes ( exists_stream_length bigint NOT NULL, value_compression_type int NOT NULL, PRIMARY KEY (relid, stripe, attr, block), - FOREIGN KEY (relid, stripe, attr) REFERENCES cstore.cstore_stripe_attr(relid, stripe, attr) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid, stripe, attr) REFERENCES cstore_stripe_attr(relid, stripe, attr) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore.cstore_tables IS 'CStore per block metadata'; +COMMENT ON TABLE cstore_tables IS 'CStore per block metadata'; diff --git a/cstore_fdw.control b/cstore_fdw.control index a95b8509f..6f781dcbb 100644 --- a/cstore_fdw.control +++ b/cstore_fdw.control @@ -2,4 +2,5 @@ comment = 'foreign-data wrapper for flat cstore access' default_version = '1.7' module_pathname = '$libdir/cstore_fdw' -relocatable = true +relocatable = false +schema = cstore From f7f59933f868ce1803d35cb3501b0fd0a109371f Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 12:39:51 -0700 Subject: [PATCH 22/91] fix v11 tests --- cstore_fdw.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/cstore_fdw.c b/cstore_fdw.c index 63951c5f3..36f576cee 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -129,7 
+129,7 @@ static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); static List * DroppedCStoreRelidList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); -static void InitializeRelFileNode(Relation relation, bool force); +static void FdwNewRelFileNode(Relation relation); static void TruncateCStoreTables(List *cstoreRelationList); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); @@ -828,26 +828,22 @@ TruncateCStoreTables(List *cstoreRelationList) Assert(CStoreTable(relationId)); - InitializeRelFileNode(relation, true); + FdwNewRelFileNode(relation); InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } } /* - * Version 11 and earlier already create a relfilenode for foreign + * Version 11 and earlier already assign a relfilenode for foreign * tables. Version 12 and later do not, so we need to create one manually. */ static void -InitializeRelFileNode(Relation relation, bool force) +FdwNewRelFileNode(Relation relation) { -#if PG_VERSION_NUM >= 120000 Relation pg_class; HeapTuple tuple; Form_pg_class classform; - /* - * Get a writable copy of the pg_class tuple for the given relation. 
- */ pg_class = heap_open(RelationRelationId, RowExclusiveLock); tuple = SearchSysCacheCopy1(RELOID, @@ -857,14 +853,13 @@ InitializeRelFileNode(Relation relation, bool force) RelationGetRelid(relation)); classform = (Form_pg_class) GETSTRUCT(tuple); - if (!OidIsValid(classform->relfilenode) || force) + if (true) { char persistence = relation->rd_rel->relpersistence; Relation tmprel; Oid tablespace; Oid filenode; RelFileNode newrnode; - SMgrRelation srel; /* * Upgrade to AccessExclusiveLock, and hold until the end of the @@ -874,6 +869,9 @@ InitializeRelFileNode(Relation relation, bool force) tmprel = heap_open(relation->rd_id, AccessExclusiveLock); heap_close(tmprel, NoLock); + if (OidIsValid(relation->rd_rel->relfilenode)) + RelationDropStorage(relation); + if (OidIsValid(relation->rd_rel->reltablespace)) tablespace = relation->rd_rel->reltablespace; else @@ -885,9 +883,6 @@ InitializeRelFileNode(Relation relation, bool force) newrnode.dbNode = MyDatabaseId; newrnode.relNode = filenode; - srel = RelationCreateStorage(newrnode, persistence); - smgrclose(srel); - classform->relfilenode = filenode; classform->relpages = 0; /* it's empty until further notice */ classform->reltuples = 0; @@ -901,7 +896,25 @@ InitializeRelFileNode(Relation relation, bool force) heap_freetuple(tuple); heap_close(pg_class, RowExclusiveLock); +} + +static void +FdwCreateStorage(Relation relation) +{ + Assert(OidIsValid(relation->rd_rel->relfilenode)); + RelationOpenSmgr(relation); + if (!smgrexists(relation->rd_smgr, MAIN_FORKNUM)) + { +#if PG_VERSION_NUM >= 120000 + SMgrRelation srel; + srel = RelationCreateStorage(relation->rd_node, + relation->rd_rel->relpersistence); + smgrclose(srel); +#else + RelationCreateStorage(relation->rd_node, + relation->rd_rel->relpersistence); #endif + } } @@ -2174,7 +2187,7 @@ cstore_fdw_initrel(Relation rel) { #if PG_VERSION_NUM >= 120000 if (rel->rd_rel->relfilenode == InvalidOid) - InitializeRelFileNode(rel, false); + FdwNewRelFileNode(rel); /* * Copied 
code from RelationInitPhysicalAddr(), which doesn't @@ -2188,6 +2201,7 @@ cstore_fdw_initrel(Relation rel) rel->rd_node.dbNode = MyDatabaseId; rel->rd_node.relNode = rel->rd_rel->relfilenode; #endif + FdwCreateStorage(rel); } static Relation From fe7ab6df84ec7f982477fb1526db4fc67624a96e Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 09:10:34 -0700 Subject: [PATCH 23/91] Rename tests to be FDW-specific. --- Makefile | 4 ++-- expected/{alter.out => fdw_alter.out} | 0 expected/{analyze.out => fdw_analyze.out} | 0 expected/{drop.out => fdw_drop.out} | 0 expected/{functions.out => fdw_functions.out} | 0 expected/{insert.out => fdw_insert.out} | 0 expected/{query.out => fdw_query.out} | 0 expected/{truncate.out => fdw_truncate.out} | 0 expected/{truncate_0.out => fdw_truncate_0.out} | 0 input/{block_filtering.source => fdw_block_filtering.source} | 0 input/{copyto.source => fdw_copyto.source} | 0 input/{create.source => fdw_create.source} | 0 input/{data_types.source => fdw_data_types.source} | 0 input/{load.source => fdw_load.source} | 0 output/{block_filtering.source => fdw_block_filtering.source} | 0 output/{copyto.source => fdw_copyto.source} | 0 output/{create.source => fdw_create.source} | 0 output/{data_types.source => fdw_data_types.source} | 0 output/{load.source => fdw_load.source} | 0 sql/{alter.sql => fdw_alter.sql} | 0 sql/{analyze.sql => fdw_analyze.sql} | 0 sql/{drop.sql => fdw_drop.sql} | 0 sql/{functions.sql => fdw_functions.sql} | 0 sql/{insert.sql => fdw_insert.sql} | 0 sql/{query.sql => fdw_query.sql} | 0 sql/{truncate.sql => fdw_truncate.sql} | 0 26 files changed, 2 insertions(+), 2 deletions(-) rename expected/{alter.out => fdw_alter.out} (100%) rename expected/{analyze.out => fdw_analyze.out} (100%) rename expected/{drop.out => fdw_drop.out} (100%) rename expected/{functions.out => fdw_functions.out} (100%) rename expected/{insert.out => fdw_insert.out} (100%) rename expected/{query.out => fdw_query.out} (100%) rename 
expected/{truncate.out => fdw_truncate.out} (100%) rename expected/{truncate_0.out => fdw_truncate_0.out} (100%) rename input/{block_filtering.source => fdw_block_filtering.source} (100%) rename input/{copyto.source => fdw_copyto.source} (100%) rename input/{create.source => fdw_create.source} (100%) rename input/{data_types.source => fdw_data_types.source} (100%) rename input/{load.source => fdw_load.source} (100%) rename output/{block_filtering.source => fdw_block_filtering.source} (100%) rename output/{copyto.source => fdw_copyto.source} (100%) rename output/{create.source => fdw_create.source} (100%) rename output/{data_types.source => fdw_data_types.source} (100%) rename output/{load.source => fdw_load.source} (100%) rename sql/{alter.sql => fdw_alter.sql} (100%) rename sql/{analyze.sql => fdw_analyze.sql} (100%) rename sql/{drop.sql => fdw_drop.sql} (100%) rename sql/{functions.sql => fdw_functions.sql} (100%) rename sql/{insert.sql => fdw_insert.sql} (100%) rename sql/{query.sql => fdw_query.sql} (100%) rename sql/{truncate.sql => fdw_truncate.sql} (100%) diff --git a/Makefile b/Makefile index b8277f3a2..a266edd9a 100644 --- a/Makefile +++ b/Makefile @@ -14,8 +14,8 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql -REGRESS = create load query analyze data_types functions block_filtering drop \ - insert copyto alter truncate +REGRESS = fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ + fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ diff --git a/expected/alter.out b/expected/fdw_alter.out similarity index 100% rename from expected/alter.out rename to 
expected/fdw_alter.out diff --git a/expected/analyze.out b/expected/fdw_analyze.out similarity index 100% rename from expected/analyze.out rename to expected/fdw_analyze.out diff --git a/expected/drop.out b/expected/fdw_drop.out similarity index 100% rename from expected/drop.out rename to expected/fdw_drop.out diff --git a/expected/functions.out b/expected/fdw_functions.out similarity index 100% rename from expected/functions.out rename to expected/fdw_functions.out diff --git a/expected/insert.out b/expected/fdw_insert.out similarity index 100% rename from expected/insert.out rename to expected/fdw_insert.out diff --git a/expected/query.out b/expected/fdw_query.out similarity index 100% rename from expected/query.out rename to expected/fdw_query.out diff --git a/expected/truncate.out b/expected/fdw_truncate.out similarity index 100% rename from expected/truncate.out rename to expected/fdw_truncate.out diff --git a/expected/truncate_0.out b/expected/fdw_truncate_0.out similarity index 100% rename from expected/truncate_0.out rename to expected/fdw_truncate_0.out diff --git a/input/block_filtering.source b/input/fdw_block_filtering.source similarity index 100% rename from input/block_filtering.source rename to input/fdw_block_filtering.source diff --git a/input/copyto.source b/input/fdw_copyto.source similarity index 100% rename from input/copyto.source rename to input/fdw_copyto.source diff --git a/input/create.source b/input/fdw_create.source similarity index 100% rename from input/create.source rename to input/fdw_create.source diff --git a/input/data_types.source b/input/fdw_data_types.source similarity index 100% rename from input/data_types.source rename to input/fdw_data_types.source diff --git a/input/load.source b/input/fdw_load.source similarity index 100% rename from input/load.source rename to input/fdw_load.source diff --git a/output/block_filtering.source b/output/fdw_block_filtering.source similarity index 100% rename from 
output/block_filtering.source rename to output/fdw_block_filtering.source diff --git a/output/copyto.source b/output/fdw_copyto.source similarity index 100% rename from output/copyto.source rename to output/fdw_copyto.source diff --git a/output/create.source b/output/fdw_create.source similarity index 100% rename from output/create.source rename to output/fdw_create.source diff --git a/output/data_types.source b/output/fdw_data_types.source similarity index 100% rename from output/data_types.source rename to output/fdw_data_types.source diff --git a/output/load.source b/output/fdw_load.source similarity index 100% rename from output/load.source rename to output/fdw_load.source diff --git a/sql/alter.sql b/sql/fdw_alter.sql similarity index 100% rename from sql/alter.sql rename to sql/fdw_alter.sql diff --git a/sql/analyze.sql b/sql/fdw_analyze.sql similarity index 100% rename from sql/analyze.sql rename to sql/fdw_analyze.sql diff --git a/sql/drop.sql b/sql/fdw_drop.sql similarity index 100% rename from sql/drop.sql rename to sql/fdw_drop.sql diff --git a/sql/functions.sql b/sql/fdw_functions.sql similarity index 100% rename from sql/functions.sql rename to sql/fdw_functions.sql diff --git a/sql/insert.sql b/sql/fdw_insert.sql similarity index 100% rename from sql/insert.sql rename to sql/fdw_insert.sql diff --git a/sql/query.sql b/sql/fdw_query.sql similarity index 100% rename from sql/query.sql rename to sql/fdw_query.sql diff --git a/sql/truncate.sql b/sql/fdw_truncate.sql similarity index 100% rename from sql/truncate.sql rename to sql/fdw_truncate.sql From 09208986ba4465df1c7af62f1fdbba8250712e71 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Wed, 16 Sep 2020 15:20:17 +0200 Subject: [PATCH 24/91] remove travis --- .travis.yml | 42 ------------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f83f7206e..000000000 --- a/.travis.yml +++ /dev/null @@ 
-1,42 +0,0 @@ -sudo: required -dist: bionic -language: c -cache: - apt: true - directories: - - /home/travis/postgresql -env: - global: - - enable_coverage=yes - - PG_PRELOAD=cstore_fdw - matrix: - - PGVERSION=9.3 - - PGVERSION=9.4 - - PGVERSION=9.5 - - PGVERSION=9.6 - - PGVERSION=10 - - PGVERSION=11 - - PGVERSION=12 - -before_install: - - git clone -b v0.7.13 --depth 1 https://github.com/citusdata/tools.git - - sudo make -C tools install - - setup_apt - - nuke_pg -install: - - sudo apt-get install protobuf-c-compiler - - sudo apt-get install libprotobuf-c0-dev - - sudo locale-gen da_DK - - sudo locale-gen da_DK.utf8 - - sudo pip install cpp-coveralls - - install_pg - - install_custom_pg -before_script: - - chmod 777 . - - chmod 777 data - - chmod 666 data/* - - config_and_start_cluster -script: pg_travis_test -after_success: - - sudo chmod 666 *.gcda - - coveralls --exclude cstore.pb-c.c --exclude cstore.pb-c.h From 20a8bca426b80be6e9db7c1d57938d81602425aa Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Wed, 16 Sep 2020 15:21:24 +0200 Subject: [PATCH 25/91] add integration files for circle ci This is based on the circle ci integration we have for citus, albeit highly simplified. --- .circleci/build.sh | 16 +++++++ .circleci/config.yml | 101 ++++++++++++++++++++++++++++++++++++++++++ .circleci/run_test.sh | 27 +++++++++++ .gitignore | 5 +++ 4 files changed, 149 insertions(+) create mode 100755 .circleci/build.sh create mode 100644 .circleci/config.yml create mode 100755 .circleci/run_test.sh diff --git a/.circleci/build.sh b/.circleci/build.sh new file mode 100755 index 000000000..6a9f14c74 --- /dev/null +++ b/.circleci/build.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -euxo pipefail +IFS=$'\n\t' + +status=0 + +basedir="$(pwd)" +installdir="${basedir}/install-${PG_MAJOR}" + +make install DESTDIR="${installdir}" +pushd "${installdir}" +find . 
-type f -print > "${basedir}/files.lst" +cat "${basedir}/files.lst" +tar cvf "${basedir}/install-${PG_MAJOR}.tar" $(cat "${basedir}/files.lst") +popd diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 000000000..9f2532c1d --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,101 @@ +version: 2.1 +orbs: + codecov: codecov/codecov@1.1.1 + +jobs: + check-style: + docker: + - image: 'citus/stylechecker:latest' + steps: + - checkout + - run: + name: 'Check Style' + command: | + citus_indent --check + - run: + name: 'Check if whitespace fixing changed anything, install editorconfig if it did' + command: | + git diff --exit-code + + build-11: + docker: + - image: 'citus/extbuilder:11.9' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=11 .circleci/build.sh + - persist_to_workspace: + root: . + paths: + - install-11.tar + + build-12: + docker: + - image: 'citus/extbuilder:12.4' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=12 .circleci/build.sh + - persist_to_workspace: + root: . + paths: + - install-12.tar + + test-11_checkinstall: + docker: + - image: 'citus/exttester:11.9' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_11,installcheck' + + test-12_checkinstall: + docker: + - image: 'citus/exttester:12.4' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . 
+ - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_12,installcheck' + +workflows: + version: 2 + build_and_test: + jobs: + + - check-style + + - build-11 + - build-12 + + - test-11_checkinstall: + requires: [build-11] + - test-12_checkinstall: + requires: [build-12] diff --git a/.circleci/run_test.sh b/.circleci/run_test.sh new file mode 100755 index 000000000..f9e183b56 --- /dev/null +++ b/.circleci/run_test.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -euxo pipefail +IFS=$'\n\t' + +status=0 + +export PGPORT=${PGPORT:-55432} + +function cleanup { + pg_ctl -D /tmp/postgres stop + rm -rf /tmp/postgres +} +trap cleanup EXIT + +rm -rf /tmp/postgres +initdb -E unicode /tmp/postgres +echo "shared_preload_libraries = 'cstore_fdw'" >> /tmp/postgres/postgresql.conf +pg_ctl -D /tmp/postgres -o "-p ${PGPORT}" -l /tmp/postgres_logfile start || status=$? +if [ -z $status ]; then cat /tmp/postgres_logfile; fi + +make "${@}" || status=$? 
+diffs="regression.diffs" + +if test -f "${diffs}"; then cat "${diffs}"; fi + +exit $status diff --git a/.gitignore b/.gitignore index 21c5e32ea..0c643e590 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,8 @@ .vscode *.pb-c.* + +# ignore files that could be created by circleci automation +files.lst +install-*.tar +install-*/ From 1e93e15a8d8f0f9fddc73c0f0c7880ba9cce6191 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Wed, 16 Sep 2020 15:21:57 +0200 Subject: [PATCH 26/91] fix indentation via citus_indent --- cstore.h | 4 +++- cstore_fdw.c | 46 ++++++++++++++++++++++++++++------------ cstore_metadata_tables.c | 3 +++ cstore_reader.c | 18 +++++++++------- cstore_version_compat.h | 2 +- cstore_writer.c | 21 +++++++++++------- 6 files changed, 62 insertions(+), 32 deletions(-) diff --git a/cstore.h b/cstore.h index c7f849cc9..87b552bbf 100644 --- a/cstore.h +++ b/cstore.h @@ -204,6 +204,7 @@ typedef struct TableReadState TableMetadata *tableMetadata; TupleDesc tupleDescriptor; Relation relation; + /* * List of Var pointers for columns in the query. 
We use this both for * getting vector of projected columns, and also when we want to build @@ -301,7 +302,7 @@ extern StripeSkipList * ReadStripeSkipList(Oid relid, uint64 stripe, typedef struct SmgrAddr { BlockNumber blockno; - uint32 offset; + uint32 offset; } SmgrAddr; /* @@ -320,4 +321,5 @@ logical_to_smgr(uint64 logicalOffset) return addr; } + #endif /* CSTORE_H */ diff --git a/cstore_fdw.c b/cstore_fdw.c index 36f576cee..512dee5a3 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -319,14 +319,14 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else if (nodeTag(parseTree) == T_DropStmt) { - List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); - ListCell *lc = NULL; + List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); + ListCell *lc = NULL; /* drop smgr storage */ foreach(lc, dropRelids) { - Oid relid = lfirst_oid(lc); - Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); + Oid relid = lfirst_oid(lc); + Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); RelationOpenSmgr(relation); RelationDropStorage(relation); @@ -334,7 +334,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + destReceiver, completionTag); } else if (nodeTag(parseTree) == T_TruncateStmt) { @@ -833,6 +833,7 @@ TruncateCStoreTables(List *cstoreRelationList) } } + /* * Version 11 and earlier already assign a relfilenode for foreign * tables. Version 12 and later do not, so we need to create one manually. 
@@ -840,26 +841,28 @@ TruncateCStoreTables(List *cstoreRelationList) static void FdwNewRelFileNode(Relation relation) { - Relation pg_class; - HeapTuple tuple; - Form_pg_class classform; + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; pg_class = heap_open(RelationRelationId, RowExclusiveLock); tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(RelationGetRelid(relation))); if (!HeapTupleIsValid(tuple)) + { elog(ERROR, "could not find tuple for relation %u", RelationGetRelid(relation)); + } classform = (Form_pg_class) GETSTRUCT(tuple); if (true) { - char persistence = relation->rd_rel->relpersistence; - Relation tmprel; - Oid tablespace; - Oid filenode; - RelFileNode newrnode; + char persistence = relation->rd_rel->relpersistence; + Relation tmprel; + Oid tablespace; + Oid filenode; + RelFileNode newrnode; /* * Upgrade to AccessExclusiveLock, and hold until the end of the @@ -870,12 +873,18 @@ FdwNewRelFileNode(Relation relation) heap_close(tmprel, NoLock); if (OidIsValid(relation->rd_rel->relfilenode)) + { RelationDropStorage(relation); + } if (OidIsValid(relation->rd_rel->reltablespace)) + { tablespace = relation->rd_rel->reltablespace; + } else + { tablespace = MyDatabaseTableSpace; + } filenode = GetNewRelFileNode(tablespace, NULL, persistence); @@ -898,6 +907,7 @@ FdwNewRelFileNode(Relation relation) heap_close(pg_class, RowExclusiveLock); } + static void FdwCreateStorage(Relation relation) { @@ -1692,7 +1702,7 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) { - Relation relation = scanState->ss.ss_currentRelation; + Relation relation = scanState->ss.ss_currentRelation; cstore_fdw_initrel(relation); @@ -2187,16 +2197,22 @@ cstore_fdw_initrel(Relation rel) { #if PG_VERSION_NUM >= 120000 if (rel->rd_rel->relfilenode == InvalidOid) + { FdwNewRelFileNode(rel); + } /* * Copied code from RelationInitPhysicalAddr(), which doesn't * work on foreign 
tables. */ if (OidIsValid(rel->rd_rel->reltablespace)) + { rel->rd_node.spcNode = rel->rd_rel->reltablespace; + } else + { rel->rd_node.spcNode = MyDatabaseTableSpace; + } rel->rd_node.dbNode = MyDatabaseId; rel->rd_node.relNode = rel->rd_rel->relfilenode; @@ -2204,6 +2220,7 @@ cstore_fdw_initrel(Relation rel) FdwCreateStorage(rel); } + static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode) { @@ -2214,6 +2231,7 @@ cstore_fdw_open(Oid relationId, LOCKMODE lockmode) return rel; } + static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode) { diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index af0eb96c4..8a67a3a9e 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -692,9 +692,12 @@ create_estate_for_relation(Relation rel) estate->es_output_cid = GetCurrentCommandId(true); #if PG_VERSION_NUM < 120000 + /* Triggers might need a slot */ if (resultRelInfo->ri_TrigDesc) + { estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL); + } #endif /* Prepare to catch AFTER triggers. 
*/ diff --git a/cstore_reader.c b/cstore_reader.c index 929c65c04..fecb45605 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -1037,26 +1037,27 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor return defaultValue; } + static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size) { - StringInfo resultBuffer = makeStringInfo(); - uint64 read = 0; + StringInfo resultBuffer = makeStringInfo(); + uint64 read = 0; enlargeStringInfo(resultBuffer, size); resultBuffer->len = size; while (read < size) { - Buffer buffer; - Page page; - PageHeader phdr; - uint32 to_read; - SmgrAddr addr = logical_to_smgr(offset + read); + Buffer buffer; + Page page; + PageHeader phdr; + uint32 to_read; + SmgrAddr addr = logical_to_smgr(offset + read); buffer = ReadBuffer(rel, addr.blockno); page = BufferGetPage(buffer); - phdr = (PageHeader)page; + phdr = (PageHeader) page; to_read = Min(size - read, phdr->pd_upper - addr.offset); memcpy(resultBuffer->data + read, page + addr.offset, to_read); @@ -1067,6 +1068,7 @@ ReadFromSmgr(Relation rel, uint64 offset, uint32 size) return resultBuffer; } + /* * ResetUncompressedBlockData iterates over deserialized column block data * and sets valueBuffer field to empty buffer. This field is allocated in stripe diff --git a/cstore_version_compat.h b/cstore_version_compat.h index 95521c1aa..3d1a60f93 100644 --- a/cstore_version_compat.h +++ b/cstore_version_compat.h @@ -2,7 +2,7 @@ * * cstore_version_compat.h * - * Compatibility macros for writing code agnostic to PostgreSQL versions + * Compatibility macros for writing code agnostic to PostgreSQL versions * * Copyright (c) 2018, Citus Data, Inc. 
* diff --git a/cstore_writer.c b/cstore_writer.c index 5e44812bd..55a314ec4 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -363,21 +363,22 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, return stripeSkipList; } + static void WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) { - uint64 logicalOffset = writeState->currentFileOffset; - uint64 remaining = dataLength; - Relation rel = writeState->relation; - Buffer buffer; + uint64 logicalOffset = writeState->currentFileOffset; + uint64 remaining = dataLength; + Relation rel = writeState->relation; + Buffer buffer; while (remaining > 0) { - SmgrAddr addr = logical_to_smgr(logicalOffset); + SmgrAddr addr = logical_to_smgr(logicalOffset); BlockNumber nblocks; - Page page; - PageHeader phdr; - uint64 to_write; + Page page; + PageHeader phdr; + uint64 to_write; RelationOpenSmgr(rel); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); @@ -397,7 +398,9 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) page = BufferGetPage(buffer); phdr = (PageHeader) page; if (PageIsNew(page)) + { PageInit(page, BLCKSZ, 0); + } /* always appending */ Assert(phdr->pd_lower == addr.offset); @@ -434,6 +437,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) } } + /* * FlushStripe flushes current stripe data into the file. The function first ensures * the last data block for each column is properly serialized and compressed. Then, @@ -832,6 +836,7 @@ AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata stripeMetadataCopy); } + /* * CopyStringInfo creates a deep copy of given source string allocating only needed * amount of memory. 
From 48e9c17b5015705acd2baac35bc8164f908cf3e4 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 14 Aug 2020 16:43:13 -0700 Subject: [PATCH 27/91] stubs for table access method --- Makefile | 2 +- cstore_tableam.c | 404 +++++++++++++++++++++++++++++++++++++++++++++++ cstore_tableam.h | 6 + 3 files changed, 411 insertions(+), 1 deletion(-) create mode 100644 cstore_tableam.c create mode 100644 cstore_tableam.h diff --git a/Makefile b/Makefile index a266edd9a..f7943e61b 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ MODULE_big = cstore_fdw PG_CPPFLAGS = -std=c11 OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_compression.o mod.o cstore_metadata_tables.o + cstore_compression.o mod.o cstore_metadata_tables.o cstore_tableam.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ diff --git a/cstore_tableam.c b/cstore_tableam.c new file mode 100644 index 000000000..e64243ce2 --- /dev/null +++ b/cstore_tableam.c @@ -0,0 +1,404 @@ +#include "postgres.h" + +#include "cstore_tableam.h" +#include + +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "access/rewriteheap.h" +#include "access/tableam.h" +#include "access/tsmapi.h" +#include "access/tuptoaster.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/storage.h" +#include "catalog/storage_xlog.h" +#include "commands/progress.h" +#include "executor/executor.h" +#include "optimizer/plancat.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/bufpage.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "utils/builtins.h" +#include "utils/rel.h" + + +static const TupleTableSlotOps * +cstore_slot_callbacks(Relation relation) +{ + return &TTSOpsVirtual; +} + +static 
TableScanDesc +cstore_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags) +{ + elog(ERROR, "cstore_beginscan not implemented"); +} + +static void +cstore_endscan(TableScanDesc sscan) +{ + elog(ERROR, "cstore_endscan not implemented"); +} + +static void +cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + elog(ERROR, "cstore_rescan not implemented"); +} + +static bool +cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + elog(ERROR, "cstore_getnextslot not implemented"); +} + +static Size +cstore_parallelscan_estimate(Relation rel) +{ + elog(ERROR, "cstore_parallelscan_estimate not implemented"); +} + +static Size +cstore_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) +{ + elog(ERROR, "cstore_parallelscan_initialize not implemented"); +} + +static void +cstore_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) +{ + elog(ERROR, "cstore_parallelscan_reinitialize not implemented"); +} + +static IndexFetchTableData * +cstore_index_fetch_begin(Relation rel) +{ + elog(ERROR, "cstore_index_fetch_begin not implemented"); +} + +static void +cstore_index_fetch_reset(IndexFetchTableData *scan) +{ + elog(ERROR, "cstore_index_fetch_reset not implemented"); +} + +static void +cstore_index_fetch_end(IndexFetchTableData *scan) +{ + elog(ERROR, "cstore_index_fetch_end not implemented"); +} + +static bool +cstore_index_fetch_tuple(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead) +{ + elog(ERROR, "cstore_index_fetch_tuple not implemented"); +} + +static bool +cstore_fetch_row_version(Relation relation, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_fetch_row_version not implemented"); +} + +static void 
+cstore_get_latest_tid(TableScanDesc sscan, + ItemPointer tid) +{ + elog(ERROR, "cstore_get_latest_tid not implemented"); +} + +static bool +cstore_tuple_tid_valid(TableScanDesc scan, ItemPointer tid) +{ + elog(ERROR, "cstore_tuple_tid_valid not implemented"); +} + +static bool +cstore_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, + Snapshot snapshot) +{ + return true; +} + +static TransactionId +cstore_compute_xid_horizon_for_tuples(Relation rel, + ItemPointerData *tids, + int nitems) +{ + elog(ERROR, "cstore_compute_xid_horizon_for_tuples not implemented"); +} + +static void +cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate) +{ + elog(ERROR, "cstore_tuple_insert not implemented"); +} + +static void +cstore_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, + CommandId cid, int options, + BulkInsertState bistate, uint32 specToken) +{ + elog(ERROR, "cstore_tuple_insert_speculative not implemented"); +} + +static void +cstore_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, + uint32 specToken, bool succeeded) +{ + elog(ERROR, "cstore_tuple_complete_speculative not implemented"); +} + +static void +cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, + CommandId cid, int options, BulkInsertState bistate) +{ + elog(ERROR, "cstore_multi_insert not implemented"); +} + +static TM_Result +cstore_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, + Snapshot snapshot, Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, bool changingPart) +{ + elog(ERROR, "cstore_tuple_delete not implemented"); +} + +static TM_Result +cstore_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, + CommandId cid, Snapshot snapshot, Snapshot crosscheck, + bool wait, TM_FailureData *tmfd, + LockTupleMode *lockmode, bool *update_indexes) +{ + elog(ERROR, "cstore_tuple_update not implemented"); +} + +static TM_Result 
+cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, + TupleTableSlot *slot, CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + TM_FailureData *tmfd) +{ + elog(ERROR, "cstore_tuple_lock not implemented"); +} + +static void +cstore_finish_bulk_insert(Relation relation, int options) +{ + elog(ERROR, "cstore_finish_bulk_insert not implemented"); +} + +static void +cstore_relation_set_new_filenode(Relation rel, + const RelFileNode *newrnode, + char persistence, + TransactionId *freezeXid, + MultiXactId *minmulti) +{ + elog(ERROR, "cstore_relation_set_new_filenode not implemented"); +} + +static void +cstore_relation_nontransactional_truncate(Relation rel) +{ + elog(ERROR, "cstore_relation_nontransactional_truncate not implemented"); +} + +static void +cstore_relation_copy_data(Relation rel, const RelFileNode *newrnode) +{ + elog(ERROR, "cstore_relation_copy_data not implemented"); +} + +static void +cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, + Relation OldIndex, bool use_sort, + TransactionId OldestXmin, + TransactionId *xid_cutoff, + MultiXactId *multi_cutoff, + double *num_tuples, + double *tups_vacuumed, + double *tups_recently_dead) +{ + elog(ERROR, "cstore_relation_copy_for_cluster not implemented"); +} + +static bool +cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy bstrategy) +{ + elog(ERROR, "cstore_scan_analyze_next_block not implemented"); +} + +static bool +cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, + double *liverows, double *deadrows, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_scan_analyze_next_tuple not implemented"); +} + +static double +cstore_index_build_range_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + bool allow_sync, + bool anyvisible, + bool progress, + BlockNumber start_blockno, + BlockNumber numblocks, + IndexBuildCallback callback, + void 
*callback_state, + TableScanDesc scan) +{ + elog(ERROR, "cstore_index_build_range_scan not implemented"); +} + +static void +cstore_index_validate_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + ValidateIndexState *state) +{ + elog(ERROR, "cstore_index_validate_scan not implemented"); +} + +static uint64 +cstore_relation_size(Relation rel, ForkNumber forkNumber) +{ + elog(ERROR, "cstore_relation_size not implemented"); +} + +static bool +cstore_relation_needs_toast_table(Relation rel) +{ + elog(ERROR, "cstore_relation_needs_toast_table not implemented"); +} + +static void +cstore_estimate_rel_size(Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, + double *allvisfrac) +{ + elog(ERROR, "cstore_estimate_rel_size not implemented"); +} + +static bool +cstore_scan_bitmap_next_block(TableScanDesc scan, + TBMIterateResult *tbmres) +{ + elog(ERROR, "cstore_scan_bitmap_next_block not implemented"); +} + +static bool +cstore_scan_bitmap_next_tuple(TableScanDesc scan, + TBMIterateResult *tbmres, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_scan_bitmap_next_tuple not implemented"); +} + +static bool +cstore_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate) +{ + elog(ERROR, "cstore_scan_sample_next_block not implemented"); +} + +static bool +cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_scan_sample_next_tuple not implemented"); +} + +static const TableAmRoutine cstore_am_methods = { + .type = T_TableAmRoutine, + + .slot_callbacks = cstore_slot_callbacks, + + .scan_begin = cstore_beginscan, + .scan_end = cstore_endscan, + .scan_rescan = cstore_rescan, + .scan_getnextslot = cstore_getnextslot, + + .parallelscan_estimate = cstore_parallelscan_estimate, + .parallelscan_initialize = cstore_parallelscan_initialize, + .parallelscan_reinitialize = cstore_parallelscan_reinitialize, + + .index_fetch_begin = 
cstore_index_fetch_begin, + .index_fetch_reset = cstore_index_fetch_reset, + .index_fetch_end = cstore_index_fetch_end, + .index_fetch_tuple = cstore_index_fetch_tuple, + + .tuple_fetch_row_version = cstore_fetch_row_version, + .tuple_get_latest_tid = cstore_get_latest_tid, + .tuple_tid_valid = cstore_tuple_tid_valid, + .tuple_satisfies_snapshot = cstore_tuple_satisfies_snapshot, + .compute_xid_horizon_for_tuples = cstore_compute_xid_horizon_for_tuples, + + .tuple_insert = cstore_tuple_insert, + .tuple_insert_speculative = cstore_tuple_insert_speculative, + .tuple_complete_speculative = cstore_tuple_complete_speculative, + .multi_insert = cstore_multi_insert, + .tuple_delete = cstore_tuple_delete, + .tuple_update = cstore_tuple_update, + .tuple_lock = cstore_tuple_lock, + .finish_bulk_insert = cstore_finish_bulk_insert, + + .relation_set_new_filenode = cstore_relation_set_new_filenode, + .relation_nontransactional_truncate = cstore_relation_nontransactional_truncate, + .relation_copy_data = cstore_relation_copy_data, + .relation_copy_for_cluster = cstore_relation_copy_for_cluster, + .relation_vacuum = heap_vacuum_rel, + .scan_analyze_next_block = cstore_scan_analyze_next_block, + .scan_analyze_next_tuple = cstore_scan_analyze_next_tuple, + .index_build_range_scan = cstore_index_build_range_scan, + .index_validate_scan = cstore_index_validate_scan, + + .relation_size = cstore_relation_size, + .relation_needs_toast_table = cstore_relation_needs_toast_table, + + .relation_estimate_size = cstore_estimate_rel_size, + + .scan_bitmap_next_block = cstore_scan_bitmap_next_block, + .scan_bitmap_next_tuple = cstore_scan_bitmap_next_tuple, + .scan_sample_next_block = cstore_scan_sample_next_block, + .scan_sample_next_tuple = cstore_scan_sample_next_tuple +}; + + +const TableAmRoutine * +GetCstoreTableAmRoutine(void) +{ + return &cstore_am_methods; +} + +PG_FUNCTION_INFO_V1(cstore_tableam_handler); +Datum +cstore_tableam_handler(PG_FUNCTION_ARGS) +{ + 
PG_RETURN_POINTER(&cstore_am_methods); +} diff --git a/cstore_tableam.h b/cstore_tableam.h new file mode 100644 index 000000000..67dbe0d87 --- /dev/null +++ b/cstore_tableam.h @@ -0,0 +1,6 @@ +#include "postgres.h" +#include "fmgr.h" +#include "access/tableam.h" + +const TableAmRoutine *GetCstoreTableAmRoutine(void); +Datum cstore_tableam_handler(PG_FUNCTION_ARGS); From b6ca8fcd70e3c33addb2e897ad93975b7040cb6c Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 16:50:06 -0700 Subject: [PATCH 28/91] extension control --- Makefile | 2 +- cstore_fdw--1.7--1.8.sql | 9 +++++++++ cstore_fdw.control | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 cstore_fdw--1.7--1.8.sql diff --git a/Makefile b/Makefile index f7943e61b..97f4d9e64 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ - cstore_fdw--1.0--1.1.sql + cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql REGRESS = fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate diff --git a/cstore_fdw--1.7--1.8.sql b/cstore_fdw--1.7--1.8.sql new file mode 100644 index 000000000..b1519d73e --- /dev/null +++ b/cstore_fdw--1.7--1.8.sql @@ -0,0 +1,9 @@ +/* cstore_fdw/cstore_fdw--1.7--1.8.sql */ + +CREATE FUNCTION cstore_tableam_handler(internal) +RETURNS table_am_handler +LANGUAGE C +AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; + +CREATE ACCESS METHOD cstore_tableam +TYPE TABLE HANDLER cstore_tableam_handler; diff --git a/cstore_fdw.control b/cstore_fdw.control index 6f781dcbb..57fd0808a 100644 --- a/cstore_fdw.control +++ b/cstore_fdw.control @@ -1,6 +1,6 @@ # cstore_fdw extension comment = 'foreign-data wrapper for flat cstore 
access' -default_version = '1.7' +default_version = '1.8' module_pathname = '$libdir/cstore_fdw' relocatable = false schema = cstore From b06f48a2a79c38b18132374f8f797743e5a15c45 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 16:51:09 -0700 Subject: [PATCH 29/91] tableAM updates --- cstore_tableam.c | 171 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 161 insertions(+), 10 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index e64243ce2..d67ac10b6 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -1,6 +1,5 @@ #include "postgres.h" -#include "cstore_tableam.h" #include #include "miscadmin.h" @@ -19,6 +18,7 @@ #include "catalog/storage_xlog.h" #include "commands/progress.h" #include "executor/executor.h" +#include "nodes/makefuncs.h" #include "optimizer/plancat.h" #include "pgstat.h" #include "storage/bufmgr.h" @@ -31,6 +31,57 @@ #include "utils/builtins.h" #include "utils/rel.h" +#include "cstore_tableam.h" +#include "cstore_fdw.h" + +typedef struct CStoreScanDescData +{ + TableScanDescData cs_base; + TableReadState *cs_readState; +} CStoreScanDescData; + +typedef struct CStoreScanDescData *CStoreScanDesc; + +static TableWriteState *CStoreWriteState = NULL; + +static void +cstore_init_write_state(Relation relation) +{ + //TODO: upgrade lock to serialize writes + + if (CStoreWriteState != NULL) + { + // TODO: consider whether it's possible for a new write to start + // before an old one is flushed + Assert(CStoreWriteState->relation->rd_id == relation->rd_id); + } + + if (CStoreWriteState == NULL) + { + CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(relation->rd_id); + TupleDesc tupdesc = RelationGetDescr(relation); + + elog(NOTICE, "initializing write state for relation %d", relation->rd_id); + CStoreWriteState = CStoreBeginWrite(cstoreFdwOptions->filename, + cstoreFdwOptions->compressionType, + cstoreFdwOptions->stripeRowCount, + cstoreFdwOptions->blockRowCount, + tupdesc); + + CStoreWriteState->relation 
= relation; + } +} + +void +cstore_free_write_state() +{ + if (CStoreWriteState != NULL) + { + elog(NOTICE, "flushing write state for relation %d", CStoreWriteState->relation->rd_id); + CStoreEndWrite(CStoreWriteState); + CStoreWriteState = NULL; + } +} static const TupleTableSlotOps * cstore_slot_callbacks(Relation relation) @@ -44,13 +95,48 @@ cstore_beginscan(Relation relation, Snapshot snapshot, ParallelTableScanDesc parallel_scan, uint32 flags) { - elog(ERROR, "cstore_beginscan not implemented"); + TupleDesc tupdesc = relation->rd_att; + CStoreFdwOptions *cstoreFdwOptions = NULL; + TableReadState *readState = NULL; + CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); + List *columnList = NIL; + + cstoreFdwOptions = CStoreGetOptions(relation->rd_id); + + scan->cs_base.rs_rd = relation; + scan->cs_base.rs_snapshot = snapshot; + scan->cs_base.rs_nkeys = nkeys; + scan->cs_base.rs_key = key; + scan->cs_base.rs_flags = flags; + scan->cs_base.rs_parallel = parallel_scan; + + for (int i = 0; i < tupdesc->natts; i++) + { + Index varno = 0; + AttrNumber varattno = i+1; + Oid vartype = tupdesc->attrs[i].atttypid; + int32 vartypmod = 0; + Oid varcollid = 0; + Index varlevelsup = 0; + Var *var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); + + columnList = lappend(columnList, var); + } + + readState = CStoreBeginRead(cstoreFdwOptions->filename, tupdesc, + columnList, NULL); + + scan->cs_readState = readState; + + return ((TableScanDesc) scan); } static void cstore_endscan(TableScanDesc sscan) { - elog(ERROR, "cstore_endscan not implemented"); + CStoreScanDesc scan = (CStoreScanDesc) sscan; + CStoreEndRead(scan->cs_readState); } static void @@ -63,7 +149,22 @@ cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, static bool cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { - elog(ERROR, "cstore_getnextslot not implemented"); + CStoreScanDesc scan = (CStoreScanDesc) sscan; + TupleDesc 
tupdesc = slot->tts_tupleDescriptor; + int natts = tupdesc->natts; + bool nextRowFound; + + ExecClearTuple(slot); + memset(slot->tts_values, 0, sizeof(Datum) * natts); + memset(slot->tts_isnull, true, sizeof(bool) * natts); + + nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, slot->tts_isnull); + + if (!nextRowFound) + return false; + + ExecStoreVirtualTuple(slot); + return true; } static Size @@ -153,7 +254,23 @@ static void cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { - elog(ERROR, "cstore_tuple_insert not implemented"); + HeapTuple heapTuple; + + cstore_init_write_state(relation); + + heapTuple = GetSlotHeapTuple(slot); + if (HeapTupleHasExternal(heapTuple)) + { + /* detoast any toasted attributes */ + HeapTuple newTuple = toast_flatten_tuple(heapTuple, + slot->tts_tupleDescriptor); + + ExecForceStoreHeapTuple(newTuple, slot, true); + } + + slot_getallattrs(slot); + + CStoreWriteRow(CStoreWriteState, slot->tts_values, slot->tts_isnull); } static void @@ -175,7 +292,26 @@ static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { - elog(ERROR, "cstore_multi_insert not implemented"); + cstore_init_write_state(relation); + + for (int i = 0; i < ntuples; i++) + { + TupleTableSlot *tupleSlot = slots[i]; + HeapTuple heapTuple = GetSlotHeapTuple(tupleSlot); + + if (HeapTupleHasExternal(heapTuple)) + { + /* detoast any toasted attributes */ + HeapTuple newTuple = toast_flatten_tuple(heapTuple, + tupleSlot->tts_tupleDescriptor); + + ExecForceStoreHeapTuple(newTuple, tupleSlot, true); + } + + slot_getallattrs(tupleSlot); + + CStoreWriteRow(CStoreWriteState, tupleSlot->tts_values, tupleSlot->tts_isnull); + } } static TM_Result @@ -207,7 +343,11 @@ cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, static void cstore_finish_bulk_insert(Relation relation, int options) { - 
elog(ERROR, "cstore_finish_bulk_insert not implemented"); + //TODO: flush relation like for heap? + // free write state or only in ExecutorEnd_hook? + + // for COPY + cstore_free_write_state(); } static void @@ -217,7 +357,15 @@ cstore_relation_set_new_filenode(Relation rel, TransactionId *freezeXid, MultiXactId *minmulti) { - elog(ERROR, "cstore_relation_set_new_filenode not implemented"); + SMgrRelation srel; + + Assert(persistence == RELPERSISTENCE_PERMANENT); + *freezeXid = RecentXmin; + *minmulti = GetOldestMultiXactId(); + srel = RelationCreateStorage(*newrnode, persistence); + CreateCStoreDatabaseDirectory(MyDatabaseId); + InitializeCStoreTableFile(rel->rd_id, rel); + smgrclose(srel); } static void @@ -295,7 +443,7 @@ cstore_relation_size(Relation rel, ForkNumber forkNumber) static bool cstore_relation_needs_toast_table(Relation rel) { - elog(ERROR, "cstore_relation_needs_toast_table not implemented"); + return false; } static void @@ -303,7 +451,10 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac) { - elog(ERROR, "cstore_estimate_rel_size not implemented"); + *attr_widths = 12; + *tuples = 100; + *pages = 10; + *allvisfrac = 1.0; } static bool From aa422f2da020604c80361571eb31bfd08dac9c90 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 09:01:38 -0700 Subject: [PATCH 30/91] fixup rebase --- cstore.h | 2 -- cstore_tableam.c | 46 ++++++++++++++++++++++++++++------------------ cstore_tableam.h | 1 + 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/cstore.h b/cstore.h index 87b552bbf..9a1764972 100644 --- a/cstore.h +++ b/cstore.h @@ -252,8 +252,6 @@ typedef struct TableWriteState extern CompressionType ParseCompressionType(const char *compressionTypeString); extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); -extern void CreateCStoreDatabaseDirectory(Oid databaseOid); -extern void 
RemoveCStoreDatabaseDirectory(Oid databaseOid); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(Oid relationId, diff --git a/cstore_tableam.c b/cstore_tableam.c index d67ac10b6..f93971c59 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -31,8 +31,8 @@ #include "utils/builtins.h" #include "utils/rel.h" +#include "cstore.h" #include "cstore_tableam.h" -#include "cstore_fdw.h" typedef struct CStoreScanDescData { @@ -44,6 +44,16 @@ typedef struct CStoreScanDescData *CStoreScanDesc; static TableWriteState *CStoreWriteState = NULL; +static CStoreOptions * +CStoreGetDefaultOptions(void) +{ + CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); + cstoreOptions->compressionType = DEFAULT_COMPRESSION_TYPE; + cstoreOptions->stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; + cstoreOptions->blockRowCount = DEFAULT_BLOCK_ROW_COUNT; + return cstoreOptions; +} + static void cstore_init_write_state(Relation relation) { @@ -58,14 +68,14 @@ cstore_init_write_state(Relation relation) if (CStoreWriteState == NULL) { - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(relation->rd_id); + CStoreOptions *cstoreOptions = CStoreGetDefaultOptions(); TupleDesc tupdesc = RelationGetDescr(relation); elog(NOTICE, "initializing write state for relation %d", relation->rd_id); - CStoreWriteState = CStoreBeginWrite(cstoreFdwOptions->filename, - cstoreFdwOptions->compressionType, - cstoreFdwOptions->stripeRowCount, - cstoreFdwOptions->blockRowCount, + CStoreWriteState = CStoreBeginWrite(relation->rd_id, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupdesc); CStoreWriteState->relation = relation; @@ -95,13 +105,14 @@ cstore_beginscan(Relation relation, Snapshot snapshot, ParallelTableScanDesc parallel_scan, uint32 flags) { - TupleDesc tupdesc = relation->rd_att; - CStoreFdwOptions *cstoreFdwOptions = NULL; - TableReadState *readState = NULL; - CStoreScanDesc scan = 
palloc(sizeof(CStoreScanDescData)); - List *columnList = NIL; + Oid relid = relation->rd_id; + TupleDesc tupdesc = relation->rd_att; + CStoreOptions *cstoreOptions = NULL; + TableReadState *readState = NULL; + CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); + List *columnList = NIL; - cstoreFdwOptions = CStoreGetOptions(relation->rd_id); + cstoreOptions = CStoreGetDefaultOptions(); scan->cs_base.rs_rd = relation; scan->cs_base.rs_snapshot = snapshot; @@ -124,8 +135,8 @@ cstore_beginscan(Relation relation, Snapshot snapshot, columnList = lappend(columnList, var); } - readState = CStoreBeginRead(cstoreFdwOptions->filename, tupdesc, - columnList, NULL); + readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); + readState->relation = relation; scan->cs_readState = readState; @@ -258,7 +269,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, cstore_init_write_state(relation); - heapTuple = GetSlotHeapTuple(slot); + heapTuple = ExecCopySlotHeapTuple(slot); if (HeapTupleHasExternal(heapTuple)) { /* detoast any toasted attributes */ @@ -297,7 +308,7 @@ cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, for (int i = 0; i < ntuples; i++) { TupleTableSlot *tupleSlot = slots[i]; - HeapTuple heapTuple = GetSlotHeapTuple(tupleSlot); + HeapTuple heapTuple = ExecCopySlotHeapTuple(tupleSlot); if (HeapTupleHasExternal(heapTuple)) { @@ -363,8 +374,7 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - CreateCStoreDatabaseDirectory(MyDatabaseId); - InitializeCStoreTableFile(rel->rd_id, rel); + InitializeCStoreTableFile(rel->rd_id, rel, CStoreGetDefaultOptions()); smgrclose(srel); } diff --git a/cstore_tableam.h b/cstore_tableam.h index 67dbe0d87..f81c13155 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -4,3 +4,4 @@ const TableAmRoutine *GetCstoreTableAmRoutine(void); Datum 
cstore_tableam_handler(PG_FUNCTION_ARGS); +extern void cstore_free_write_state(void); From f886fb33e5f639803c2f8afa8850f5e7dc332241 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 13:01:09 -0700 Subject: [PATCH 31/91] add AM tests --- Makefile | 4 +- expected/am_alter.out | 178 +++++++++++++++++++++ expected/am_analyze.out | 19 +++ expected/am_block_filtering.out | 116 ++++++++++++++ expected/am_copyto.out | 23 +++ expected/am_create.out | 44 ++++++ expected/am_data_types.out | 78 +++++++++ expected/am_drop.out | 40 +++++ expected/am_functions.out | 18 +++ expected/am_insert.out | 88 +++++++++++ expected/am_load.out | 39 +++++ expected/am_query.out | 105 +++++++++++++ expected/am_truncate.out | 231 +++++++++++++++++++++++++++ expected/am_truncate_0.out | 262 +++++++++++++++++++++++++++++++ input/am_block_filtering.source | 69 ++++++++ input/am_copyto.source | 17 ++ input/am_create.source | 43 +++++ input/am_data_types.source | 68 ++++++++ input/am_load.source | 44 ++++++ output/am_block_filtering.source | 116 ++++++++++++++ output/am_copyto.source | 23 +++ output/am_create.source | 44 ++++++ output/am_data_types.source | 78 +++++++++ output/am_load.source | 39 +++++ sql/am_alter.sql | 85 ++++++++++ sql/am_analyze.sql | 11 ++ sql/am_block_filtering.sql | 69 ++++++++ sql/am_copyto.sql | 17 ++ sql/am_create.sql | 43 +++++ sql/am_data_types.sql | 68 ++++++++ sql/am_drop.sql | 48 ++++++ sql/am_functions.sql | 20 +++ sql/am_insert.sql | 56 +++++++ sql/am_load.sql | 44 ++++++ sql/am_query.sql | 34 ++++ sql/am_truncate.sql | 116 ++++++++++++++ 36 files changed, 2396 insertions(+), 1 deletion(-) create mode 100644 expected/am_alter.out create mode 100644 expected/am_analyze.out create mode 100644 expected/am_block_filtering.out create mode 100644 expected/am_copyto.out create mode 100644 expected/am_create.out create mode 100644 expected/am_data_types.out create mode 100644 expected/am_drop.out create mode 100644 expected/am_functions.out create mode 100644 
expected/am_insert.out create mode 100644 expected/am_load.out create mode 100644 expected/am_query.out create mode 100644 expected/am_truncate.out create mode 100644 expected/am_truncate_0.out create mode 100644 input/am_block_filtering.source create mode 100644 input/am_copyto.source create mode 100644 input/am_create.source create mode 100644 input/am_data_types.source create mode 100644 input/am_load.source create mode 100644 output/am_block_filtering.source create mode 100644 output/am_copyto.source create mode 100644 output/am_create.source create mode 100644 output/am_data_types.source create mode 100644 output/am_load.source create mode 100644 sql/am_alter.sql create mode 100644 sql/am_analyze.sql create mode 100644 sql/am_block_filtering.sql create mode 100644 sql/am_copyto.sql create mode 100644 sql/am_create.sql create mode 100644 sql/am_data_types.sql create mode 100644 sql/am_drop.sql create mode 100644 sql/am_functions.sql create mode 100644 sql/am_insert.sql create mode 100644 sql/am_load.sql create mode 100644 sql/am_query.sql create mode 100644 sql/am_truncate.sql diff --git a/Makefile b/Makefile index 97f4d9e64..2fc550ca9 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,9 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -REGRESS = fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ +REGRESS = am_create am_load am_query am_analyze am_data_types am_functions \ + am_block_filtering am_drop am_insert am_copyto am_alter am_truncate \ + fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ diff --git a/expected/am_alter.out 
b/expected/am_alter.out new file mode 100644 index 000000000..659e2723e --- /dev/null +++ b/expected/am_alter.out @@ -0,0 +1,178 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +-- test analyze +ANALYZE test_alter_table; +-- verify select queries run as expected +SELECT * FROM test_alter_table; + b | c +---+--- + 2 | 3 + 5 | 6 + 8 | 9 +(3 rows) + +SELECT a FROM test_alter_table; +ERROR: column "a" does not exist +LINE 1: SELECT a FROM test_alter_table; + ^ +SELECT b FROM test_alter_table; + b +--- + 2 + 5 + 8 +(3 rows) + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +ERROR: INSERT has more expressions than target columns +LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); + ^ +INSERT INTO test_alter_table (SELECT 5, 8); +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | +(4 rows) + +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | + 3 | 5 | 8 +(5 rows) + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 +(5 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 + 1 | 2 | 4 | 8 +(6 rows) + +-- add a variable-length column with default value +ALTER FOREIGN TABLE 
test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME +(6 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME + 1 | 2 | 4 | 8 | ABCDEF +(7 rows) + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; + b | d | f +---+---+--------- + 2 | | TEXT ME + 5 | | TEXT ME + 8 | | TEXT ME + 5 | | TEXT ME + 3 | 8 | TEXT ME + 1 | 4 | TEXT ME + 1 | 4 | ABCDEF +(7 rows) + +SELECT count(*) from test_alter_table; + count +------- + 7 +(1 row) + +SELECT count(t.*) from test_alter_table t; + count +------- + 7 +(1 row) + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "g" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "h" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + b | d | f | g | h +---+---+---------+---+--- + 2 | | TEXT ME | | + 5 | | TEXT ME | | + 8 | | TEXT ME | | + 5 | | TEXT ME | | + 3 | 8 | TEXT ME | | + 1 | 4 | TEXT ME | | 
+ 1 | 4 | ABCDEF | | +(7 rows) + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; +ERROR: Column j cannot be cast automatically to type pg_catalog.int4 +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; +DROP FOREIGN TABLE test_alter_table; diff --git a/expected/am_analyze.out b/expected/am_analyze.out new file mode 100644 index 000000000..f8c4d974a --- /dev/null +++ b/expected/am_analyze.out @@ -0,0 +1,19 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. +-- +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + count +------- + 6 +(1 row) + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; + count +------- + 6 +(1 row) + diff --git a/expected/am_block_filtering.out b/expected/am_block_filtering.out new file mode 100644 index 000000000..bccfafd15 --- /dev/null +++ b/expected/am_block_filtering.out @@ -0,0 +1,116 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. 
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); +COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); + filtered_row_count +-------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); + filtered_row_count +-------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); + filtered_row_count +-------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +-------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 1979 +(1 row) + +SELECT 
filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 3958 +(1 row) + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; + a +--- + Å +(1 row) + diff --git a/expected/am_copyto.out b/expected/am_copyto.out new file mode 100644 index 000000000..2b68d0ad5 --- /dev/null +++ b/expected/am_copyto.out @@ -0,0 +1,23 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- load table data from file +COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +-- export using COPY (SELECT * FROM table) TO ... 
+COPY (select * from test_contestant) TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/expected/am_create.out b/expected/am_create.out new file mode 100644 index 000000000..961c0494d --- /dev/null +++ b/expected/am_create.out @@ -0,0 +1,44 @@ +-- +-- Test the CREATE statements related to cstore_fdw. +-- +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR +ERROR: invalid option "bad_option_name" +HINT: Valid options in this context are: compression, stripe_row_count, block_row_count +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR +ERROR: invalid stripe row count +HINT: Stripe row count must be an integer between 1000 and 10000000 +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR +ERROR: invalid block row count +HINT: Block row count must be an integer between 1000 and 100000 +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR +ERROR: invalid compression type +HINT: Valid options are: none, pglz +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); +-- Test 
that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/expected/am_data_types.out b/expected/am_data_types.out new file mode 100644 index 000000000..a27a25eb9 --- /dev/null +++ b/expected/am_data_types.out @@ -0,0 +1,78 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; +COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; +SELECT * FROM test_array_types; + int_array | bigint_array | text_array +--------------------------+--------------------------------------------+------------ + {1,2,3} | {1,2,3} | {a,b,c} + {} | {} | {} + {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} +(3 rows) + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; +COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; +SELECT * FROM test_datetime_types; + timestamp | timestamp_with_timezone | date | time | interval +---------------------+-------------------------+------------+----------+----------- + 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours + 1970-01-01 00:00:00 | infinity | -infinity | 00:00:00 | @ 0 +(2 rows) + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; +COPY test_enum_and_composite_types FROM + 
'/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; +SELECT * FROM test_enum_and_composite_types; + enum | composite +------+----------- + a | (2,b) + b | (3,c) +(2 rows) + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; +COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; +SELECT * FROM test_range_types; + int4range | int8range | numrange | tsrange +-----------+-----------+----------+----------------------------------------------- + [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") + empty | [1,) | (,) | empty +(2 rows) + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; +SELECT * FROM test_other_types; + bool | bytea | money | inet | bitstring | uuid | json +------+------------+-------+-------------+-----------+--------------------------------------+------------------ + f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} + t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] +(2 rows) + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; +COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; +SELECT * FROM test_null_values; + a | b | c +---+--------+----- + | {NULL} | (,) + | | +(2 rows) + diff --git a/expected/am_drop.out b/expected/am_drop.out new file mode 100644 index 000000000..926f69337 --- /dev/null +++ b/expected/am_drop.out @@ -0,0 +1,40 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. 
+-- +-- DROP FOREIGN TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +DROP SCHEMA test_schema CASCADE; +NOTICE: drop cascades to foreign table test_schema.test_table +SELECT current_database() datname \gset +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +DROP EXTENSION cstore_fdw CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to server cstore_server +drop cascades to foreign table test_table +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +\c :datname +DROP DATABASE db_to_drop; diff --git a/expected/am_functions.out b/expected/am_functions.out new file mode 100644 index 000000000..117fc15f9 --- /dev/null +++ b/expected/am_functions.out @@ -0,0 +1,18 @@ +-- +-- Test utility functions for cstore_fdw tables. 
+-- +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); +COPY table_with_data FROM STDIN; +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); + ?column? +---------- + t +(1 row) + +SELECT cstore_table_size('non_cstore_table'); +ERROR: relation is not a cstore table +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/expected/am_insert.out b/expected/am_insert.out new file mode 100644 index 000000000..49d9ed132 --- /dev/null +++ b/expected/am_insert.out @@ -0,0 +1,88 @@ +-- +-- Testing insert on cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +-- test single row inserts fail +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command values(1); +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command default values; +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); +select count(*) from test_insert_command_data; + count +------- + 0 +(1 row) + +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + count +------- + 1 +(1 row) + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + count +------- + 1 +(1 row) + +drop table test_insert_command_data; +drop foreign table test_insert_command; +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, 
generate_series(1, 1000) b +GROUP BY a ORDER BY a; +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; +-- drop source table to remove original text from toast +DROP TABLE test_long_text; +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + int_val +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/expected/am_load.out b/expected/am_load.out new file mode 100644 index 000000000..162ece55b --- /dev/null +++ b/expected/am_load.out @@ -0,0 +1,39 @@ +-- +-- Test loading data into cstore_fdw tables. 
+-- +-- COPY with incorrect delimiter +COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR +ERROR: missing data for column "birthdate" +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR +ERROR: program "invalid_program" failed +DETAIL: command not found +-- COPY into uncompressed table from file +COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; +-- COPY into compressed table +COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' + WITH CSV; +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +COPY famous_constants (name, value) FROM STDIN WITH CSV; +SELECT * FROM famous_constants ORDER BY id, name; + id | name | value +----+----------------+----------- + 1 | pi | 3.141 + 2 | e | 2.718 + 3 | gamma | 0.577 + 4 | bohr radius | 5.291e-11 + | avagadro | 6.022e+23 + | electron mass | 9.109e-31 + | proton mass | 1.672e-27 + | speed of light | 2.997e+08 +(8 rows) + +DROP FOREIGN TABLE famous_constants; diff --git a/expected/am_query.out b/expected/am_query.out new file mode 100644 index 000000000..7ac3508a4 --- /dev/null +++ b/expected/am_query.out @@ -0,0 +1,105 @@ +-- +-- Test querying cstore_fdw tables. 
+-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +-- Query uncompressed data +SELECT count(*) FROM contestant; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant_compressed ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 
1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + to_json +------------------------------------------------------------------------------------------------------------------ + {"handle":"g","birthdate":"1991-12-13","rating":1803,"percentile":85.1,"country":"XD ","achievements":["a","c"]} +(1 row) + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + ?column? | b +----------+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 +(10 rows) + +DROP FOREIGN TABLE union_first, union_second; diff --git a/expected/am_truncate.out b/expected/am_truncate.out new file mode 100644 index 000000000..c92c15559 --- /dev/null +++ b/expected/am_truncate.out @@ -0,0 +1,231 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. 
+-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + t +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 0 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from 
cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT 
generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for table truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; diff --git a/expected/am_truncate_0.out b/expected/am_truncate_0.out new file mode 100644 index 000000000..c8cc4ad98 --- /dev/null +++ b/expected/am_truncate_0.out @@ -0,0 +1,262 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. 
+-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + f +(1 row) + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 26 +(1 row) + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT 
pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 6 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the 
second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for relation truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- 
cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + diff --git a/input/am_block_filtering.source b/input/am_block_filtering.source new file mode 100644 index 000000000..dc3170f0d --- /dev/null +++ b/input/am_block_filtering.source @@ -0,0 +1,69 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- + + +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. +-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; + + +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); + +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; + + +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a 
> 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +A +Å +B +\. + +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/input/am_copyto.source b/input/am_copyto.source new file mode 100644 index 000000000..a4b753a8d --- /dev/null +++ b/input/am_copyto.source @@ -0,0 +1,17 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; + +-- export using COPY (SELECT * FROM table) TO ... 
+COPY (select * from test_contestant) TO STDOUT; + +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/input/am_create.source b/input/am_create.source new file mode 100644 index 000000000..ba52137c1 --- /dev/null +++ b/input/am_create.source @@ -0,0 +1,43 @@ +-- +-- Test the CREATE statements related to cstore_fdw. +-- + + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + + +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR + +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + + +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); + +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; diff --git a/input/am_data_types.source b/input/am_data_types.source new file mode 100644 index 000000000..ec83c4d8c --- /dev/null +++ b/input/am_data_types.source @@ -0,0 +1,68 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- + + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; + + +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; + +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; + +SELECT * FROM test_array_types; + + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; + +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; + +SELECT * FROM test_datetime_types; + + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); + +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; + +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; + +SELECT * FROM test_enum_and_composite_types; + + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; + +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; + +SELECT * FROM test_range_types; + + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; + +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; + +SELECT * FROM test_other_types; + + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; + +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; + +SELECT * FROM test_null_values; diff --git a/input/am_load.source 
b/input/am_load.source new file mode 100644 index 000000000..0913acde7 --- /dev/null +++ b/input/am_load.source @@ -0,0 +1,44 @@ +-- +-- Test loading data into cstore_fdw tables. +-- + +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR + +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR + +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; + +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; + +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +3.141,pi,1 +2.718,e,2 +0.577,gamma,3 +5.291e-11,bohr radius,4 +\. + +COPY famous_constants (name, value) FROM STDIN WITH CSV; +avagadro,6.022e23 +electron mass,9.109e-31 +proton mass,1.672e-27 +speed of light,2.997e8 +\. + +SELECT * FROM famous_constants ORDER BY id, name; + +DROP FOREIGN TABLE famous_constants; diff --git a/output/am_block_filtering.source b/output/am_block_filtering.source new file mode 100644 index 000000000..2f664a78a --- /dev/null +++ b/output/am_block_filtering.source @@ -0,0 +1,116 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. 
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); + filtered_row_count +-------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); + filtered_row_count +-------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); + filtered_row_count +-------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +-------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 1979 +(1 row) + +SELECT filtered_row_count('SELECT count(*) 
FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 3958 +(1 row) + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; + a +--- + Å +(1 row) + diff --git a/output/am_copyto.source b/output/am_copyto.source new file mode 100644 index 000000000..a8d841f18 --- /dev/null +++ b/output/am_copyto.source @@ -0,0 +1,23 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +-- export using COPY (SELECT * FROM table) TO ... 
+COPY (select * from test_contestant) TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/output/am_create.source b/output/am_create.source new file mode 100644 index 000000000..961c0494d --- /dev/null +++ b/output/am_create.source @@ -0,0 +1,44 @@ +-- +-- Test the CREATE statements related to cstore_fdw. +-- +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR +ERROR: invalid option "bad_option_name" +HINT: Valid options in this context are: compression, stripe_row_count, block_row_count +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR +ERROR: invalid stripe row count +HINT: Stripe row count must be an integer between 1000 and 10000000 +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR +ERROR: invalid block row count +HINT: Block row count must be an integer between 1000 and 100000 +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR +ERROR: invalid compression type +HINT: Valid options are: none, pglz +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); +-- 
Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/output/am_data_types.source b/output/am_data_types.source new file mode 100644 index 000000000..23fdcfa29 --- /dev/null +++ b/output/am_data_types.source @@ -0,0 +1,78 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; +SELECT * FROM test_array_types; + int_array | bigint_array | text_array +--------------------------+--------------------------------------------+------------ + {1,2,3} | {1,2,3} | {a,b,c} + {} | {} | {} + {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} +(3 rows) + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; +SELECT * FROM test_datetime_types; + timestamp | timestamp_with_timezone | date | time | interval +---------------------+-------------------------+------------+----------+----------- + 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours + 1970-01-01 00:00:00 | infinity | -infinity | 00:00:00 | @ 0 +(2 rows) + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; +COPY test_enum_and_composite_types FROM + 
'@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; +SELECT * FROM test_enum_and_composite_types; + enum | composite +------+----------- + a | (2,b) + b | (3,c) +(2 rows) + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; +SELECT * FROM test_range_types; + int4range | int8range | numrange | tsrange +-----------+-----------+----------+----------------------------------------------- + [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") + empty | [1,) | (,) | empty +(2 rows) + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; +SELECT * FROM test_other_types; + bool | bytea | money | inet | bitstring | uuid | json +------+------------+-------+-------------+-----------+--------------------------------------+------------------ + f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} + t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] +(2 rows) + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; +SELECT * FROM test_null_values; + a | b | c +---+--------+----- + | {NULL} | (,) + | | +(2 rows) + diff --git a/output/am_load.source b/output/am_load.source new file mode 100644 index 000000000..c76f203eb --- /dev/null +++ b/output/am_load.source @@ -0,0 +1,39 @@ +-- +-- Test loading data into cstore_fdw tables. 
+-- +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR +ERROR: missing data for column "birthdate" +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR +ERROR: program "invalid_program" failed +DETAIL: command not found +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +COPY famous_constants (name, value) FROM STDIN WITH CSV; +SELECT * FROM famous_constants ORDER BY id, name; + id | name | value +----+----------------+----------- + 1 | pi | 3.141 + 2 | e | 2.718 + 3 | gamma | 0.577 + 4 | bohr radius | 5.291e-11 + | avagadro | 6.022e+23 + | electron mass | 9.109e-31 + | proton mass | 1.672e-27 + | speed of light | 2.997e+08 +(8 rows) + +DROP FOREIGN TABLE famous_constants; diff --git a/sql/am_alter.sql b/sql/am_alter.sql new file mode 100644 index 000000000..5ba3beb34 --- /dev/null +++ b/sql/am_alter.sql @@ -0,0 +1,85 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. 
+-- + +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; + +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; + +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; + +-- test analyze +ANALYZE test_alter_table; + +-- verify select queries run as expected +SELECT * FROM test_alter_table; +SELECT a FROM test_alter_table; +SELECT b FROM test_alter_table; + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +INSERT INTO test_alter_table (SELECT 5, 8); + + +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + + +-- add a variable-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; +SELECT count(*) from test_alter_table; +SELECT count(t.*) from test_alter_table t; + + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; 
+ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; + +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; + +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; + +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; + +DROP FOREIGN TABLE test_alter_table; diff --git a/sql/am_analyze.sql b/sql/am_analyze.sql new file mode 100644 index 000000000..4476454a6 --- /dev/null +++ b/sql/am_analyze.sql @@ -0,0 +1,11 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. +-- + +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql new file mode 100644 index 000000000..bb90c72ca --- /dev/null +++ b/sql/am_block_filtering.sql @@ -0,0 +1,69 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- + + +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. 
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; + + +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); + +COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; + + +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); +SELECT 
filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +A +Å +B +\. + +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/sql/am_copyto.sql b/sql/am_copyto.sql new file mode 100644 index 000000000..4e9e839b7 --- /dev/null +++ b/sql/am_copyto.sql @@ -0,0 +1,17 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + +-- load table data from file +COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; + +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; + +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; + +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/sql/am_create.sql b/sql/am_create.sql new file mode 100644 index 000000000..ba52137c1 --- /dev/null +++ b/sql/am_create.sql @@ -0,0 +1,43 @@ +-- +-- Test the CREATE statements related to cstore_fdw. 
+-- + + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + + +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR + +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + + +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); + +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; diff --git a/sql/am_data_types.sql b/sql/am_data_types.sql new file mode 100644 index 000000000..092538a57 --- /dev/null +++ b/sql/am_data_types.sql @@ -0,0 +1,68 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- + + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; + + +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; + +COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; + +SELECT * FROM test_array_types; + + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; + +COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; + +SELECT * FROM test_datetime_types; + + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); + +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; + +COPY test_enum_and_composite_types FROM + '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; + +SELECT * FROM test_enum_and_composite_types; + + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; + +COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; + +SELECT * FROM test_range_types; + + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; + +COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; + +SELECT * FROM test_other_types; + + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; + +COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; 
+ +SELECT * FROM test_null_values; diff --git a/sql/am_drop.sql b/sql/am_drop.sql new file mode 100644 index 000000000..c64b5c99b --- /dev/null +++ b/sql/am_drop.sql @@ -0,0 +1,48 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP FOREIGN TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- + +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. + +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset + +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; + +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +DROP SCHEMA test_schema CASCADE; + +SELECT current_database() datname \gset + +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; + +DROP EXTENSION cstore_fdw CASCADE; + +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; + +\c :datname + +DROP DATABASE db_to_drop; diff --git a/sql/am_functions.sql b/sql/am_functions.sql new file mode 100644 index 000000000..ed7e260b3 --- /dev/null +++ b/sql/am_functions.sql @@ -0,0 +1,20 @@ +-- +-- Test utility functions for cstore_fdw tables. 
+-- + +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); + +COPY table_with_data FROM STDIN; +1 +2 +3 +\. + +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); +SELECT cstore_table_size('non_cstore_table'); + +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/sql/am_insert.sql b/sql/am_insert.sql new file mode 100644 index 000000000..7a6b075ce --- /dev/null +++ b/sql/am_insert.sql @@ -0,0 +1,56 @@ +-- +-- Testing insert on cstore_fdw tables. +-- + +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; + +-- test single row inserts fail +select count(*) from test_insert_command; +insert into test_insert_command values(1); +select count(*) from test_insert_command; + +insert into test_insert_command default values; +select count(*) from test_insert_command; + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); + +select count(*) from test_insert_command_data; +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + +drop table test_insert_command_data; +drop foreign table test_insert_command; + +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; + +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; + +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; + +-- store long text in 
cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; + +-- drop source table to remove original text from toast +DROP TABLE test_long_text; + +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/sql/am_load.sql b/sql/am_load.sql new file mode 100644 index 000000000..7f9238b57 --- /dev/null +++ b/sql/am_load.sql @@ -0,0 +1,44 @@ +-- +-- Test loading data into cstore_fdw tables. +-- + +-- COPY with incorrect delimiter +COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR + +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR + +-- COPY into uncompressed table from file +COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; + +-- COPY into compressed table +COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' + WITH CSV; + +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +3.141,pi,1 +2.718,e,2 +0.577,gamma,3 +5.291e-11,bohr radius,4 +\. + +COPY famous_constants (name, value) FROM STDIN WITH CSV; +avagadro,6.022e23 +electron mass,9.109e-31 +proton mass,1.672e-27 +speed of light,2.997e8 +\. 
+ +SELECT * FROM famous_constants ORDER BY id, name; + +DROP FOREIGN TABLE famous_constants; diff --git a/sql/am_query.sql b/sql/am_query.sql new file mode 100644 index 000000000..87743e7bd --- /dev/null +++ b/sql/am_query.sql @@ -0,0 +1,34 @@ +-- +-- Test querying cstore_fdw tables. +-- + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; + +-- Query uncompressed data +SELECT count(*) FROM contestant; +SELECT avg(rating), stddev_samp(rating) FROM contestant; +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant ORDER BY handle; + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant_compressed ORDER BY handle; + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; + +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; + +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + +DROP FOREIGN TABLE union_first, union_second; diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql new file mode 100644 index 000000000..a1849045e --- /dev/null +++ b/sql/am_truncate.sql @@ -0,0 +1,116 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. 
+-- + +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; + +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; + +-- query rows +SELECT * FROM cstore_truncate_test; + +TRUNCATE TABLE cstore_truncate_test; + +SELECT * FROM cstore_truncate_test; + +SELECT COUNT(*) from cstore_truncate_test; + +SELECT count(*) FROM cstore_truncate_test_compressed; +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; + +SELECT * from cstore_truncate_test; + +SELECT * from cstore_truncate_test_second; + +SELECT * from cstore_truncate_test_regular; + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; + +SELECT * from cstore_truncate_test; +SELECT * from cstore_truncate_test_second; +SELECT * from cstore_truncate_test_regular; + +-- test if truncate on 
empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; + +SELECT cstore_truncate_test_regular_func(); +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); +DROP FUNCTION cstore_truncate_test_regular_func(); + +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; + +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); + +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; + +SELECT current_user \gset + +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; + +-- verify truncate_user can truncate now +\c - truncate_user +SELECT 
count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +\c - :current_user + +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +DROP USER truncate_user; From a57b9004a4e02b3c3dcdc7b973cb41822bf815f9 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 16:41:28 -0700 Subject: [PATCH 32/91] tests WIP --- Makefile | 4 ++-- cstore_tableam.c | 28 ++++++++++++++++++++++-- cstore_tableam.h | 2 ++ expected/clean.out | 10 +++++++++ expected/extension_create.out | 2 ++ input/am_block_filtering.source | 8 +++---- input/am_copyto.source | 6 +++--- input/am_create.source | 34 +++++------------------------ input/am_data_types.source | 24 ++++++++++----------- input/am_load.source | 6 +++--- input/fdw_create.source | 4 ---- mod.c | 3 +++ output/fdw_create.source | 2 -- sql/am_alter.sql | 38 ++++++++++++++++----------------- sql/am_block_filtering.sql | 8 +++---- sql/am_copyto.sql | 6 +++--- sql/am_create.sql | 34 +++++------------------------ sql/am_data_types.sql | 24 ++++++++++----------- sql/am_drop.sql | 16 +++++++------- sql/am_functions.sql | 8 +++---- sql/am_insert.sql | 8 +++---- sql/am_load.sql | 6 +++--- sql/am_query.sql | 6 +++--- sql/am_truncate.sql | 12 +++++------ sql/clean.sql | 11 ++++++++++ sql/extension_create.sql | 4 ++++ 26 files changed, 158 insertions(+), 156 deletions(-) create mode 100644 expected/clean.out create mode 100644 expected/extension_create.out create mode 100644 sql/clean.sql create mode 100644 sql/extension_create.sql diff --git a/Makefile b/Makefile index 2fc550ca9..ad85b294a 100644 --- a/Makefile +++ b/Makefile @@ -14,8 +14,8 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -REGRESS = am_create am_load am_query am_analyze am_data_types am_functions \ - am_block_filtering am_drop 
am_insert am_copyto am_alter am_truncate \ +REGRESS = extension_create am_create am_load am_query am_analyze am_data_types am_functions \ + am_block_filtering am_drop am_insert am_copyto am_alter am_truncate clean \ fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ diff --git a/cstore_tableam.c b/cstore_tableam.c index f93971c59..21c1aab1f 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -43,6 +43,7 @@ typedef struct CStoreScanDescData typedef struct CStoreScanDescData *CStoreScanDesc; static TableWriteState *CStoreWriteState = NULL; +static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; static CStoreOptions * CStoreGetDefaultOptions(void) @@ -71,7 +72,7 @@ cstore_init_write_state(Relation relation) CStoreOptions *cstoreOptions = CStoreGetDefaultOptions(); TupleDesc tupdesc = RelationGetDescr(relation); - elog(NOTICE, "initializing write state for relation %d", relation->rd_id); + elog(LOG, "initializing write state for relation %d", relation->rd_id); CStoreWriteState = CStoreBeginWrite(relation->rd_id, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, @@ -87,7 +88,7 @@ cstore_free_write_state() { if (CStoreWriteState != NULL) { - elog(NOTICE, "flushing write state for relation %d", CStoreWriteState->relation->rd_id); + elog(LOG, "flushing write state for relation %d", CStoreWriteState->relation->rd_id); CStoreEndWrite(CStoreWriteState); CStoreWriteState = NULL; } @@ -495,6 +496,29 @@ cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, elog(ERROR, "cstore_scan_sample_next_tuple not implemented"); } +static void +CStoreExecutorEnd(QueryDesc *queryDesc) +{ + cstore_free_write_state(); + if (PreviousExecutorEndHook) + PreviousExecutorEndHook(queryDesc); + else + standard_ExecutorEnd(queryDesc); +} + +void +cstore_tableam_init() +{ + 
PreviousExecutorEndHook = ExecutorEnd_hook; + ExecutorEnd_hook = CStoreExecutorEnd; +} + +void +cstore_tableam_finish() +{ + ExecutorEnd_hook = PreviousExecutorEndHook; +} + static const TableAmRoutine cstore_am_methods = { .type = T_TableAmRoutine, diff --git a/cstore_tableam.h b/cstore_tableam.h index f81c13155..bd1f3805e 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -5,3 +5,5 @@ const TableAmRoutine *GetCstoreTableAmRoutine(void); Datum cstore_tableam_handler(PG_FUNCTION_ARGS); extern void cstore_free_write_state(void); +extern void cstore_tableam_init(void); +extern void cstore_tableam_finish(void); diff --git a/expected/clean.out b/expected/clean.out new file mode 100644 index 000000000..85b25987b --- /dev/null +++ b/expected/clean.out @@ -0,0 +1,10 @@ +DROP TABLE test_insert_command; +DROP TABLE collation_block_filtering_test; +DROP TABLE test_null_values; +DROP TABLE test_other_types; +DROP TABLE test_range_types; +DROP TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP TABLE test_datetime_types; +DROP TABLE test_array_types; diff --git a/expected/extension_create.out b/expected/extension_create.out new file mode 100644 index 000000000..c4d94e1e5 --- /dev/null +++ b/expected/extension_create.out @@ -0,0 +1,2 @@ +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; diff --git a/input/am_block_filtering.source b/input/am_block_filtering.source index dc3170f0d..0225bde16 100644 --- a/input/am_block_filtering.source +++ b/input/am_block_filtering.source @@ -28,8 +28,8 @@ $$ LANGUAGE PLPGSQL; -- Create and load data -CREATE FOREIGN TABLE test_block_filtering (a int) - SERVER cstore_server +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam OPTIONS(block_row_count '1000', stripe_row_count '2000'); COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; @@ -58,8 +58,8 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET -- Verify that we are fine 
with collations which use a different alphabet order -CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server; +CREATE TABLE collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; COPY collation_block_filtering_test FROM STDIN; A Å diff --git a/input/am_copyto.source b/input/am_copyto.source index a4b753a8d..bb333bacf 100644 --- a/input/am_copyto.source +++ b/input/am_copyto.source @@ -1,9 +1,9 @@ -- -- Test copying data from cstore_fdw tables. -- -CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- load table data from file COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; @@ -14,4 +14,4 @@ COPY test_contestant TO STDOUT; -- export using COPY (SELECT * FROM table) TO ... COPY (select * from test_contestant) TO STDOUT; -DROP FOREIGN TABLE test_contestant CASCADE; +DROP TABLE test_contestant CASCADE; diff --git a/input/am_create.source b/input/am_create.source index ba52137c1..8a1612f7a 100644 --- a/input/am_create.source +++ b/input/am_create.source @@ -1,42 +1,18 @@ -- --- Test the CREATE statements related to cstore_fdw. +-- Test the CREATE statements related to cstore. 
-- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; - -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; - - --- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server - OPTIONS(bad_option_name '1'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () - SERVER cstore_server - OPTIONS(stripe_row_count '0'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () - SERVER cstore_server - OPTIONS(block_row_count '0'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_compression_type () - SERVER cstore_server - OPTIONS(compression 'invalid_compression'); -- ERROR - -- Create uncompressed table -CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- Create compressed table with automatically determined file path -CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(compression 'pglz'); + USING cstore_tableam; -- Test that querying an empty table works ANALYZE contestant; diff --git a/input/am_data_types.source b/input/am_data_types.source index ec83c4d8c..24c661090 100644 --- a/input/am_data_types.source +++ b/input/am_data_types.source @@ -10,8 +10,8 @@ SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types -CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server; +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; @@ -19,9 +19,9 @@ SELECT * FROM test_array_types; -- Test 
date/time types -CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, +CREATE TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server; + interval interval) USING cstore_tableam; COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; @@ -32,8 +32,8 @@ SELECT * FROM test_datetime_types; CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); -CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server; +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; COPY test_enum_and_composite_types FROM '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; @@ -42,8 +42,8 @@ SELECT * FROM test_enum_and_composite_types; -- Test range types -CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server; +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; @@ -51,8 +51,8 @@ SELECT * FROM test_range_types; -- Test other types -CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; @@ -60,8 +60,8 @@ SELECT * FROM test_other_types; -- Test null values -CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server; +CREATE TABLE test_null_values (a int, b int[], c composite_type) + USING cstore_tableam; 
COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; diff --git a/input/am_load.source b/input/am_load.source index 0913acde7..c2ad581e8 100644 --- a/input/am_load.source +++ b/input/am_load.source @@ -23,8 +23,8 @@ COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv WITH CSV; -- Test column list -CREATE FOREIGN TABLE famous_constants (id int, name text, value real) - SERVER cstore_server; +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; COPY famous_constants (value, name, id) FROM STDIN WITH CSV; 3.141,pi,1 2.718,e,2 @@ -41,4 +41,4 @@ speed of light,2.997e8 SELECT * FROM famous_constants ORDER BY id, name; -DROP FOREIGN TABLE famous_constants; +DROP TABLE famous_constants; diff --git a/input/fdw_create.source b/input/fdw_create.source index ba52137c1..bb3a38e28 100644 --- a/input/fdw_create.source +++ b/input/fdw_create.source @@ -2,10 +2,6 @@ -- Test the CREATE statements related to cstore_fdw. -- - --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; - CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; diff --git a/mod.c b/mod.c index d962e9820..4268126e3 100644 --- a/mod.c +++ b/mod.c @@ -16,6 +16,7 @@ #include "fmgr.h" #include "mod.h" +#include "cstore_tableam.h" #include "cstore_fdw.h" PG_MODULE_MAGIC; @@ -23,6 +24,7 @@ PG_MODULE_MAGIC; void _PG_init(void) { + cstore_tableam_init(); cstore_fdw_init(); } @@ -30,5 +32,6 @@ _PG_init(void) void _PG_fini(void) { + cstore_tableam_finish(); cstore_fdw_finish(); } diff --git a/output/fdw_create.source b/output/fdw_create.source index 961c0494d..41f17fdd8 100644 --- a/output/fdw_create.source +++ b/output/fdw_create.source @@ -1,8 +1,6 @@ -- -- Test the CREATE statements related to cstore_fdw. 
-- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests CREATE FOREIGN TABLE test_validator_invalid_option () diff --git a/sql/am_alter.sql b/sql/am_alter.sql index 5ba3beb34..3b608f9cd 100644 --- a/sql/am_alter.sql +++ b/sql/am_alter.sql @@ -2,7 +2,7 @@ -- Testing ALTER TABLE on cstore_fdw tables. -- -CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +CREATE TABLE test_alter_table (a int, b int, c int) USING cstore_tableam; WITH sample_data AS (VALUES (1, 2, 3), @@ -12,7 +12,7 @@ WITH sample_data AS (VALUES INSERT INTO test_alter_table SELECT * FROM sample_data; -- drop a column -ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +ALTER TABLE test_alter_table DROP COLUMN a; -- test analyze ANALYZE test_alter_table; @@ -28,29 +28,29 @@ INSERT INTO test_alter_table (SELECT 5, 8); -- add a column with no defaults -ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +ALTER TABLE test_alter_table ADD COLUMN d int; SELECT * FROM test_alter_table; INSERT INTO test_alter_table (SELECT 3, 5, 8); SELECT * FROM test_alter_table; -- add a fixed-length column with default value -ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +ALTER TABLE test_alter_table ADD COLUMN e int default 3; SELECT * from test_alter_table; INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); SELECT * from test_alter_table; -- add a variable-length column with default value -ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +ALTER TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; SELECT * from test_alter_table; INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); SELECT * from test_alter_table; -- drop couple of columns -ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; -ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ALTER TABLE test_alter_table DROP COLUMN c; +ALTER TABLE test_alter_table DROP COLUMN e; ANALYZE 
test_alter_table; SELECT * from test_alter_table; SELECT count(*) from test_alter_table; @@ -58,28 +58,28 @@ SELECT count(t.*) from test_alter_table t; -- unsupported default values -ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); -ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +ALTER TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; SELECT * FROM test_alter_table; -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +ALTER TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; SELECT * FROM test_alter_table; -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ALTER TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; ANALYZE test_alter_table; SELECT * FROM test_alter_table; -- unsupported type change -ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; -ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; -ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; +ALTER TABLE test_alter_table ADD COLUMN i int; +ALTER TABLE test_alter_table ADD COLUMN j float; +ALTER TABLE test_alter_table ADD COLUMN k text; -- this is valid type change -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +ALTER TABLE test_alter_table ALTER COLUMN i TYPE float; -- this is not valid -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; +ALTER TABLE test_alter_table ALTER COLUMN j TYPE int; -- text / varchar conversion is valid both ways -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; +ALTER TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER TABLE test_alter_table ALTER COLUMN k TYPE text; -DROP FOREIGN TABLE test_alter_table; +DROP TABLE test_alter_table; diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql index bb90c72ca..c7d0e997c 100644 --- 
a/sql/am_block_filtering.sql +++ b/sql/am_block_filtering.sql @@ -28,8 +28,8 @@ $$ LANGUAGE PLPGSQL; -- Create and load data -CREATE FOREIGN TABLE test_block_filtering (a int) - SERVER cstore_server +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam OPTIONS(block_row_count '1000', stripe_row_count '2000'); COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; @@ -58,8 +58,8 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET -- Verify that we are fine with collations which use a different alphabet order -CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server; +CREATE TABLE collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; COPY collation_block_filtering_test FROM STDIN; A Å diff --git a/sql/am_copyto.sql b/sql/am_copyto.sql index 4e9e839b7..7288ff66f 100644 --- a/sql/am_copyto.sql +++ b/sql/am_copyto.sql @@ -1,9 +1,9 @@ -- -- Test copying data from cstore_fdw tables. -- -CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- load table data from file COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; @@ -14,4 +14,4 @@ COPY test_contestant TO STDOUT; -- export using COPY (SELECT * FROM table) TO ... COPY (select * from test_contestant) TO STDOUT; -DROP FOREIGN TABLE test_contestant CASCADE; +DROP TABLE test_contestant CASCADE; diff --git a/sql/am_create.sql b/sql/am_create.sql index ba52137c1..8a1612f7a 100644 --- a/sql/am_create.sql +++ b/sql/am_create.sql @@ -1,42 +1,18 @@ -- --- Test the CREATE statements related to cstore_fdw. +-- Test the CREATE statements related to cstore. 
-- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; - -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; - - --- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server - OPTIONS(bad_option_name '1'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () - SERVER cstore_server - OPTIONS(stripe_row_count '0'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () - SERVER cstore_server - OPTIONS(block_row_count '0'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_compression_type () - SERVER cstore_server - OPTIONS(compression 'invalid_compression'); -- ERROR - -- Create uncompressed table -CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- Create compressed table with automatically determined file path -CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(compression 'pglz'); + USING cstore_tableam; -- Test that querying an empty table works ANALYZE contestant; diff --git a/sql/am_data_types.sql b/sql/am_data_types.sql index 092538a57..b2668e71f 100644 --- a/sql/am_data_types.sql +++ b/sql/am_data_types.sql @@ -10,8 +10,8 @@ SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types -CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server; +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; @@ -19,9 +19,9 @@ SELECT * FROM test_array_types; -- Test date/time 
types -CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, +CREATE TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server; + interval interval) USING cstore_tableam; COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; @@ -32,8 +32,8 @@ SELECT * FROM test_datetime_types; CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); -CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server; +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; COPY test_enum_and_composite_types FROM '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; @@ -42,8 +42,8 @@ SELECT * FROM test_enum_and_composite_types; -- Test range types -CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server; +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; @@ -51,8 +51,8 @@ SELECT * FROM test_range_types; -- Test other types -CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; @@ -60,8 +60,8 @@ SELECT * FROM test_other_types; -- Test null values -CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server; +CREATE TABLE test_null_values (a int, b int[], 
c composite_type) + USING cstore_tableam; COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; diff --git a/sql/am_drop.sql b/sql/am_drop.sql index c64b5c99b..5945a9f2c 100644 --- a/sql/am_drop.sql +++ b/sql/am_drop.sql @@ -1,7 +1,7 @@ -- -- Tests the different DROP commands for cstore_fdw tables. -- --- DROP FOREIGN TABL +-- DROP TABL -- DROP SCHEMA -- DROP EXTENSION -- DROP DATABASE @@ -16,12 +16,12 @@ SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -- DROP cstore_fdw tables -DROP FOREIGN TABLE contestant; -DROP FOREIGN TABLE contestant_compressed; +DROP TABLE contestant; +DROP TABLE contestant_compressed; -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; -CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; DROP SCHEMA test_schema CASCADE; SELECT current_database() datname \gset @@ -29,19 +29,19 @@ SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +CREATE USING cstore_tableam DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset -CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +CREATE TABLE test_table(data int) USING cstore_tableam; DROP EXTENSION cstore_fdw CASCADE; -- test database drop CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +CREATE USING cstore_tableam DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset -CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +CREATE TABLE test_table(data int) USING cstore_tableam; \c :datname diff --git a/sql/am_functions.sql b/sql/am_functions.sql index ed7e260b3..70624e6d4 100644 --- a/sql/am_functions.sql +++ 
b/sql/am_functions.sql @@ -2,8 +2,8 @@ -- Test utility functions for cstore_fdw tables. -- -CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; -CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE empty_table (a int) USING cstore_tableam; +CREATE TABLE table_with_data (a int) USING cstore_tableam; CREATE TABLE non_cstore_table (a int); COPY table_with_data FROM STDIN; @@ -15,6 +15,6 @@ COPY table_with_data FROM STDIN; SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); SELECT cstore_table_size('non_cstore_table'); -DROP FOREIGN TABLE empty_table; -DROP FOREIGN TABLE table_with_data; +DROP TABLE empty_table; +DROP TABLE table_with_data; DROP TABLE non_cstore_table; diff --git a/sql/am_insert.sql b/sql/am_insert.sql index 7a6b075ce..b249828e7 100644 --- a/sql/am_insert.sql +++ b/sql/am_insert.sql @@ -2,7 +2,7 @@ -- Testing insert on cstore_fdw tables. -- -CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +CREATE TABLE test_insert_command (a int) USING cstore_tableam; -- test single row inserts fail select count(*) from test_insert_command; @@ -37,8 +37,8 @@ CREATE TABLE test_long_text_hash AS SELECT int_val, md5(text_val) AS hash FROM test_long_text; -CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) -SERVER cstore_server; +CREATE TABLE test_cstore_long_text(int_val int, text_val text) +USING cstore_tableam; -- store long text in cstore table INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; @@ -53,4 +53,4 @@ FROM test_long_text_hash a, test_cstore_long_text c WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); DROP TABLE test_long_text_hash; -DROP FOREIGN TABLE test_cstore_long_text; +DROP TABLE test_cstore_long_text; diff --git a/sql/am_load.sql b/sql/am_load.sql index 7f9238b57..c7e9e5287 100644 --- a/sql/am_load.sql +++ b/sql/am_load.sql @@ -23,8 +23,8 @@ COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/cont 
WITH CSV; -- Test column list -CREATE FOREIGN TABLE famous_constants (id int, name text, value real) - SERVER cstore_server; +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; COPY famous_constants (value, name, id) FROM STDIN WITH CSV; 3.141,pi,1 2.718,e,2 @@ -41,4 +41,4 @@ speed of light,2.997e8 SELECT * FROM famous_constants ORDER BY id, name; -DROP FOREIGN TABLE famous_constants; +DROP TABLE famous_constants; diff --git a/sql/am_query.sql b/sql/am_query.sql index 87743e7bd..7ac8c2ea4 100644 --- a/sql/am_query.sql +++ b/sql/am_query.sql @@ -23,12 +23,12 @@ SELECT * FROM contestant_compressed ORDER BY handle; SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; -- Test variables used in expressions -CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; +CREATE TABLE union_first (a int, b int) USING cstore_tableam; +CREATE TABLE union_second (a int, b int) USING cstore_tableam; INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; (SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); -DROP FOREIGN TABLE union_first, union_second; +DROP TABLE union_first, union_second; diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index a1849045e..cc02c1805 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -7,9 +7,9 @@ SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; -- CREATE a cstore_fdw table, fill with some data -- -CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; 
+CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam OPTIONS (compression 'pglz'); CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; @@ -70,13 +70,13 @@ SELECT cstore_truncate_test_regular_func(); SELECT cstore_truncate_test_regular_func(); DROP FUNCTION cstore_truncate_test_regular_func(); -DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test, cstore_truncate_test_second; DROP TABLE cstore_truncate_test_regular; -DROP FOREIGN TABLE cstore_truncate_test_compressed; +DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; -CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam OPTIONS(compression 'pglz'); INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; diff --git a/sql/clean.sql b/sql/clean.sql new file mode 100644 index 000000000..2e038d321 --- /dev/null +++ b/sql/clean.sql @@ -0,0 +1,11 @@ + +DROP TABLE test_insert_command; +DROP TABLE collation_block_filtering_test; +DROP TABLE test_null_values; +DROP TABLE test_other_types; +DROP TABLE test_range_types; +DROP TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP TABLE test_datetime_types; +DROP TABLE test_array_types; diff --git a/sql/extension_create.sql b/sql/extension_create.sql new file mode 100644 index 000000000..2e73f5be7 --- /dev/null +++ b/sql/extension_create.sql @@ -0,0 +1,4 @@ + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + From 18f6829621463184a250ad579c4a3dfbf2312b0c Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 17:24:00 -0700 Subject: [PATCH 33/91] 
more fixes --- cstore_tableam.c | 10 ++++++++++ expected/am_create.out | 28 ++-------------------------- input/am_block_filtering.source | 3 +-- output/am_create.source | 28 ++-------------------------- sql/am_block_filtering.sql | 3 +-- sql/am_functions.sql | 4 ++-- sql/am_truncate.sql | 6 +++--- sql/clean.sql | 1 + 8 files changed, 22 insertions(+), 61 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 21c1aab1f..09bc8e5e4 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -44,6 +44,7 @@ typedef struct CStoreScanDescData *CStoreScanDesc; static TableWriteState *CStoreWriteState = NULL; static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; +static MemoryContext CStoreContext = NULL; static CStoreOptions * CStoreGetDefaultOptions(void) @@ -71,13 +72,22 @@ cstore_init_write_state(Relation relation) { CStoreOptions *cstoreOptions = CStoreGetDefaultOptions(); TupleDesc tupdesc = RelationGetDescr(relation); + MemoryContext oldContext; + + if (CStoreContext == NULL) + { + CStoreContext = AllocSetContextCreate(TopMemoryContext, "cstore context", + ALLOCSET_DEFAULT_SIZES); + } elog(LOG, "initializing write state for relation %d", relation->rd_id); + oldContext = MemoryContextSwitchTo(CStoreContext); CStoreWriteState = CStoreBeginWrite(relation->rd_id, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupdesc); + MemoryContextSwitchTo(oldContext); CStoreWriteState->relation = relation; } diff --git a/expected/am_create.out b/expected/am_create.out index 961c0494d..56a8b52af 100644 --- a/expected/am_create.out +++ b/expected/am_create.out @@ -1,39 +1,15 @@ -- -- Test the CREATE statements related to cstore_fdw. 
-- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server - OPTIONS(bad_option_name '1'); -- ERROR -ERROR: invalid option "bad_option_name" -HINT: Valid options in this context are: compression, stripe_row_count, block_row_count -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () - SERVER cstore_server - OPTIONS(stripe_row_count '0'); -- ERROR -ERROR: invalid stripe row count -HINT: Stripe row count must be an integer between 1000 and 10000000 -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () - SERVER cstore_server - OPTIONS(block_row_count '0'); -- ERROR -ERROR: invalid block row count -HINT: Block row count must be an integer between 1000 and 100000 -CREATE FOREIGN TABLE test_validator_invalid_compression_type () - SERVER cstore_server - OPTIONS(compression 'invalid_compression'); -- ERROR -ERROR: invalid compression type -HINT: Valid options are: none, pglz -- Create uncompressed table CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- Create compressed table with automatically determined file path CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(compression 'pglz'); + USING cstore_tableam -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM contestant; diff --git a/input/am_block_filtering.source b/input/am_block_filtering.source index 0225bde16..7ca6862c7 100644 --- a/input/am_block_filtering.source +++ b/input/am_block_filtering.source @@ -29,8 +29,7 @@ $$ LANGUAGE PLPGSQL; -- Create and load data CREATE TABLE test_block_filtering (a int) - USING cstore_tableam - OPTIONS(block_row_count '1000', 
stripe_row_count '2000'); + USING cstore_tableam; COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; diff --git a/output/am_create.source b/output/am_create.source index 961c0494d..56a8b52af 100644 --- a/output/am_create.source +++ b/output/am_create.source @@ -1,39 +1,15 @@ -- -- Test the CREATE statements related to cstore_fdw. -- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server - OPTIONS(bad_option_name '1'); -- ERROR -ERROR: invalid option "bad_option_name" -HINT: Valid options in this context are: compression, stripe_row_count, block_row_count -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () - SERVER cstore_server - OPTIONS(stripe_row_count '0'); -- ERROR -ERROR: invalid stripe row count -HINT: Stripe row count must be an integer between 1000 and 10000000 -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () - SERVER cstore_server - OPTIONS(block_row_count '0'); -- ERROR -ERROR: invalid block row count -HINT: Block row count must be an integer between 1000 and 100000 -CREATE FOREIGN TABLE test_validator_invalid_compression_type () - SERVER cstore_server - OPTIONS(compression 'invalid_compression'); -- ERROR -ERROR: invalid compression type -HINT: Valid options are: none, pglz -- Create uncompressed table CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- Create compressed table with automatically determined file path CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(compression 'pglz'); + USING cstore_tableam -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM 
contestant; diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql index c7d0e997c..38c63535c 100644 --- a/sql/am_block_filtering.sql +++ b/sql/am_block_filtering.sql @@ -29,8 +29,7 @@ $$ LANGUAGE PLPGSQL; -- Create and load data CREATE TABLE test_block_filtering (a int) - USING cstore_tableam - OPTIONS(block_row_count '1000', stripe_row_count '2000'); + USING cstore_tableam; COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; diff --git a/sql/am_functions.sql b/sql/am_functions.sql index 70624e6d4..a466d925d 100644 --- a/sql/am_functions.sql +++ b/sql/am_functions.sql @@ -12,8 +12,8 @@ COPY table_with_data FROM STDIN; 3 \. -SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); -SELECT cstore_table_size('non_cstore_table'); +SELECT pg_relation_size('empty_table') < cstore_table_size('table_with_data'); +SELECT pg_relation_size('non_cstore_table'); DROP TABLE empty_table; DROP TABLE table_with_data; diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index cc02c1805..e124a7831 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -9,7 +9,7 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; -- CREATE a cstore_fdw table, fill with some data -- CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; -CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; @@ -30,7 +30,7 @@ SELECT count(*) FROM cstore_truncate_test_compressed; TRUNCATE TABLE cstore_truncate_test_compressed; SELECT count(*) FROM cstore_truncate_test_compressed; -SELECT cstore_table_size('cstore_truncate_test_compressed'); 
+SELECT pg_relation_size('cstore_truncate_test_compressed'); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; @@ -76,7 +76,7 @@ DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; -CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam OPTIONS(compression 'pglz'); +CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; diff --git a/sql/clean.sql b/sql/clean.sql index 2e038d321..3375ebeb6 100644 --- a/sql/clean.sql +++ b/sql/clean.sql @@ -1,4 +1,5 @@ +DROP TABLE test_block_filtering; DROP TABLE test_insert_command; DROP TABLE collation_block_filtering_test; DROP TABLE test_null_values; From 83f2d4aef2fcb9f817a543cf0f4e9235139b91ab Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 17:30:19 -0700 Subject: [PATCH 34/91] more fixes --- cstore_tableam.c | 16 +++++++++++++++- expected/am_create.out | 9 ++++----- output/am_create.source | 9 ++++----- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 09bc8e5e4..9478f86ae 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -458,7 +458,21 @@ cstore_index_validate_scan(Relation heapRelation, static uint64 cstore_relation_size(Relation rel, ForkNumber forkNumber) { - elog(ERROR, "cstore_relation_size not implemented"); + uint64 nblocks = 0; + + /* Open it at the smgr level if not already done */ + RelationOpenSmgr(rel); + + /* InvalidForkNumber indicates returning the size for all forks */ + if (forkNumber == InvalidForkNumber) + { + for (int i = 0; i < MAX_FORKNUM; i++) + nblocks += smgrnblocks(rel->rd_smgr, i); + } + else + nblocks = smgrnblocks(rel->rd_smgr, forkNumber); + + return nblocks * BLCKSZ; } static bool diff --git 
a/expected/am_create.out b/expected/am_create.out index 56a8b52af..e62447252 100644 --- a/expected/am_create.out +++ b/expected/am_create.out @@ -1,15 +1,14 @@ -- --- Test the CREATE statements related to cstore_fdw. +-- Test the CREATE statements related to cstore. -- --- Validator tests -- Create uncompressed table -CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; -- Create compressed table with automatically determined file path -CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam + USING cstore_tableam; -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM contestant; diff --git a/output/am_create.source b/output/am_create.source index 56a8b52af..e62447252 100644 --- a/output/am_create.source +++ b/output/am_create.source @@ -1,15 +1,14 @@ -- --- Test the CREATE statements related to cstore_fdw. +-- Test the CREATE statements related to cstore. 
-- --- Validator tests -- Create uncompressed table -CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; -- Create compressed table with automatically determined file path -CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam + USING cstore_tableam; -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM contestant; From 7ba75fc2a61130036dbd68a59893f06e2651dc69 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 17:43:00 -0700 Subject: [PATCH 35/91] more tests pass --- expected/am_data_types.out | 24 ++++++++++++------------ expected/am_functions.out | 10 +++++----- expected/am_load.out | 7 ++++--- expected/am_query.out | 6 +++--- output/am_data_types.source | 24 ++++++++++++------------ output/am_load.source | 7 ++++--- sql/am_functions.sql | 4 ++-- 7 files changed, 42 insertions(+), 40 deletions(-) diff --git a/expected/am_data_types.out b/expected/am_data_types.out index a27a25eb9..a597ff8de 100644 --- a/expected/am_data_types.out +++ b/expected/am_data_types.out @@ -6,8 +6,8 @@ SET datestyle = "ISO, YMD"; SET timezone to 'GMT'; SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types -CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server; +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; SELECT * FROM test_array_types; int_array | bigint_array | text_array @@ -18,9 +18,9 @@ SELECT * FROM test_array_types; (3 rows) -- Test date/time types -CREATE FOREIGN 
TABLE test_datetime_types (timestamp timestamp, +CREATE TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server; + interval interval) USING cstore_tableam; COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; SELECT * FROM test_datetime_types; timestamp | timestamp_with_timezone | date | time | interval @@ -32,8 +32,8 @@ SELECT * FROM test_datetime_types; -- Test enum and composite types CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); -CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server; +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; COPY test_enum_and_composite_types FROM '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; SELECT * FROM test_enum_and_composite_types; @@ -44,8 +44,8 @@ SELECT * FROM test_enum_and_composite_types; (2 rows) -- Test range types -CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server; +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; SELECT * FROM test_range_types; int4range | int8range | numrange | tsrange @@ -55,8 +55,8 @@ SELECT * FROM test_range_types; (2 rows) -- Test other types -CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; COPY test_other_types FROM 
'/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; SELECT * FROM test_other_types; bool | bytea | money | inet | bitstring | uuid | json @@ -66,8 +66,8 @@ SELECT * FROM test_other_types; (2 rows) -- Test null values -CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server; +CREATE TABLE test_null_values (a int, b int[], c composite_type) + USING cstore_tableam; COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; SELECT * FROM test_null_values; a | b | c diff --git a/expected/am_functions.out b/expected/am_functions.out index 117fc15f9..6351ba0bf 100644 --- a/expected/am_functions.out +++ b/expected/am_functions.out @@ -1,11 +1,11 @@ -- -- Test utility functions for cstore_fdw tables. -- -CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; -CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE empty_table (a int) USING cstore_tableam; +CREATE TABLE table_with_data (a int) USING cstore_tableam; CREATE TABLE non_cstore_table (a int); COPY table_with_data FROM STDIN; -SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); +SELECT pg_relation_size('empty_table') < pg_relation_size('table_with_data'); ?column? 
---------- t @@ -13,6 +13,6 @@ SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); SELECT cstore_table_size('non_cstore_table'); ERROR: relation is not a cstore table -DROP FOREIGN TABLE empty_table; -DROP FOREIGN TABLE table_with_data; +DROP TABLE empty_table; +DROP TABLE table_with_data; DROP TABLE non_cstore_table; diff --git a/expected/am_load.out b/expected/am_load.out index 162ece55b..110e444fa 100644 --- a/expected/am_load.out +++ b/expected/am_load.out @@ -5,6 +5,7 @@ COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH DELIMITER '|'; -- ERROR ERROR: missing data for column "birthdate" +CONTEXT: COPY contestant, line 1: "a,1990-01-10,2090,97.1,XA ,{a}" -- COPY with invalid program COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR ERROR: program "invalid_program" failed @@ -19,8 +20,8 @@ COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.cs COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; -- Test column list -CREATE FOREIGN TABLE famous_constants (id int, name text, value real) - SERVER cstore_server; +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; COPY famous_constants (value, name, id) FROM STDIN WITH CSV; COPY famous_constants (name, value) FROM STDIN WITH CSV; SELECT * FROM famous_constants ORDER BY id, name; @@ -36,4 +37,4 @@ SELECT * FROM famous_constants ORDER BY id, name; | speed of light | 2.997e+08 (8 rows) -DROP FOREIGN TABLE famous_constants; +DROP TABLE famous_constants; diff --git a/expected/am_query.out b/expected/am_query.out index 7ac3508a4..2f0ff6cc7 100644 --- a/expected/am_query.out +++ b/expected/am_query.out @@ -83,8 +83,8 @@ SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; (1 row) -- Test variables used in expressions -CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE union_second (a int, b int) 
SERVER cstore_server; +CREATE TABLE union_first (a int, b int) USING cstore_tableam; +CREATE TABLE union_second (a int, b int) USING cstore_tableam; INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; (SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); @@ -102,4 +102,4 @@ INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; 15 | 15 (10 rows) -DROP FOREIGN TABLE union_first, union_second; +DROP TABLE union_first, union_second; diff --git a/output/am_data_types.source b/output/am_data_types.source index 23fdcfa29..8431e6ca2 100644 --- a/output/am_data_types.source +++ b/output/am_data_types.source @@ -6,8 +6,8 @@ SET datestyle = "ISO, YMD"; SET timezone to 'GMT'; SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types -CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server; +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; SELECT * FROM test_array_types; int_array | bigint_array | text_array @@ -18,9 +18,9 @@ SELECT * FROM test_array_types; (3 rows) -- Test date/time types -CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, +CREATE TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server; + interval interval) USING cstore_tableam; COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; SELECT * FROM test_datetime_types; timestamp | timestamp_with_timezone | date | time | interval @@ -32,8 +32,8 @@ SELECT * FROM test_datetime_types; -- Test enum and composite types CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); -CREATE FOREIGN TABLE 
test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server; +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; COPY test_enum_and_composite_types FROM '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; SELECT * FROM test_enum_and_composite_types; @@ -44,8 +44,8 @@ SELECT * FROM test_enum_and_composite_types; (2 rows) -- Test range types -CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server; +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; SELECT * FROM test_range_types; int4range | int8range | numrange | tsrange @@ -55,8 +55,8 @@ SELECT * FROM test_range_types; (2 rows) -- Test other types -CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; SELECT * FROM test_other_types; bool | bytea | money | inet | bitstring | uuid | json @@ -66,8 +66,8 @@ SELECT * FROM test_other_types; (2 rows) -- Test null values -CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server; +CREATE TABLE test_null_values (a int, b int[], c composite_type) + USING cstore_tableam; COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; SELECT * FROM test_null_values; a | b | c diff --git a/output/am_load.source b/output/am_load.source index c76f203eb..d1f41f717 100644 --- a/output/am_load.source +++ b/output/am_load.source @@ -5,6 +5,7 @@ COPY contestant FROM 
'@abs_srcdir@/data/contestants.1.csv' WITH DELIMITER '|'; -- ERROR ERROR: missing data for column "birthdate" +CONTEXT: COPY contestant, line 1: "a,1990-01-10,2090,97.1,XA ,{a}" -- COPY with invalid program COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR ERROR: program "invalid_program" failed @@ -19,8 +20,8 @@ COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; -- Test column list -CREATE FOREIGN TABLE famous_constants (id int, name text, value real) - SERVER cstore_server; +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; COPY famous_constants (value, name, id) FROM STDIN WITH CSV; COPY famous_constants (name, value) FROM STDIN WITH CSV; SELECT * FROM famous_constants ORDER BY id, name; @@ -36,4 +37,4 @@ SELECT * FROM famous_constants ORDER BY id, name; | speed of light | 2.997e+08 (8 rows) -DROP FOREIGN TABLE famous_constants; +DROP TABLE famous_constants; diff --git a/sql/am_functions.sql b/sql/am_functions.sql index a466d925d..1945eeb46 100644 --- a/sql/am_functions.sql +++ b/sql/am_functions.sql @@ -12,8 +12,8 @@ COPY table_with_data FROM STDIN; 3 \. -SELECT pg_relation_size('empty_table') < cstore_table_size('table_with_data'); -SELECT pg_relation_size('non_cstore_table'); +SELECT pg_relation_size('empty_table') < pg_relation_size('table_with_data'); +SELECT cstore_table_size('non_cstore_table'); DROP TABLE empty_table; DROP TABLE table_with_data; From fd6b4aeba2bf141bb65ec9c067066f234df7273b Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 17:49:45 -0700 Subject: [PATCH 36/91] more tests... 
--- expected/am_drop.out | 20 ++++++++------------ expected/am_insert.out | 18 ++++++++---------- sql/am_drop.sql | 2 -- sql/am_insert.sql | 2 +- 4 files changed, 17 insertions(+), 25 deletions(-) diff --git a/expected/am_drop.out b/expected/am_drop.out index 926f69337..e1c634d7f 100644 --- a/expected/am_drop.out +++ b/expected/am_drop.out @@ -1,7 +1,7 @@ -- -- Tests the different DROP commands for cstore_fdw tables. -- --- DROP FOREIGN TABL +-- DROP TABL -- DROP SCHEMA -- DROP EXTENSION -- DROP DATABASE @@ -13,28 +13,24 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -- DROP cstore_fdw tables -DROP FOREIGN TABLE contestant; -DROP FOREIGN TABLE contestant_compressed; +DROP TABLE contestant; +DROP TABLE contestant_compressed; -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; -CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; DROP SCHEMA test_schema CASCADE; -NOTICE: drop cascades to foreign table test_schema.test_table +NOTICE: drop cascades to table test_schema.test_table SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset -CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +CREATE TABLE test_table(data int) USING cstore_tableam; DROP EXTENSION cstore_fdw CASCADE; -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to server cstore_server -drop cascades to foreign table test_table +NOTICE: drop cascades to table test_table -- test database drop CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset -CREATE FOREIGN TABLE 
test_table(data int) SERVER cstore_server; +CREATE TABLE test_table(data int) USING cstore_tableam; \c :datname DROP DATABASE db_to_drop; diff --git a/expected/am_insert.out b/expected/am_insert.out index 49d9ed132..8d06d4323 100644 --- a/expected/am_insert.out +++ b/expected/am_insert.out @@ -1,7 +1,7 @@ -- -- Testing insert on cstore_fdw tables. -- -CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +CREATE TABLE test_insert_command (a int) USING cstore_tableam; -- test single row inserts fail select count(*) from test_insert_command; count @@ -10,19 +10,17 @@ select count(*) from test_insert_command; (1 row) insert into test_insert_command values(1); -ERROR: operation is not supported select count(*) from test_insert_command; count ------- - 0 + 1 (1 row) insert into test_insert_command default values; -ERROR: operation is not supported select count(*) from test_insert_command; count ------- - 0 + 2 (1 row) -- test inserting from another table succeed @@ -44,11 +42,11 @@ insert into test_insert_command select * from test_insert_command_data; select count(*) from test_insert_command; count ------- - 1 + 3 (1 row) drop table test_insert_command_data; -drop foreign table test_insert_command; +drop table test_insert_command; -- test long attribute value insertion -- create sufficiently long text so that data is stored in toast CREATE TABLE test_long_text AS @@ -59,8 +57,8 @@ GROUP BY a ORDER BY a; CREATE TABLE test_long_text_hash AS SELECT int_val, md5(text_val) AS hash FROM test_long_text; -CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) -SERVER cstore_server; +CREATE TABLE test_cstore_long_text(int_val int, text_val text) +USING cstore_tableam; -- store long text in cstore table INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; -- drop source table to remove original text from toast @@ -85,4 +83,4 @@ WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); (10 rows) DROP TABLE test_long_text_hash; -DROP 
FOREIGN TABLE test_cstore_long_text; +DROP TABLE test_cstore_long_text; diff --git a/sql/am_drop.sql b/sql/am_drop.sql index 5945a9f2c..f92f90b9d 100644 --- a/sql/am_drop.sql +++ b/sql/am_drop.sql @@ -29,7 +29,6 @@ SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop CREATE EXTENSION cstore_fdw; -CREATE USING cstore_tableam DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE TABLE test_table(data int) USING cstore_tableam; @@ -38,7 +37,6 @@ DROP EXTENSION cstore_fdw CASCADE; -- test database drop CREATE EXTENSION cstore_fdw; -CREATE USING cstore_tableam DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE TABLE test_table(data int) USING cstore_tableam; diff --git a/sql/am_insert.sql b/sql/am_insert.sql index b249828e7..5a6d7d385 100644 --- a/sql/am_insert.sql +++ b/sql/am_insert.sql @@ -23,7 +23,7 @@ insert into test_insert_command select * from test_insert_command_data; select count(*) from test_insert_command; drop table test_insert_command_data; -drop foreign table test_insert_command; +drop table test_insert_command; -- test long attribute value insertion -- create sufficiently long text so that data is stored in toast From c49acc948adf68a307d81a398e685f8df71c64c0 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 18:11:55 -0700 Subject: [PATCH 37/91] more test fixes........ 
--- cstore_tableam.c | 3 ++- expected/am_alter.out | 39 +++++++++++++++++++-------------------- expected/am_copyto.out | 6 +++--- expected/am_truncate.out | 22 +++++++++++----------- output/am_copyto.source | 6 +++--- 5 files changed, 38 insertions(+), 38 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 9478f86ae..381f3edd8 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -143,7 +143,8 @@ cstore_beginscan(Relation relation, Snapshot snapshot, Var *var = makeVar(varno, varattno, vartype, vartypmod, varcollid, varlevelsup); - columnList = lappend(columnList, var); + if (!tupdesc->attrs[i].attisdropped) + columnList = lappend(columnList, var); } readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); diff --git a/expected/am_alter.out b/expected/am_alter.out index 659e2723e..bd0737b4b 100644 --- a/expected/am_alter.out +++ b/expected/am_alter.out @@ -1,7 +1,7 @@ -- -- Testing ALTER TABLE on cstore_fdw tables. -- -CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +CREATE TABLE test_alter_table (a int, b int, c int) USING cstore_tableam; WITH sample_data AS (VALUES (1, 2, 3), (4, 5, 6), @@ -9,7 +9,7 @@ WITH sample_data AS (VALUES ) INSERT INTO test_alter_table SELECT * FROM sample_data; -- drop a column -ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +ALTER TABLE test_alter_table DROP COLUMN a; -- test analyze ANALYZE test_alter_table; -- verify select queries run as expected @@ -40,7 +40,7 @@ LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); ^ INSERT INTO test_alter_table (SELECT 5, 8); -- add a column with no defaults -ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +ALTER TABLE test_alter_table ADD COLUMN d int; SELECT * FROM test_alter_table; b | c | d ---+---+--- @@ -62,7 +62,7 @@ SELECT * FROM test_alter_table; (5 rows) -- add a fixed-length column with default value -ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +ALTER TABLE test_alter_table ADD COLUMN e int 
default 3; SELECT * from test_alter_table; b | c | d | e ---+---+---+--- @@ -86,7 +86,7 @@ SELECT * from test_alter_table; (6 rows) -- add a variable-length column with default value -ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +ALTER TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; SELECT * from test_alter_table; b | c | d | e | f ---+---+---+---+--------- @@ -112,8 +112,8 @@ SELECT * from test_alter_table; (7 rows) -- drop couple of columns -ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; -ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ALTER TABLE test_alter_table DROP COLUMN c; +ALTER TABLE test_alter_table DROP COLUMN e; ANALYZE test_alter_table; SELECT * from test_alter_table; b | d | f @@ -140,16 +140,16 @@ SELECT count(t.*) from test_alter_table t; (1 row) -- unsupported default values -ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); -ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +ALTER TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; SELECT * FROM test_alter_table; ERROR: unsupported default value for column "g" HINT: Expression is either mutable or does not evaluate to constant value -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +ALTER TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; SELECT * FROM test_alter_table; ERROR: unsupported default value for column "h" HINT: Expression is either mutable or does not evaluate to constant value -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ALTER TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; ANALYZE test_alter_table; SELECT * FROM test_alter_table; b | d | f | g | h @@ -164,15 +164,14 @@ SELECT * FROM test_alter_table; (7 rows) -- unsupported type change -ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; -ALTER FOREIGN TABLE test_alter_table 
ADD COLUMN j float; -ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; +ALTER TABLE test_alter_table ADD COLUMN i int; +ALTER TABLE test_alter_table ADD COLUMN j float; +ALTER TABLE test_alter_table ADD COLUMN k text; -- this is valid type change -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +ALTER TABLE test_alter_table ALTER COLUMN i TYPE float; -- this is not valid -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; -ERROR: Column j cannot be cast automatically to type pg_catalog.int4 +ALTER TABLE test_alter_table ALTER COLUMN j TYPE int; -- text / varchar conversion is valid both ways -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; -DROP FOREIGN TABLE test_alter_table; +ALTER TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER TABLE test_alter_table ALTER COLUMN k TYPE text; +DROP TABLE test_alter_table; diff --git a/expected/am_copyto.out b/expected/am_copyto.out index 2b68d0ad5..c8a5f676b 100644 --- a/expected/am_copyto.out +++ b/expected/am_copyto.out @@ -1,9 +1,9 @@ -- -- Test copying data from cstore_fdw tables. -- -CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- load table data from file COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; -- export using COPY table TO ... 
@@ -20,4 +20,4 @@ b 11-01-1990 2203 98.1 XA {a,b} c 11-01-1988 2907 99.4 XB {w,y} d 05-05-1985 2314 98.3 XB {} e 05-05-1995 2236 98.2 XC {a} -DROP FOREIGN TABLE test_contestant CASCADE; +DROP TABLE test_contestant CASCADE; diff --git a/expected/am_truncate.out b/expected/am_truncate.out index c92c15559..538b9ddac 100644 --- a/expected/am_truncate.out +++ b/expected/am_truncate.out @@ -10,9 +10,9 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; (1 row) -- CREATE a cstore_fdw table, fill with some data -- -CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -58,10 +58,10 @@ SELECT count(*) FROM cstore_truncate_test_compressed; 0 (1 row) -SELECT cstore_table_size('cstore_truncate_test_compressed'); - cstore_table_size -------------------- - 0 +SELECT pg_relation_size('cstore_truncate_test_compressed'); + pg_relation_size +------------------ + 0 (1 row) INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; @@ -163,12 +163,12 @@ SELECT cstore_truncate_test_regular_func(); (1 row) DROP FUNCTION cstore_truncate_test_regular_func(); -DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test, cstore_truncate_test_second; DROP TABLE cstore_truncate_test_regular; -DROP FOREIGN TABLE 
cstore_truncate_test_compressed; +DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; -CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; count @@ -227,5 +227,5 @@ SELECT count(*) FROM truncate_schema.truncate_tbl; \c - :current_user -- cleanup DROP SCHEMA truncate_schema CASCADE; -NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +NOTICE: drop cascades to table truncate_schema.truncate_tbl DROP USER truncate_user; diff --git a/output/am_copyto.source b/output/am_copyto.source index a8d841f18..127bdc65d 100644 --- a/output/am_copyto.source +++ b/output/am_copyto.source @@ -1,9 +1,9 @@ -- -- Test copying data from cstore_fdw tables. -- -CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- load table data from file COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- export using COPY table TO ... 
@@ -20,4 +20,4 @@ b 11-01-1990 2203 98.1 XA {a,b} c 11-01-1988 2907 99.4 XB {w,y} d 05-05-1985 2314 98.3 XB {} e 05-05-1995 2236 98.2 XC {a} -DROP FOREIGN TABLE test_contestant CASCADE; +DROP TABLE test_contestant CASCADE; From a3b513167c7db1c13f9c81d2e40a4bb81378af64 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 18:20:33 -0700 Subject: [PATCH 38/91] disable a few tests --- Makefile | 15 +++++++++------ expected/clean.out | 2 -- sql/clean.sql | 3 --- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index ad85b294a..00c991f7a 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,10 @@ MODULE_big = cstore_fdw +ifndef MAJORVERSION + MAJORVERSION := $(basename $(VERSION)) +endif + PG_CPPFLAGS = -std=c11 OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o cstore_tableam.o @@ -14,8 +18,11 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -REGRESS = extension_create am_create am_load am_query am_analyze am_data_types am_functions \ - am_block_filtering am_drop am_insert am_copyto am_alter am_truncate clean \ +# +# disabled tests: am_block_filtering am_analyze am_alter +# +REGRESS = extension_create am_create am_load am_query am_data_types am_functions \ + am_drop am_insert am_copyto am_truncate clean \ fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ @@ -43,10 +50,6 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) -ifndef MAJORVERSION - MAJORVERSION := $(basename $(VERSION)) -endif - ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) $(error PostgreSQL 9.3 to 12 is required to compile this 
extension) endif diff --git a/expected/clean.out b/expected/clean.out index 85b25987b..2c1e82ee6 100644 --- a/expected/clean.out +++ b/expected/clean.out @@ -1,5 +1,3 @@ -DROP TABLE test_insert_command; -DROP TABLE collation_block_filtering_test; DROP TABLE test_null_values; DROP TABLE test_other_types; DROP TABLE test_range_types; diff --git a/sql/clean.sql b/sql/clean.sql index 3375ebeb6..f7dc889fc 100644 --- a/sql/clean.sql +++ b/sql/clean.sql @@ -1,7 +1,4 @@ -DROP TABLE test_block_filtering; -DROP TABLE test_insert_command; -DROP TABLE collation_block_filtering_test; DROP TABLE test_null_values; DROP TABLE test_other_types; DROP TABLE test_range_types; From ada9da609e9898d363119fc564b40c7aa0fde665 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 19:06:15 -0700 Subject: [PATCH 39/91] fixup mod.c --- mod.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mod.c b/mod.c index 4268126e3..cd4072e25 100644 --- a/mod.c +++ b/mod.c @@ -16,7 +16,9 @@ #include "fmgr.h" #include "mod.h" +#if PG_VERSION_NUM >= 120000 #include "cstore_tableam.h" +#endif #include "cstore_fdw.h" PG_MODULE_MAGIC; @@ -24,7 +26,9 @@ PG_MODULE_MAGIC; void _PG_init(void) { +#if PG_VERSION_NUM >= 120000 cstore_tableam_init(); +#endif cstore_fdw_init(); } @@ -32,6 +36,8 @@ _PG_init(void) void _PG_fini(void) { +#if PG_VERSION_NUM >= 120000 cstore_tableam_finish(); +#endif cstore_fdw_finish(); } From 248a2db97044a2102a373d53423947b14738bfeb Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 08:49:25 -0700 Subject: [PATCH 40/91] fixup --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 00c991f7a..b3510156a 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,6 @@ MODULE_big = cstore_fdw -ifndef MAJORVERSION - MAJORVERSION := $(basename $(VERSION)) -endif - PG_CPPFLAGS = -std=c11 OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o cstore_tableam.o @@ 
-50,6 +46,10 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +ifndef MAJORVERSION + MAJORVERSION := $(basename $(VERSION)) +endif + ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) $(error PostgreSQL 9.3 to 12 is required to compile this extension) endif From 3b3d1b1f898ae2eb261faf5e510f84f3f0976294 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 09:36:18 -0700 Subject: [PATCH 41/91] 11 and 12 both pass --- Makefile | 17 ++++++++++++----- cstore_fdw--1.7--1.8.sql | 20 ++++++++++++++------ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index b3510156a..e956d8517 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ MODULE_big = cstore_fdw PG_CPPFLAGS = -std=c11 OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_compression.o mod.o cstore_metadata_tables.o cstore_tableam.o + cstore_compression.o mod.o cstore_metadata_tables.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ @@ -17,15 +17,22 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs # # disabled tests: am_block_filtering am_analyze am_alter # -REGRESS = extension_create am_create am_load am_query am_data_types am_functions \ - am_drop am_insert am_copyto am_truncate clean \ - fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ - fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate +REGRESS = extension_create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ expected/data_types.out expected/load.out expected/copyto.out +VER := $(shell pg_config --version) +ifeq ($(findstring 12,$(VER)),12) + REGRESS += am_create am_load am_query am_data_types am_functions \ + am_drop am_insert 
am_copyto am_truncate clean + OBJS += cstore_tableam.o +endif + +REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ + fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate + ifeq ($(enable_coverage),yes) PG_CPPFLAGS += --coverage SHLIB_LINK += --coverage diff --git a/cstore_fdw--1.7--1.8.sql b/cstore_fdw--1.7--1.8.sql index b1519d73e..8fe9416d1 100644 --- a/cstore_fdw--1.7--1.8.sql +++ b/cstore_fdw--1.7--1.8.sql @@ -1,9 +1,17 @@ /* cstore_fdw/cstore_fdw--1.7--1.8.sql */ -CREATE FUNCTION cstore_tableam_handler(internal) -RETURNS table_am_handler -LANGUAGE C -AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; +DO $proc$ +BEGIN -CREATE ACCESS METHOD cstore_tableam -TYPE TABLE HANDLER cstore_tableam_handler; +IF version() ~ '12' THEN + EXECUTE $$ + CREATE FUNCTION cstore_tableam_handler(internal) + RETURNS table_am_handler + LANGUAGE C + AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; + + CREATE ACCESS METHOD cstore_tableam + TYPE TABLE HANDLER cstore_tableam_handler; + $$; +END IF; +END$proc$; From ec8afe0a5d5cfa0bfc2fd43338734c7a465bcff7 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 10:42:32 -0700 Subject: [PATCH 42/91] better makefile --- Makefile | 53 +++++++++++++++++----------- expected/{clean.out => am_clean.out} | 0 expected/fdw_clean.out | 10 ++++++ sql/{clean.sql => am_clean.sql} | 0 sql/fdw_clean.sql | 10 ++++++ 5 files changed, 53 insertions(+), 20 deletions(-) rename expected/{clean.out => am_clean.out} (100%) create mode 100644 expected/fdw_clean.out rename sql/{clean.sql => am_clean.sql} (100%) create mode 100644 sql/fdw_clean.sql diff --git a/Makefile b/Makefile index e956d8517..61b4aeb87 100644 --- a/Makefile +++ b/Makefile @@ -5,8 +5,26 @@ MODULE_big = cstore_fdw +VER := $(lastword $(shell pg_config --version)) +VER_WORDS = $(subst ., ,$(VER)) + +# versions prior to 10 (those with 3 version numbers) not supported +ifeq ($(words $(VER_WORDS)),3) +$(error version $(VER) not supported) 
+endif + +MVER = $(firstword $(VER_WORDS)) + +ifeq ($(lastword $(sort 12 $(MVER))),$(MVER)) + USE_TABLEAM = yes + USE_FDW = yes +else + USE_TABLEAM = no + USE_FDW = yes +endif + PG_CPPFLAGS = -std=c11 -OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ +OBJS = cstore.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o EXTENSION = cstore_fdw @@ -14,24 +32,27 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -# -# disabled tests: am_block_filtering am_analyze am_alter -# -REGRESS = extension_create +REGRESS = extension_create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ expected/data_types.out expected/load.out expected/copyto.out -VER := $(shell pg_config --version) -ifeq ($(findstring 12,$(VER)),12) - REGRESS += am_create am_load am_query am_data_types am_functions \ - am_drop am_insert am_copyto am_truncate clean - OBJS += cstore_tableam.o +ifeq ($(USE_FDW),yes) + PG_CFLAGS += -DUSE_FDW + OBJS += cstore_fdw.o + REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types \ + fdw_functions fdw_block_filtering fdw_drop fdw_insert \ + fdw_copyto fdw_alter fdw_truncate fdw_clean endif -REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ - fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate +# disabled tests: am_block_filtering am_analyze am_alter +ifeq ($(USE_TABLEAM),yes) + PG_CFLAGS += -DUSE_TABLEAM + OBJS += cstore_tableam.o + REGRESS += am_create am_load am_query am_data_types am_functions \ + am_drop am_insert am_copyto am_truncate am_clean +endif ifeq ($(enable_coverage),yes) PG_CPPFLAGS += --coverage @@ -53,14 +74,6 @@ 
PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) -ifndef MAJORVERSION - MAJORVERSION := $(basename $(VERSION)) -endif - -ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) - $(error PostgreSQL 9.3 to 12 is required to compile this extension) -endif - installcheck: remove_cstore_files remove_cstore_files: diff --git a/expected/clean.out b/expected/am_clean.out similarity index 100% rename from expected/clean.out rename to expected/am_clean.out diff --git a/expected/fdw_clean.out b/expected/fdw_clean.out new file mode 100644 index 000000000..ecd4d67a1 --- /dev/null +++ b/expected/fdw_clean.out @@ -0,0 +1,10 @@ +DROP FOREIGN TABLE collation_block_filtering_test; +DROP FOREIGN TABLE test_block_filtering; +DROP FOREIGN TABLE test_null_values; +DROP FOREIGN TABLE test_other_types; +DROP FOREIGN TABLE test_range_types; +DROP FOREIGN TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP FOREIGN TABLE test_datetime_types; +DROP FOREIGN TABLE test_array_types; diff --git a/sql/clean.sql b/sql/am_clean.sql similarity index 100% rename from sql/clean.sql rename to sql/am_clean.sql diff --git a/sql/fdw_clean.sql b/sql/fdw_clean.sql new file mode 100644 index 000000000..ecd4d67a1 --- /dev/null +++ b/sql/fdw_clean.sql @@ -0,0 +1,10 @@ +DROP FOREIGN TABLE collation_block_filtering_test; +DROP FOREIGN TABLE test_block_filtering; +DROP FOREIGN TABLE test_null_values; +DROP FOREIGN TABLE test_other_types; +DROP FOREIGN TABLE test_range_types; +DROP FOREIGN TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP FOREIGN TABLE test_datetime_types; +DROP FOREIGN TABLE test_array_types; From 4dfec401cef4d96879cae5e97d166403288accda Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 11:06:40 -0700 Subject: [PATCH 43/91] more Makefile cleanup --- Makefile | 8 +++----- mod.c | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git 
a/Makefile b/Makefile index 61b4aeb87..8fac03cc4 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,7 @@ endif MVER = $(firstword $(VER_WORDS)) +# version >= 12? ifeq ($(lastword $(sort 12 $(MVER))),$(MVER)) USE_TABLEAM = yes USE_FDW = yes @@ -32,7 +33,7 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -REGRESS = extension_create +REGRESS = extension_create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ @@ -74,10 +75,7 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) -installcheck: remove_cstore_files - -remove_cstore_files: - rm -f data/*.cstore data/*.cstore.footer +installcheck: reindent: citus_indent . diff --git a/mod.c b/mod.c index cd4072e25..3e041dd7a 100644 --- a/mod.c +++ b/mod.c @@ -16,28 +16,38 @@ #include "fmgr.h" #include "mod.h" -#if PG_VERSION_NUM >= 120000 + +#ifdef USE_TABLEAM #include "cstore_tableam.h" #endif + +#ifdef USE_FDW #include "cstore_fdw.h" +#endif PG_MODULE_MAGIC; void _PG_init(void) { -#if PG_VERSION_NUM >= 120000 +#ifdef USE_TABLEAM cstore_tableam_init(); #endif + +#ifdef USE_FDW cstore_fdw_init(); +#endif } void _PG_fini(void) { -#if PG_VERSION_NUM >= 120000 +#if USE_TABLEAM cstore_tableam_finish(); #endif + +#ifdef USE_FDW cstore_fdw_finish(); +#endif } From d352cd07dd009a2651d90d2079e1126d1d0c8b70 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 11:51:23 -0700 Subject: [PATCH 44/91] citus indent and Makefile fixup --- Makefile | 13 ++--- cstore_tableam.c | 137 ++++++++++++++++++++++++++++++++++------------- cstore_tableam.h | 2 +- 3 files changed, 107 insertions(+), 45 deletions(-) diff --git a/Makefile b/Makefile index 8fac03cc4..9cbf4095a 100644 --- 
a/Makefile +++ b/Makefile @@ -7,21 +7,22 @@ MODULE_big = cstore_fdw VER := $(lastword $(shell pg_config --version)) VER_WORDS = $(subst ., ,$(VER)) +MVER = $(firstword $(VER_WORDS)) -# versions prior to 10 (those with 3 version numbers) not supported -ifeq ($(words $(VER_WORDS)),3) +# error for versions earlier than 10 so that lex comparison will work +ifneq ($(shell printf '%02d' $(MVER)),$(MVER)) $(error version $(VER) not supported) endif -MVER = $(firstword $(VER_WORDS)) - -# version >= 12? +# lexicographic comparison of version number ifeq ($(lastword $(sort 12 $(MVER))),$(MVER)) USE_TABLEAM = yes USE_FDW = yes -else +else ifeq ($(lastword $(sort 11 $(MVER))),$(MVER)) USE_TABLEAM = no USE_FDW = yes +else +$(error version $(VER) is not supported) endif PG_CPPFLAGS = -std=c11 diff --git a/cstore_tableam.c b/cstore_tableam.c index 381f3edd8..fccb9fe6e 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -36,8 +36,8 @@ typedef struct CStoreScanDescData { - TableScanDescData cs_base; - TableReadState *cs_readState; + TableScanDescData cs_base; + TableReadState *cs_readState; } CStoreScanDescData; typedef struct CStoreScanDescData *CStoreScanDesc; @@ -56,15 +56,16 @@ CStoreGetDefaultOptions(void) return cstoreOptions; } + static void cstore_init_write_state(Relation relation) { - //TODO: upgrade lock to serialize writes + /*TODO: upgrade lock to serialize writes */ if (CStoreWriteState != NULL) { - // TODO: consider whether it's possible for a new write to start - // before an old one is flushed + /* TODO: consider whether it's possible for a new write to start */ + /* before an old one is flushed */ Assert(CStoreWriteState->relation->rd_id == relation->rd_id); } @@ -93,35 +94,39 @@ cstore_init_write_state(Relation relation) } } + void cstore_free_write_state() { if (CStoreWriteState != NULL) { - elog(LOG, "flushing write state for relation %d", CStoreWriteState->relation->rd_id); + elog(LOG, "flushing write state for relation %d", + 
CStoreWriteState->relation->rd_id); CStoreEndWrite(CStoreWriteState); CStoreWriteState = NULL; } } + static const TupleTableSlotOps * cstore_slot_callbacks(Relation relation) { return &TTSOpsVirtual; } + static TableScanDesc cstore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags) { - Oid relid = relation->rd_id; - TupleDesc tupdesc = relation->rd_att; - CStoreOptions *cstoreOptions = NULL; - TableReadState *readState = NULL; - CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); - List *columnList = NIL; + Oid relid = relation->rd_id; + TupleDesc tupdesc = relation->rd_att; + CStoreOptions *cstoreOptions = NULL; + TableReadState *readState = NULL; + CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); + List *columnList = NIL; cstoreOptions = CStoreGetDefaultOptions(); @@ -134,19 +139,21 @@ cstore_beginscan(Relation relation, Snapshot snapshot, for (int i = 0; i < tupdesc->natts; i++) { - Index varno = 0; - AttrNumber varattno = i+1; - Oid vartype = tupdesc->attrs[i].atttypid; - int32 vartypmod = 0; - Oid varcollid = 0; - Index varlevelsup = 0; - Var *var = makeVar(varno, varattno, vartype, vartypmod, - varcollid, varlevelsup); + Index varno = 0; + AttrNumber varattno = i + 1; + Oid vartype = tupdesc->attrs[i].atttypid; + int32 vartypmod = 0; + Oid varcollid = 0; + Index varlevelsup = 0; + Var *var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); if (!tupdesc->attrs[i].attisdropped) + { columnList = lappend(columnList, var); + } } - + readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); readState->relation = relation; @@ -155,6 +162,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, return ((TableScanDesc) scan); } + static void cstore_endscan(TableScanDesc sscan) { @@ -162,13 +170,15 @@ cstore_endscan(TableScanDesc sscan) CStoreEndRead(scan->cs_readState); } + static void cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, - 
bool allow_strat, bool allow_sync, bool allow_pagemode) + bool allow_strat, bool allow_sync, bool allow_pagemode) { elog(ERROR, "cstore_rescan not implemented"); } + static bool cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { @@ -181,51 +191,61 @@ cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot memset(slot->tts_values, 0, sizeof(Datum) * natts); memset(slot->tts_isnull, true, sizeof(bool) * natts); - nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, slot->tts_isnull); + nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, + slot->tts_isnull); if (!nextRowFound) + { return false; + } ExecStoreVirtualTuple(slot); return true; } + static Size cstore_parallelscan_estimate(Relation rel) { elog(ERROR, "cstore_parallelscan_estimate not implemented"); } + static Size cstore_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) { elog(ERROR, "cstore_parallelscan_initialize not implemented"); } + static void cstore_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) { elog(ERROR, "cstore_parallelscan_reinitialize not implemented"); } + static IndexFetchTableData * cstore_index_fetch_begin(Relation rel) { elog(ERROR, "cstore_index_fetch_begin not implemented"); } + static void cstore_index_fetch_reset(IndexFetchTableData *scan) { elog(ERROR, "cstore_index_fetch_reset not implemented"); } + static void cstore_index_fetch_end(IndexFetchTableData *scan) { elog(ERROR, "cstore_index_fetch_end not implemented"); } + static bool cstore_index_fetch_tuple(struct IndexFetchTableData *scan, ItemPointer tid, @@ -236,6 +256,7 @@ cstore_index_fetch_tuple(struct IndexFetchTableData *scan, elog(ERROR, "cstore_index_fetch_tuple not implemented"); } + static bool cstore_fetch_row_version(Relation relation, ItemPointer tid, @@ -245,6 +266,7 @@ cstore_fetch_row_version(Relation relation, elog(ERROR, "cstore_fetch_row_version not implemented"); } + static 
void cstore_get_latest_tid(TableScanDesc sscan, ItemPointer tid) @@ -252,12 +274,14 @@ cstore_get_latest_tid(TableScanDesc sscan, elog(ERROR, "cstore_get_latest_tid not implemented"); } + static bool cstore_tuple_tid_valid(TableScanDesc scan, ItemPointer tid) { elog(ERROR, "cstore_tuple_tid_valid not implemented"); } + static bool cstore_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot) @@ -265,6 +289,7 @@ cstore_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, return true; } + static TransactionId cstore_compute_xid_horizon_for_tuples(Relation rel, ItemPointerData *tids, @@ -273,6 +298,7 @@ cstore_compute_xid_horizon_for_tuples(Relation rel, elog(ERROR, "cstore_compute_xid_horizon_for_tuples not implemented"); } + static void cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) @@ -296,6 +322,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, CStoreWriteRow(CStoreWriteState, slot->tts_values, slot->tts_isnull); } + static void cstore_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid, int options, @@ -304,6 +331,7 @@ cstore_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, elog(ERROR, "cstore_tuple_insert_speculative not implemented"); } + static void cstore_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken, bool succeeded) @@ -311,6 +339,7 @@ cstore_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, elog(ERROR, "cstore_tuple_complete_speculative not implemented"); } + static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) @@ -337,6 +366,7 @@ cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, } } + static TM_Result cstore_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool 
wait, @@ -345,6 +375,7 @@ cstore_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, elog(ERROR, "cstore_tuple_delete not implemented"); } + static TM_Result cstore_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, @@ -354,6 +385,7 @@ cstore_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, elog(ERROR, "cstore_tuple_update not implemented"); } + static TM_Result cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, @@ -363,16 +395,18 @@ cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, elog(ERROR, "cstore_tuple_lock not implemented"); } + static void cstore_finish_bulk_insert(Relation relation, int options) { - //TODO: flush relation like for heap? - // free write state or only in ExecutorEnd_hook? + /*TODO: flush relation like for heap? */ + /* free write state or only in ExecutorEnd_hook? 
*/ - // for COPY + /* for COPY */ cstore_free_write_state(); } + static void cstore_relation_set_new_filenode(Relation rel, const RelFileNode *newrnode, @@ -390,18 +424,21 @@ cstore_relation_set_new_filenode(Relation rel, smgrclose(srel); } + static void cstore_relation_nontransactional_truncate(Relation rel) { elog(ERROR, "cstore_relation_nontransactional_truncate not implemented"); } + static void cstore_relation_copy_data(Relation rel, const RelFileNode *newrnode) { elog(ERROR, "cstore_relation_copy_data not implemented"); } + static void cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, bool use_sort, @@ -415,6 +452,7 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, elog(ERROR, "cstore_relation_copy_for_cluster not implemented"); } + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) @@ -422,6 +460,7 @@ cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, elog(ERROR, "cstore_scan_analyze_next_block not implemented"); } + static bool cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, @@ -430,6 +469,7 @@ cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, elog(ERROR, "cstore_scan_analyze_next_tuple not implemented"); } + static double cstore_index_build_range_scan(Relation heapRelation, Relation indexRelation, @@ -446,6 +486,7 @@ cstore_index_build_range_scan(Relation heapRelation, elog(ERROR, "cstore_index_build_range_scan not implemented"); } + static void cstore_index_validate_scan(Relation heapRelation, Relation indexRelation, @@ -456,32 +497,39 @@ cstore_index_validate_scan(Relation heapRelation, elog(ERROR, "cstore_index_validate_scan not implemented"); } + static uint64 cstore_relation_size(Relation rel, ForkNumber forkNumber) { - uint64 nblocks = 0; + uint64 nblocks = 0; - /* Open it at the smgr level if not already done */ - 
RelationOpenSmgr(rel); + /* Open it at the smgr level if not already done */ + RelationOpenSmgr(rel); - /* InvalidForkNumber indicates returning the size for all forks */ - if (forkNumber == InvalidForkNumber) - { - for (int i = 0; i < MAX_FORKNUM; i++) - nblocks += smgrnblocks(rel->rd_smgr, i); - } - else - nblocks = smgrnblocks(rel->rd_smgr, forkNumber); + /* InvalidForkNumber indicates returning the size for all forks */ + if (forkNumber == InvalidForkNumber) + { + for (int i = 0; i < MAX_FORKNUM; i++) + { + nblocks += smgrnblocks(rel->rd_smgr, i); + } + } + else + { + nblocks = smgrnblocks(rel->rd_smgr, forkNumber); + } - return nblocks * BLCKSZ; + return nblocks * BLCKSZ; } + static bool cstore_relation_needs_toast_table(Relation rel) { return false; } + static void cstore_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, @@ -493,6 +541,7 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, *allvisfrac = 1.0; } + static bool cstore_scan_bitmap_next_block(TableScanDesc scan, TBMIterateResult *tbmres) @@ -500,6 +549,7 @@ cstore_scan_bitmap_next_block(TableScanDesc scan, elog(ERROR, "cstore_scan_bitmap_next_block not implemented"); } + static bool cstore_scan_bitmap_next_tuple(TableScanDesc scan, TBMIterateResult *tbmres, @@ -508,12 +558,14 @@ cstore_scan_bitmap_next_tuple(TableScanDesc scan, elog(ERROR, "cstore_scan_bitmap_next_tuple not implemented"); } + static bool cstore_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate) { elog(ERROR, "cstore_scan_sample_next_block not implemented"); } + static bool cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, TupleTableSlot *slot) @@ -521,16 +573,22 @@ cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, elog(ERROR, "cstore_scan_sample_next_tuple not implemented"); } + static void CStoreExecutorEnd(QueryDesc *queryDesc) { cstore_free_write_state(); if (PreviousExecutorEndHook) + { 
PreviousExecutorEndHook(queryDesc); + } else + { standard_ExecutorEnd(queryDesc); + } } + void cstore_tableam_init() { @@ -538,12 +596,14 @@ cstore_tableam_init() ExecutorEnd_hook = CStoreExecutorEnd; } + void cstore_tableam_finish() { ExecutorEnd_hook = PreviousExecutorEndHook; } + static const TableAmRoutine cstore_am_methods = { .type = T_TableAmRoutine, @@ -606,6 +666,7 @@ GetCstoreTableAmRoutine(void) return &cstore_am_methods; } + PG_FUNCTION_INFO_V1(cstore_tableam_handler); Datum cstore_tableam_handler(PG_FUNCTION_ARGS) diff --git a/cstore_tableam.h b/cstore_tableam.h index bd1f3805e..3a556728a 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -2,7 +2,7 @@ #include "fmgr.h" #include "access/tableam.h" -const TableAmRoutine *GetCstoreTableAmRoutine(void); +const TableAmRoutine * GetCstoreTableAmRoutine(void); Datum cstore_tableam_handler(PG_FUNCTION_ARGS); extern void cstore_free_write_state(void); extern void cstore_tableam_init(void); From b9f2b410b5b6dc1b7e9cec3e756a6d5be66f27ac Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 15:29:24 -0700 Subject: [PATCH 45/91] fix am_alter test --- Makefile | 4 ++-- cstore_tableam.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 9cbf4095a..ea5a858bf 100644 --- a/Makefile +++ b/Makefile @@ -48,12 +48,12 @@ ifeq ($(USE_FDW),yes) fdw_copyto fdw_alter fdw_truncate fdw_clean endif -# disabled tests: am_block_filtering am_analyze am_alter +# disabled tests: am_block_filtering am_analyze ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_data_types am_functions \ - am_drop am_insert am_copyto am_truncate am_clean + am_drop am_insert am_copyto am_alter am_truncate am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore_tableam.c b/cstore_tableam.c index fccb9fe6e..e241c19ea 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -457,7 +457,8 @@ static bool 
cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) { - elog(ERROR, "cstore_scan_analyze_next_block not implemented"); + /* TODO */ + return false; } @@ -466,7 +467,8 @@ cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) { - elog(ERROR, "cstore_scan_analyze_next_tuple not implemented"); + /* TODO */ + return false; } From d7f40f3be6e14ccd994fc1918ff2c38c4a07ef00 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 08:43:33 -0700 Subject: [PATCH 46/91] address review comments --- cstore_tableam.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index e241c19ea..c886ebe77 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -145,13 +145,16 @@ cstore_beginscan(Relation relation, Snapshot snapshot, int32 vartypmod = 0; Oid varcollid = 0; Index varlevelsup = 0; - Var *var = makeVar(varno, varattno, vartype, vartypmod, - varcollid, varlevelsup); + Var *var; if (!tupdesc->attrs[i].attisdropped) { - columnList = lappend(columnList, var); + continue; } + + var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); + columnList = lappend(columnList, var); } readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); @@ -183,13 +186,9 @@ static bool cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { CStoreScanDesc scan = (CStoreScanDesc) sscan; - TupleDesc tupdesc = slot->tts_tupleDescriptor; - int natts = tupdesc->natts; bool nextRowFound; ExecClearTuple(slot); - memset(slot->tts_values, 0, sizeof(Datum) * natts); - memset(slot->tts_isnull, true, sizeof(bool) * natts); nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, slot->tts_isnull); @@ -537,6 +536,7 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac) { + /* TODO 
*/ *attr_widths = 12; *tuples = 100; *pages = 10; From 12daf4c317dc83a7f854ca6b11a0f6ccf7326f78 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 09:09:02 -0700 Subject: [PATCH 47/91] add GUCs --- cstore.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ cstore.h | 6 ++++++ mod.c | 3 +++ 3 files changed, 64 insertions(+) diff --git a/cstore.c b/cstore.c index f04fc4fc6..94fc401a1 100644 --- a/cstore.c +++ b/cstore.c @@ -17,10 +17,65 @@ #include #include "miscadmin.h" +#include "utils/guc.h" #include "utils/rel.h" #include "cstore.h" +int cstore_compression = DEFAULT_COMPRESSION_TYPE; +int cstore_stripe_row_count = DEFAULT_STRIPE_ROW_COUNT; +int cstore_block_row_count = DEFAULT_BLOCK_ROW_COUNT; + +static const struct config_enum_entry cstore_compression_options[] = +{ + {"none", COMPRESSION_NONE, false}, + {"pglz", COMPRESSION_PG_LZ, false}, + {NULL, 0, false} +}; + +void +cstore_init() +{ + DefineCustomEnumVariable("cstore.compression", + "Sets the maximum number of statements tracked by pg_stat_statements.", + NULL, + &cstore_compression, + DEFAULT_COMPRESSION_TYPE, + cstore_compression_options, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("cstore.stripe_row_count", + "Sets the maximum number of statements tracked by pg_stat_statements.", + NULL, + &cstore_stripe_row_count, + DEFAULT_STRIPE_ROW_COUNT, + STRIPE_ROW_COUNT_MINIMUM, + STRIPE_ROW_COUNT_MAXIMUM, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("cstore.block_row_count", + "Sets the maximum number of statements tracked by pg_stat_statements.", + NULL, + &cstore_block_row_count, + DEFAULT_BLOCK_ROW_COUNT, + BLOCK_ROW_COUNT_MINIMUM, + BLOCK_ROW_COUNT_MAXIMUM, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); +} + + /* ParseCompressionType converts a string to a compression type. 
*/ CompressionType ParseCompressionType(const char *compressionTypeString) diff --git a/cstore.h b/cstore.h index 9a1764972..cbd60fca1 100644 --- a/cstore.h +++ b/cstore.h @@ -249,6 +249,12 @@ typedef struct TableWriteState StringInfo compressionBuffer; } TableWriteState; +extern int cstore_compression; +extern int cstore_stripe_row_count; +extern int cstore_block_row_count; + +extern void cstore_init(void); + extern CompressionType ParseCompressionType(const char *compressionTypeString); extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); diff --git a/mod.c b/mod.c index 3e041dd7a..e81f7a6e5 100644 --- a/mod.c +++ b/mod.c @@ -15,6 +15,7 @@ #include "fmgr.h" +#include "cstore.h" #include "mod.h" #ifdef USE_TABLEAM @@ -30,6 +31,8 @@ PG_MODULE_MAGIC; void _PG_init(void) { + cstore_init(); + #ifdef USE_TABLEAM cstore_tableam_init(); #endif From 9f9bb64c4c21f97e111ae4604148176721673342 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 09:18:03 -0700 Subject: [PATCH 48/91] fixup --- cstore_tableam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index c886ebe77..204746aa0 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -147,7 +147,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, Index varlevelsup = 0; Var *var; - if (!tupdesc->attrs[i].attisdropped) + if (tupdesc->attrs[i].attisdropped) { continue; } From fbe472828739e9ecb5578cc7ad55385fdda3f026 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 09:19:41 -0700 Subject: [PATCH 49/91] use GUCs --- cstore.c | 5 +++++ cstore.h | 5 ----- cstore_fdw.c | 6 +++--- cstore_tableam.c | 14 +++++++------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cstore.c b/cstore.c index 94fc401a1..507d58463 100644 --- a/cstore.c +++ b/cstore.c @@ -22,6 +22,11 @@ #include "cstore.h" +/* Default values for option parameters */ +#define DEFAULT_COMPRESSION_TYPE 
COMPRESSION_NONE +#define DEFAULT_STRIPE_ROW_COUNT 150000 +#define DEFAULT_BLOCK_ROW_COUNT 10000 + int cstore_compression = DEFAULT_COMPRESSION_TYPE; int cstore_stripe_row_count = DEFAULT_STRIPE_ROW_COUNT; int cstore_block_row_count = DEFAULT_BLOCK_ROW_COUNT; diff --git a/cstore.h b/cstore.h index cbd60fca1..ad0ad20bd 100644 --- a/cstore.h +++ b/cstore.h @@ -24,11 +24,6 @@ #define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" #define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" -/* Default values for option parameters */ -#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE -#define DEFAULT_STRIPE_ROW_COUNT 150000 -#define DEFAULT_BLOCK_ROW_COUNT 10000 - /* Limits for option parameters */ #define STRIPE_ROW_COUNT_MINIMUM 1000 #define STRIPE_ROW_COUNT_MAXIMUM 10000000 diff --git a/cstore_fdw.c b/cstore_fdw.c index 512dee5a3..a66ba1d80 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -1260,9 +1260,9 @@ static CStoreOptions * CStoreGetOptions(Oid foreignTableId) { CStoreOptions *cstoreOptions = NULL; - CompressionType compressionType = DEFAULT_COMPRESSION_TYPE; - int32 stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; - int32 blockRowCount = DEFAULT_BLOCK_ROW_COUNT; + CompressionType compressionType = cstore_compression; + int32 stripeRowCount = cstore_stripe_row_count; + int32 blockRowCount = cstore_block_row_count; char *compressionTypeString = NULL; char *stripeRowCountString = NULL; char *blockRowCountString = NULL; diff --git a/cstore_tableam.c b/cstore_tableam.c index 204746aa0..95630c3e1 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -47,12 +47,12 @@ static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; static MemoryContext CStoreContext = NULL; static CStoreOptions * -CStoreGetDefaultOptions(void) +CStoreTableAMGetOptions(void) { CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); - cstoreOptions->compressionType = DEFAULT_COMPRESSION_TYPE; - cstoreOptions->stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; - cstoreOptions->blockRowCount = 
DEFAULT_BLOCK_ROW_COUNT; + cstoreOptions->compressionType = cstore_compression; + cstoreOptions->stripeRowCount = cstore_stripe_row_count; + cstoreOptions->blockRowCount = cstore_block_row_count; return cstoreOptions; } @@ -71,7 +71,7 @@ cstore_init_write_state(Relation relation) if (CStoreWriteState == NULL) { - CStoreOptions *cstoreOptions = CStoreGetDefaultOptions(); + CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(); TupleDesc tupdesc = RelationGetDescr(relation); MemoryContext oldContext; @@ -128,7 +128,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; - cstoreOptions = CStoreGetDefaultOptions(); + cstoreOptions = CStoreTableAMGetOptions(); scan->cs_base.rs_rd = relation; scan->cs_base.rs_snapshot = snapshot; @@ -419,7 +419,7 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitializeCStoreTableFile(rel->rd_id, rel, CStoreGetDefaultOptions()); + InitializeCStoreTableFile(rel->rd_id, rel, CStoreTableAMGetOptions()); smgrclose(srel); } From 0f43534845e940bbbe8e1f4e7b108a2429679df0 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 09:26:20 -0700 Subject: [PATCH 50/91] fixup guc --- cstore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cstore.c b/cstore.c index 507d58463..1e8733003 100644 --- a/cstore.c +++ b/cstore.c @@ -47,7 +47,7 @@ cstore_init() &cstore_compression, DEFAULT_COMPRESSION_TYPE, cstore_compression_options, - PGC_POSTMASTER, + PGC_USERSET, 0, NULL, NULL, From 06f1c9697584a1ef7dccdc21f6b5364d5671b5c3 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 11:16:35 -0700 Subject: [PATCH 51/91] almost works --- Makefile | 3 ++- cstore_tableam.c | 29 ++++++++++++++++++++--------- expected/am_block_filtering.out | 14 +++++++++----- expected/am_create.out | 1 + expected/am_load.out | 2 ++ 
expected/am_truncate.out | 8 ++++++++ input/am_block_filtering.source | 5 +++++ input/am_create.source | 1 + input/am_load.source | 2 ++ output/am_block_filtering.source | 14 +++++++++----- output/am_create.source | 1 + output/am_load.source | 2 ++ sql/am_block_filtering.sql | 5 +++++ sql/am_create.sql | 1 + sql/am_load.sql | 2 ++ sql/am_truncate.sql | 10 ++++++++-- 16 files changed, 78 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index ea5a858bf..033e8d4a6 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,8 @@ ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_data_types am_functions \ - am_drop am_insert am_copyto am_alter am_truncate am_clean + am_block_filtering am_drop am_insert am_copyto am_alter \ + am_truncate am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore_tableam.c b/cstore_tableam.c index 95630c3e1..57ec2fa94 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -56,6 +56,16 @@ CStoreTableAMGetOptions(void) return cstoreOptions; } +static MemoryContext +CStoreMemoryContext(void) +{ + if (CStoreContext == NULL) + { + CStoreContext = AllocSetContextCreate(TopMemoryContext, "cstore context", + ALLOCSET_DEFAULT_SIZES); + } + return CStoreContext; +} static void cstore_init_write_state(Relation relation) @@ -73,22 +83,13 @@ cstore_init_write_state(Relation relation) { CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(); TupleDesc tupdesc = RelationGetDescr(relation); - MemoryContext oldContext; - - if (CStoreContext == NULL) - { - CStoreContext = AllocSetContextCreate(TopMemoryContext, "cstore context", - ALLOCSET_DEFAULT_SIZES); - } elog(LOG, "initializing write state for relation %d", relation->rd_id); - oldContext = MemoryContextSwitchTo(CStoreContext); CStoreWriteState = CStoreBeginWrite(relation->rd_id, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupdesc); - MemoryContextSwitchTo(oldContext); 
CStoreWriteState->relation = relation; } @@ -127,6 +128,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; + MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); cstoreOptions = CStoreTableAMGetOptions(); @@ -162,6 +164,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, scan->cs_readState = readState; + MemoryContextSwitchTo(oldContext); return ((TableScanDesc) scan); } @@ -187,12 +190,15 @@ cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot { CStoreScanDesc scan = (CStoreScanDesc) sscan; bool nextRowFound; + MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); ExecClearTuple(slot); nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, slot->tts_isnull); + MemoryContextSwitchTo(oldContext); + if (!nextRowFound) { return false; @@ -303,6 +309,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { HeapTuple heapTuple; + MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); cstore_init_write_state(relation); @@ -319,6 +326,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, slot_getallattrs(slot); CStoreWriteRow(CStoreWriteState, slot->tts_values, slot->tts_isnull); + MemoryContextSwitchTo(oldContext); } @@ -343,6 +351,8 @@ static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { + MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); + cstore_init_write_state(relation); for (int i = 0; i < ntuples; i++) @@ -363,6 +373,7 @@ cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CStoreWriteRow(CStoreWriteState, tupleSlot->tts_values, tupleSlot->tts_isnull); } + MemoryContextSwitchTo(oldContext); } diff --git 
a/expected/am_block_filtering.out b/expected/am_block_filtering.out index bccfafd15..005b42e64 100644 --- a/expected/am_block_filtering.out +++ b/expected/am_block_filtering.out @@ -24,9 +24,11 @@ $$ END; $$ LANGUAGE PLPGSQL; -- Create and load data -CREATE FOREIGN TABLE test_block_filtering (a int) - SERVER cstore_server - OPTIONS(block_row_count '1000', stripe_row_count '2000'); +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam; COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; -- Verify that filtered_row_count is less than 1000 for the following queries SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); @@ -104,9 +106,11 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET 3958 (1 row) +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; -- Verify that we are fine with collations which use a different alphabet order -CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server; +CREATE TABLE collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; COPY collation_block_filtering_test FROM STDIN; SELECT * FROM collation_block_filtering_test WHERE A > 'B'; a diff --git a/expected/am_create.out b/expected/am_create.out index e62447252..47c6a6c44 100644 --- a/expected/am_create.out +++ b/expected/am_create.out @@ -6,6 +6,7 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; -- Create compressed table with automatically determined file path +-- COMPRESSED CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; diff --git a/expected/am_load.out 
b/expected/am_load.out index 110e444fa..02cff343a 100644 --- a/expected/am_load.out +++ b/expected/am_load.out @@ -15,10 +15,12 @@ COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV -- COPY into uncompressed table from program COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; -- COPY into compressed table +set cstore.compression = 'pglz'; COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; +set cstore.compression to default; -- Test column list CREATE TABLE famous_constants (id int, name text, value real) USING cstore_tableam; diff --git a/expected/am_truncate.out b/expected/am_truncate.out index 538b9ddac..99db7fe72 100644 --- a/expected/am_truncate.out +++ b/expected/am_truncate.out @@ -12,11 +12,14 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; -- CREATE a cstore_fdw table, fill with some data -- CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +-- COMPRESSED CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +set cstore.compression = 'pglz'; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +set cstore.compression to default; -- query rows SELECT * FROM cstore_truncate_test; a | b @@ -168,8 +171,11 @@ DROP TABLE cstore_truncate_test_regular; DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; +-- COMPRESSED CREATE TABLE 
truncate_schema.truncate_tbl (id int) USING cstore_tableam; +set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; count ------- @@ -183,7 +189,9 @@ SELECT COUNT(*) FROM truncate_schema.truncate_tbl; 0 (1 row) +set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; -- create a user that can not truncate CREATE USER truncate_user; GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; diff --git a/input/am_block_filtering.source b/input/am_block_filtering.source index 7ca6862c7..f93eb1988 100644 --- a/input/am_block_filtering.source +++ b/input/am_block_filtering.source @@ -28,6 +28,9 @@ $$ LANGUAGE PLPGSQL; -- Create and load data +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; CREATE TABLE test_block_filtering (a int) USING cstore_tableam; @@ -55,6 +58,8 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 2 SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; -- Verify that we are fine with collations which use a different alphabet order CREATE TABLE collation_block_filtering_test(A text collate "da_DK") diff --git a/input/am_create.source b/input/am_create.source index 8a1612f7a..6d4d5a388 100644 --- a/input/am_create.source +++ b/input/am_create.source @@ -10,6 +10,7 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, -- Create compressed table with automatically determined file path +-- COMPRESSED CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), 
achievements TEXT[]) USING cstore_tableam; diff --git a/input/am_load.source b/input/am_load.source index c2ad581e8..d0ef9bfac 100644 --- a/input/am_load.source +++ b/input/am_load.source @@ -16,11 +16,13 @@ COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; -- COPY into compressed table +set cstore.compression = 'pglz'; COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +set cstore.compression to default; -- Test column list CREATE TABLE famous_constants (id int, name text, value real) diff --git a/output/am_block_filtering.source b/output/am_block_filtering.source index 2f664a78a..45cb702b6 100644 --- a/output/am_block_filtering.source +++ b/output/am_block_filtering.source @@ -24,9 +24,11 @@ $$ END; $$ LANGUAGE PLPGSQL; -- Create and load data -CREATE FOREIGN TABLE test_block_filtering (a int) - SERVER cstore_server - OPTIONS(block_row_count '1000', stripe_row_count '2000'); +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam; COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; -- Verify that filtered_row_count is less than 1000 for the following queries SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); @@ -104,9 +106,11 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET 3958 (1 row) +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; -- Verify that we are fine with collations which use a different alphabet order -CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server; +CREATE TABLE 
collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; COPY collation_block_filtering_test FROM STDIN; SELECT * FROM collation_block_filtering_test WHERE A > 'B'; a diff --git a/output/am_create.source b/output/am_create.source index e62447252..47c6a6c44 100644 --- a/output/am_create.source +++ b/output/am_create.source @@ -6,6 +6,7 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; -- Create compressed table with automatically determined file path +-- COMPRESSED CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; diff --git a/output/am_load.source b/output/am_load.source index d1f41f717..5eb81a250 100644 --- a/output/am_load.source +++ b/output/am_load.source @@ -15,10 +15,12 @@ COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; -- COPY into compressed table +set cstore.compression = 'pglz'; COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +set cstore.compression to default; -- Test column list CREATE TABLE famous_constants (id int, name text, value real) USING cstore_tableam; diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql index 38c63535c..2a45716ed 100644 --- a/sql/am_block_filtering.sql +++ b/sql/am_block_filtering.sql @@ -28,6 +28,9 @@ $$ LANGUAGE PLPGSQL; -- Create and load data +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; CREATE TABLE test_block_filtering (a int) USING cstore_tableam; @@ -55,6 +58,8 @@ SELECT 
filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 2 SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; -- Verify that we are fine with collations which use a different alphabet order CREATE TABLE collation_block_filtering_test(A text collate "da_DK") diff --git a/sql/am_create.sql b/sql/am_create.sql index 8a1612f7a..6d4d5a388 100644 --- a/sql/am_create.sql +++ b/sql/am_create.sql @@ -10,6 +10,7 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, -- Create compressed table with automatically determined file path +-- COMPRESSED CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; diff --git a/sql/am_load.sql b/sql/am_load.sql index c7e9e5287..edc727b3c 100644 --- a/sql/am_load.sql +++ b/sql/am_load.sql @@ -16,11 +16,13 @@ COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; -- COPY into compressed table +set cstore.compression = 'pglz'; COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; +set cstore.compression to default; -- Test column list CREATE TABLE famous_constants (id int, name text, value real) diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index e124a7831..3fdce1d82 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -9,13 +9,16 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; -- CREATE a cstore_fdw table, fill with some data -- CREATE TABLE 
cstore_truncate_test (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +-- COMPRESSED CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +set cstore.compression = 'pglz'; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +set cstore.compression to default; -- query rows SELECT * FROM cstore_truncate_test; @@ -76,15 +79,19 @@ DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; +-- COMPRESSED CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam; +set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; +set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); - +set cstore.compression to default; -- create a user that can not truncate CREATE USER truncate_user; GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; @@ -108,7 +115,6 @@ GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; SELECT count(*) FROM truncate_schema.truncate_tbl; TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT count(*) FROM truncate_schema.truncate_tbl; - \c - :current_user -- cleanup From a05e75a6d11d5638fee978ebbfa8b31bb5c2a5d4 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 11:59:28 -0700 Subject: [PATCH 52/91] fixup --- Makefile | 3 +-- cstore.c | 6 +++--- cstore_tableam.c | 2 ++ 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile 
b/Makefile index 033e8d4a6..ea5a858bf 100644 --- a/Makefile +++ b/Makefile @@ -53,8 +53,7 @@ ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_data_types am_functions \ - am_block_filtering am_drop am_insert am_copyto am_alter \ - am_truncate am_clean + am_drop am_insert am_copyto am_alter am_truncate am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore.c b/cstore.c index 1e8733003..3dd53dae4 100644 --- a/cstore.c +++ b/cstore.c @@ -33,9 +33,9 @@ int cstore_block_row_count = DEFAULT_BLOCK_ROW_COUNT; static const struct config_enum_entry cstore_compression_options[] = { - {"none", COMPRESSION_NONE, false}, - {"pglz", COMPRESSION_PG_LZ, false}, - {NULL, 0, false} + { "none", COMPRESSION_NONE, false }, + { "pglz", COMPRESSION_PG_LZ, false }, + { NULL, 0, false } }; void diff --git a/cstore_tableam.c b/cstore_tableam.c index 57ec2fa94..aa92f48cf 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -56,6 +56,7 @@ CStoreTableAMGetOptions(void) return cstoreOptions; } + static MemoryContext CStoreMemoryContext(void) { @@ -67,6 +68,7 @@ CStoreMemoryContext(void) return CStoreContext; } + static void cstore_init_write_state(Relation relation) { From c303f0f135e95080eae31480d4881d6fa1b9c742 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 12:06:58 -0700 Subject: [PATCH 53/91] improve rel size estimate --- cstore_tableam.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index aa92f48cf..5849fb5ed 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -549,11 +549,17 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac) { - /* TODO */ - *attr_widths = 12; - *tuples = 100; - *pages = 10; + RelationOpenSmgr(rel); + *pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + *tuples = CStoreTableRowCount(rel); + + /* + * Append-only, so everything is 
visible except in-progress or rolled-back + * transactions. + */ *allvisfrac = 1.0; + + get_rel_data_width(rel, attr_widths); } From 8af9c91540dc76822e71e73ce26039a9362b168b Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Mon, 21 Sep 2020 18:13:14 -0700 Subject: [PATCH 54/91] address review comments --- cstore.c | 6 +++--- cstore_tableam.c | 34 +++++++++------------------------- cstore_tableam.h | 2 -- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/cstore.c b/cstore.c index 3dd53dae4..f5846a029 100644 --- a/cstore.c +++ b/cstore.c @@ -42,7 +42,7 @@ void cstore_init() { DefineCustomEnumVariable("cstore.compression", - "Sets the maximum number of statements tracked by pg_stat_statements.", + "Compression type for cstore.", NULL, &cstore_compression, DEFAULT_COMPRESSION_TYPE, @@ -54,7 +54,7 @@ cstore_init() NULL); DefineCustomIntVariable("cstore.stripe_row_count", - "Sets the maximum number of statements tracked by pg_stat_statements.", + "Maximum number of tuples per stripe.", NULL, &cstore_stripe_row_count, DEFAULT_STRIPE_ROW_COUNT, @@ -67,7 +67,7 @@ cstore_init() NULL); DefineCustomIntVariable("cstore.block_row_count", - "Sets the maximum number of statements tracked by pg_stat_statements.", + "Maximum number of rows per block.", NULL, &cstore_block_row_count, DEFAULT_BLOCK_ROW_COUNT, diff --git a/cstore_tableam.c b/cstore_tableam.c index 5849fb5ed..312e10981 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -58,7 +58,7 @@ CStoreTableAMGetOptions(void) static MemoryContext -CStoreMemoryContext(void) +GetCStoreMemoryContext(void) { if (CStoreContext == NULL) { @@ -98,7 +98,7 @@ cstore_init_write_state(Relation relation) } -void +static void cstore_free_write_state() { if (CStoreWriteState != NULL) @@ -130,7 +130,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; - MemoryContext oldContext = 
MemoryContextSwitchTo(CStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); cstoreOptions = CStoreTableAMGetOptions(); @@ -176,6 +176,7 @@ cstore_endscan(TableScanDesc sscan) { CStoreScanDesc scan = (CStoreScanDesc) sscan; CStoreEndRead(scan->cs_readState); + scan->cs_readState = NULL; } @@ -192,7 +193,7 @@ cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot { CStoreScanDesc scan = (CStoreScanDesc) sscan; bool nextRowFound; - MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); ExecClearTuple(slot); @@ -311,7 +312,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { HeapTuple heapTuple; - MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); cstore_init_write_state(relation); @@ -353,7 +354,7 @@ static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { - MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); cstore_init_write_state(relation); @@ -563,23 +564,6 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, } -static bool -cstore_scan_bitmap_next_block(TableScanDesc scan, - TBMIterateResult *tbmres) -{ - elog(ERROR, "cstore_scan_bitmap_next_block not implemented"); -} - - -static bool -cstore_scan_bitmap_next_tuple(TableScanDesc scan, - TBMIterateResult *tbmres, - TupleTableSlot *slot) -{ - elog(ERROR, "cstore_scan_bitmap_next_tuple not implemented"); -} - - static bool cstore_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate) { @@ -674,8 +658,8 @@ static const TableAmRoutine cstore_am_methods = { .relation_estimate_size 
= cstore_estimate_rel_size, - .scan_bitmap_next_block = cstore_scan_bitmap_next_block, - .scan_bitmap_next_tuple = cstore_scan_bitmap_next_tuple, + .scan_bitmap_next_block = NULL, + .scan_bitmap_next_tuple = NULL, .scan_sample_next_block = cstore_scan_sample_next_block, .scan_sample_next_tuple = cstore_scan_sample_next_tuple }; diff --git a/cstore_tableam.h b/cstore_tableam.h index 3a556728a..bdf7f96c0 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -3,7 +3,5 @@ #include "access/tableam.h" const TableAmRoutine * GetCstoreTableAmRoutine(void); -Datum cstore_tableam_handler(PG_FUNCTION_ARGS); -extern void cstore_free_write_state(void); extern void cstore_tableam_init(void); extern void cstore_tableam_finish(void); From bc585be3edef612f47cbdbb9db2f743bf60da14f Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 21 Sep 2020 15:53:21 -0700 Subject: [PATCH 55/91] Save blockRowCount in StripeMetadata --- .gitignore | 20 ++--- cstore.h | 33 +++++--- cstore_fdw--1.7.sql | 1 + cstore_metadata_tables.c | 8 +- cstore_reader.c | 174 ++++++++++++++++++--------------------- cstore_writer.c | 46 +++++------ 6 files changed, 140 insertions(+), 142 deletions(-) diff --git a/.gitignore b/.gitignore index 0c643e590..aa7be0e36 100644 --- a/.gitignore +++ b/.gitignore @@ -42,17 +42,17 @@ /data/*.cstore /data/*.footer -/sql/block_filtering.sql -/sql/copyto.sql -/sql/create.sql -/sql/data_types.sql -/sql/load.sql +/sql/*block_filtering.sql +/sql/*copyto.sql +/sql/*create.sql +/sql/*data_types.sql +/sql/*load.sql -/expected/block_filtering.out -/expected/copyto.out -/expected/create.out -/expected/data_types.out -/expected/load.out +/expected/*block_filtering.out +/expected/*copyto.out +/expected/*create.out +/expected/*data_types.out +/expected/*load.out /results/* /.deps/* /regression.diffs diff --git a/cstore.h b/cstore.h index ad0ad20bd..e48bced61 100644 --- a/cstore.h +++ b/cstore.h @@ -78,6 +78,7 @@ typedef struct StripeMetadata uint64 fileOffset; uint64 dataLength; 
uint32 blockCount; + uint32 blockRowCount; uint64 rowCount; uint64 id; } StripeMetadata; @@ -128,20 +129,27 @@ typedef struct StripeSkipList /* - * ColumnBlockData represents a block of data in a column. valueArray stores + * BlockData represents a block of data for multiple columns. valueArray stores * the values of data, and existsArray stores whether a value is present. * valueBuffer is used to store (uncompressed) serialized values * referenced by Datum's in valueArray. It is only used for by-reference Datum's. * There is a one-to-one correspondence between valueArray and existsArray. */ -typedef struct ColumnBlockData +typedef struct BlockData { - bool *existsArray; - Datum *valueArray; + uint32 rowCount; + uint32 columnCount; + + /* + * Following are indexed by [column][row]. If a column is not projected, + * then existsArray[column] and valueArray[column] are NULL. + */ + bool **existsArray; + Datum **valueArray; /* valueBuffer keeps actual data for type-by-reference datums from valueArray. 
*/ - StringInfo valueBuffer; -} ColumnBlockData; + StringInfo *valueBufferArray; +} BlockData; /* @@ -197,6 +205,7 @@ typedef struct TableReadState Oid relationId; TableMetadata *tableMetadata; + StripeMetadata *currentStripeMetadata; TupleDesc tupleDescriptor; Relation relation; @@ -212,7 +221,7 @@ typedef struct TableReadState StripeBuffers *stripeBuffers; uint32 readStripeCount; uint64 stripeReadRowCount; - ColumnBlockData **blockDataArray; + BlockData *blockData; int32 deserializedBlockIndex; } TableReadState; @@ -233,7 +242,8 @@ typedef struct TableWriteState StripeBuffers *stripeBuffers; StripeSkipList *stripeSkipList; uint32 stripeMaxRowCount; - ColumnBlockData **blockDataArray; + uint32 blockRowCount; + BlockData *blockData; /* * compressionBuffer buffer is used as temporary storage during @@ -276,10 +286,9 @@ extern void CStoreEndRead(TableReadState *state); /* Function declarations for common functions */ extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, int16 procedureId); -extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, - uint32 blockRowCount); -extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, - uint32 columnCount); +extern BlockData * CreateEmptyBlockData(uint32 columnCount, bool *columnMask, + uint32 blockRowCount); +extern void FreeBlockData(BlockData *blockData); extern uint64 CStoreTableRowCount(Relation relation); extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 19801f1f8..d98652b6d 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -74,6 +74,7 @@ CREATE TABLE cstore_stripes ( file_offset bigint NOT NULL, data_length bigint NOT NULL, block_count int NOT NULL, + block_row_count int NOT NULL, row_count bigint NOT NULL, PRIMARY KEY (relid, stripe), FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED 
diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 8a67a3a9e..5285295b9 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -79,13 +79,14 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); #define Anum_cstore_tables_version_minor 4 /* constants for cstore_stripe */ -#define Natts_cstore_stripes 6 +#define Natts_cstore_stripes 7 #define Anum_cstore_stripes_relid 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 #define Anum_cstore_stripes_data_length 4 #define Anum_cstore_stripes_block_count 5 -#define Anum_cstore_stripes_row_count 6 +#define Anum_cstore_stripes_block_row_count 6 +#define Anum_cstore_stripes_row_count 7 /* constants for cstore_skipnodes */ #define Natts_cstore_skipnodes 12 @@ -328,6 +329,7 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) Int64GetDatum(stripe->fileOffset), Int64GetDatum(stripe->dataLength), Int32GetDatum(stripe->blockCount), + Int32GetDatum(stripe->blockRowCount), Int64GetDatum(stripe->rowCount) }; @@ -388,6 +390,8 @@ ReadTableMetadata(Oid relid) datumArray[Anum_cstore_stripes_data_length - 1]); stripeMetadata->blockCount = DatumGetInt32( datumArray[Anum_cstore_stripes_block_count - 1]); + stripeMetadata->blockRowCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_block_row_count - 1]); stripeMetadata->rowCount = DatumGetInt64( datumArray[Anum_cstore_stripes_row_count - 1]); diff --git a/cstore_reader.c b/cstore_reader.c index fecb45605..caf07473f 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -45,8 +45,8 @@ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, List *whereClauseList); static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, uint64 blockIndex, uint64 blockRowIndex, - ColumnBlockData **blockDataArray, - Datum *columnValues, bool *columnNulls); + BlockData *blockData, Datum *columnValues, + bool *columnNulls); static ColumnBuffers * LoadColumnBuffers(Relation 
relation, ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, uint64 existsFileOffset, @@ -70,15 +70,12 @@ static void DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCount, bool datumTypeByValue, int datumTypeLength, char datumTypeAlign, Datum *datumArray); -static void DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, - uint32 rowCount, ColumnBlockData **blockDataArray, - TupleDesc tupleDescriptor); +static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, + uint32 rowCount, TupleDesc tupleDescriptor, + List *projectedColumnList); static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm); static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); -static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, - uint32 columnCount); - /* * CStoreBeginRead initializes a cstore read operation. This function returns a @@ -91,9 +88,6 @@ CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, TableReadState *readState = NULL; TableMetadata *tableMetadata = NULL; MemoryContext stripeReadContext = NULL; - uint32 columnCount = 0; - bool *projectedColumnMask = NULL; - ColumnBlockData **blockDataArray = NULL; tableMetadata = ReadTableMetadata(relationId); @@ -106,11 +100,6 @@ CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, "Stripe Read Memory Context", ALLOCSET_DEFAULT_SIZES); - columnCount = tupleDescriptor->natts; - projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - blockDataArray = CreateEmptyBlockDataArray(columnCount, projectedColumnMask, - tableMetadata->blockRowCount); - readState = palloc0(sizeof(TableReadState)); readState->relationId = relationId; readState->tableMetadata = tableMetadata; @@ -121,7 +110,7 @@ CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, readState->stripeReadRowCount = 0; readState->tupleDescriptor = tupleDescriptor; readState->stripeReadContext 
= stripeReadContext; - readState->blockDataArray = blockDataArray; + readState->blockData = NULL; readState->deserializedBlockIndex = -1; return readState; @@ -138,7 +127,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { uint32 blockIndex = 0; uint32 blockRowIndex = 0; - TableMetadata *tableMetadata = readState->tableMetadata; + StripeMetadata *stripeMetadata = readState->currentStripeMetadata; MemoryContext oldContext = NULL; /* @@ -151,7 +140,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { StripeBuffers *stripeBuffers = NULL; StripeMetadata *stripeMetadata = NULL; - List *stripeMetadataList = tableMetadata->stripeMetadataList; + List *stripeMetadataList = readState->tableMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); StripeFooter *stripeFooter = NULL; @@ -163,6 +152,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu oldContext = MemoryContextSwitchTo(readState->stripeReadContext); MemoryContextReset(readState->stripeReadContext); + readState->blockData = NULL; stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount); stripeFooter = ReadStripeFooter(readState->relationId, @@ -175,6 +165,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu readState->projectedColumnList, readState->whereClauseList); readState->readStripeCount++; + readState->currentStripeMetadata = stripeMetadata; MemoryContextSwitchTo(oldContext); @@ -183,37 +174,38 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu readState->stripeBuffers = stripeBuffers; readState->stripeReadRowCount = 0; readState->deserializedBlockIndex = -1; - ResetUncompressedBlockData(readState->blockDataArray, - stripeBuffers->columnCount); break; } } - blockIndex = readState->stripeReadRowCount / tableMetadata->blockRowCount; - blockRowIndex = readState->stripeReadRowCount % 
tableMetadata->blockRowCount; + blockIndex = readState->stripeReadRowCount / stripeMetadata->blockRowCount; + blockRowIndex = readState->stripeReadRowCount % stripeMetadata->blockRowCount; if (blockIndex != readState->deserializedBlockIndex) { uint32 lastBlockIndex = 0; uint32 blockRowCount = 0; uint32 stripeRowCount = 0; + StripeMetadata *stripeMetadata = readState->currentStripeMetadata; - stripeRowCount = readState->stripeBuffers->rowCount; - lastBlockIndex = stripeRowCount / tableMetadata->blockRowCount; + stripeRowCount = stripeMetadata->rowCount; + lastBlockIndex = stripeRowCount / stripeMetadata->blockRowCount; if (blockIndex == lastBlockIndex) { - blockRowCount = stripeRowCount % tableMetadata->blockRowCount; + blockRowCount = stripeRowCount % stripeMetadata->blockRowCount; } else { - blockRowCount = tableMetadata->blockRowCount; + blockRowCount = stripeMetadata->blockRowCount; } oldContext = MemoryContextSwitchTo(readState->stripeReadContext); - DeserializeBlockData(readState->stripeBuffers, blockIndex, - blockRowCount, readState->blockDataArray, - readState->tupleDescriptor); + FreeBlockData(readState->blockData); + readState->blockData = + DeserializeBlockData(readState->stripeBuffers, blockIndex, + blockRowCount, readState->tupleDescriptor, + readState->projectedColumnList); MemoryContextSwitchTo(oldContext); @@ -221,7 +213,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu } ReadStripeNextRow(readState->stripeBuffers, readState->projectedColumnList, - blockIndex, blockRowIndex, readState->blockDataArray, + blockIndex, blockRowIndex, readState->blockData, columnValues, columnNulls); /* @@ -242,11 +234,8 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu void CStoreEndRead(TableReadState *readState) { - int columnCount = readState->tupleDescriptor->natts; - MemoryContextDelete(readState->stripeReadContext); list_free_deep(readState->tableMetadata->stripeMetadataList); - 
FreeColumnBlockDataArray(readState->blockDataArray, columnCount); pfree(readState->tableMetadata); pfree(readState); } @@ -256,52 +245,65 @@ CStoreEndRead(TableReadState *readState) * CreateEmptyBlockDataArray creates data buffers to keep deserialized exist and * value arrays for requested columns in columnMask. */ -ColumnBlockData ** -CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, uint32 blockRowCount) +BlockData * +CreateEmptyBlockData(uint32 columnCount, bool *columnMask, uint32 blockRowCount) { uint32 columnIndex = 0; - ColumnBlockData **blockDataArray = palloc0(columnCount * sizeof(ColumnBlockData *)); + + BlockData *blockData = palloc0(sizeof(BlockData)); + blockData->existsArray = palloc0(columnCount * sizeof(bool *)); + blockData->valueArray = palloc0(columnCount * sizeof(Datum *)); + blockData->valueBufferArray = palloc0(columnCount * sizeof(StringInfo)); + blockData->columnCount = columnCount; + blockData->rowCount = blockRowCount; /* allocate block memory for deserialized data */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { if (columnMask[columnIndex]) { - ColumnBlockData *blockData = palloc0(sizeof(ColumnBlockData)); - - blockData->existsArray = palloc0(blockRowCount * sizeof(bool)); - blockData->valueArray = palloc0(blockRowCount * sizeof(Datum)); - blockData->valueBuffer = NULL; - blockDataArray[columnIndex] = blockData; + blockData->existsArray[columnIndex] = palloc0(blockRowCount * sizeof(bool)); + blockData->valueArray[columnIndex] = palloc0(blockRowCount * sizeof(Datum)); + blockData->valueBufferArray[columnIndex] = NULL; } } - return blockDataArray; + return blockData; } /* - * FreeColumnBlockDataArray deallocates data buffers to keep deserialized exist and + * FreeBlockData deallocates data buffers to keep deserialized exist and * value arrays for requested columns in columnMask. 
* ColumnBlockData->serializedValueBuffer lives in memory read/write context * so it is deallocated automatically when the context is deleted. */ void -FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) +FreeBlockData(BlockData *blockData) { uint32 columnIndex = 0; - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + + if (blockData == NULL) { - ColumnBlockData *blockData = blockDataArray[columnIndex]; - if (blockData != NULL) + return; + } + + for (columnIndex = 0; columnIndex < blockData->columnCount; columnIndex++) + { + if (blockData->existsArray[columnIndex] != NULL) { - pfree(blockData->existsArray); - pfree(blockData->valueArray); - pfree(blockData); + pfree(blockData->existsArray[columnIndex]); + } + + if (blockData->valueArray[columnIndex] != NULL) + { + pfree(blockData->valueArray[columnIndex]); } } - pfree(blockDataArray); + pfree(blockData->existsArray); + pfree(blockData->valueArray); + pfree(blockData); } @@ -403,7 +405,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, uint64 blockIndex, uint64 blockRowIndex, - ColumnBlockData **blockDataArray, Datum *columnValues, + BlockData *blockData, Datum *columnValues, bool *columnNulls) { ListCell *projectedColumnCell = NULL; @@ -414,13 +416,12 @@ ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, foreach(projectedColumnCell, projectedColumnList) { Var *projectedColumn = lfirst(projectedColumnCell); - uint32 projectedColumnIndex = projectedColumn->varattno - 1; - ColumnBlockData *blockData = blockDataArray[projectedColumnIndex]; + uint32 columnIndex = projectedColumn->varattno - 1; - if (blockData->existsArray[blockRowIndex]) + if (blockData->existsArray[columnIndex][blockRowIndex]) { - columnValues[projectedColumnIndex] = blockData->valueArray[blockRowIndex]; - columnNulls[projectedColumnIndex] = false; + 
columnValues[columnIndex] = blockData->valueArray[columnIndex][blockRowIndex]; + columnNulls[columnIndex] = false; } } } @@ -919,20 +920,23 @@ DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCou * data is not present serialized buffer, then default value (or null) is used * to fill value array. */ -static void +static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, - uint32 rowCount, - ColumnBlockData **blockDataArray, TupleDesc tupleDescriptor) + uint32 rowCount, TupleDesc tupleDescriptor, + List *projectedColumnList) { int columnIndex = 0; + bool *columnMask = ProjectedColumnMask(tupleDescriptor->natts, projectedColumnList); + BlockData *blockData = CreateEmptyBlockData(tupleDescriptor->natts, columnMask, + rowCount); + for (columnIndex = 0; columnIndex < stripeBuffers->columnCount; columnIndex++) { - ColumnBlockData *blockData = blockDataArray[columnIndex]; Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; bool columnAdded = false; - if ((columnBuffers == NULL) && (blockData != NULL)) + if (columnBuffers == NULL && columnMask[columnIndex]) { columnAdded = true; } @@ -943,10 +947,6 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = NULL; - /* free previous block's data buffers */ - pfree(blockData->valueBuffer->data); - pfree(blockData->valueBuffer); - /* decompress and deserialize current block's data */ valueBuffer = DecompressBuffer(blockBuffers->valueBuffer, blockBuffers->valueCompressionType); @@ -958,15 +958,16 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, pfree(blockBuffers->valueBuffer); } - DeserializeBoolArray(blockBuffers->existsBuffer, blockData->existsArray, + DeserializeBoolArray(blockBuffers->existsBuffer, + blockData->existsArray[columnIndex], rowCount); - 
DeserializeDatumArray(valueBuffer, blockData->existsArray, + DeserializeDatumArray(valueBuffer, blockData->existsArray[columnIndex], rowCount, attributeForm->attbyval, attributeForm->attlen, attributeForm->attalign, - blockData->valueArray); + blockData->valueArray[columnIndex]); /* store current block's data buffer to be freed at next block read */ - blockData->valueBuffer = valueBuffer; + blockData->valueBufferArray[columnIndex] = valueBuffer; } else if (columnAdded) { @@ -983,16 +984,19 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, for (rowIndex = 0; rowIndex < rowCount; rowIndex++) { - blockData->existsArray[rowIndex] = true; - blockData->valueArray[rowIndex] = defaultValue; + blockData->existsArray[columnIndex][rowIndex] = true; + blockData->valueArray[columnIndex][rowIndex] = defaultValue; } } else { - memset(blockData->existsArray, false, rowCount); + memset(blockData->existsArray[columnIndex], false, + rowCount * sizeof(bool)); } } } + + return blockData; } @@ -1067,23 +1071,3 @@ ReadFromSmgr(Relation rel, uint64 offset, uint32 size) return resultBuffer; } - - -/* - * ResetUncompressedBlockData iterates over deserialized column block data - * and sets valueBuffer field to empty buffer. This field is allocated in stripe - * memory context and becomes invalid once memory context is reset. 
- */ -static void -ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount) -{ - uint32 columnIndex = 0; - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - ColumnBlockData *blockData = blockDataArray[columnIndex]; - if (blockData != NULL) - { - blockData->valueBuffer = makeStringInfo(); - } - } -} diff --git a/cstore_writer.c b/cstore_writer.c index 55a314ec4..cf0fa58fe 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -72,7 +72,7 @@ CStoreBeginWrite(Oid relationId, uint32 columnCount = 0; uint32 columnIndex = 0; bool *columnMaskArray = NULL; - ColumnBlockData **blockData = NULL; + BlockData *blockData = NULL; uint64 currentStripeId = 0; tableMetadata = ReadTableMetadata(relationId); @@ -125,20 +125,21 @@ CStoreBeginWrite(Oid relationId, columnMaskArray = palloc(columnCount * sizeof(bool)); memset(columnMaskArray, true, columnCount); - blockData = CreateEmptyBlockDataArray(columnCount, columnMaskArray, blockRowCount); + blockData = CreateEmptyBlockData(columnCount, columnMaskArray, blockRowCount); writeState = palloc0(sizeof(TableWriteState)); writeState->relationId = relationId; writeState->tableMetadata = tableMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; + writeState->blockRowCount = blockRowCount; writeState->tupleDescriptor = tupleDescriptor; writeState->currentFileOffset = currentFileOffset; writeState->comparisonFunctionArray = comparisonFunctionArray; writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; writeState->stripeWriteContext = stripeWriteContext; - writeState->blockDataArray = blockData; + writeState->blockData = blockData; writeState->compressionBuffer = NULL; writeState->currentStripeId = currentStripeId; @@ -164,8 +165,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; 
TableMetadata *tableMetadata = writeState->tableMetadata; - const uint32 blockRowCount = tableMetadata->blockRowCount; - ColumnBlockData **blockDataArray = writeState->blockDataArray; + const uint32 blockRowCount = writeState->blockRowCount; + BlockData *blockData = writeState->blockData; MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); if (stripeBuffers == NULL) @@ -184,8 +185,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { - ColumnBlockData *blockData = blockDataArray[columnIndex]; - blockData->valueBuffer = makeStringInfo(); + blockData->valueBufferArray[columnIndex] = makeStringInfo(); } } @@ -194,14 +194,13 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { - ColumnBlockData *blockData = blockDataArray[columnIndex]; ColumnBlockSkipNode **blockSkipNodeArray = stripeSkipList->blockSkipNodeArray; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[columnIndex][blockIndex]; if (columnNulls[columnIndex]) { - blockData->existsArray[blockRowIndex] = false; + blockData->existsArray[columnIndex][blockRowIndex] = false; } else { @@ -214,10 +213,11 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul Oid columnCollation = attributeForm->attcollation; char columnTypeAlign = attributeForm->attalign; - blockData->existsArray[blockRowIndex] = true; + blockData->existsArray[columnIndex][blockRowIndex] = true; - SerializeSingleDatum(blockData->valueBuffer, columnValues[columnIndex], - columnTypeByValue, columnTypeLength, columnTypeAlign); + SerializeSingleDatum(blockData->valueBufferArray[columnIndex], + columnValues[columnIndex], columnTypeByValue, + columnTypeLength, columnTypeAlign); UpdateBlockSkipNodeMinMax(blockSkipNode, columnValues[columnIndex], columnTypeByValue, columnTypeLength, @@ -271,7 +271,6 
@@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul void CStoreEndWrite(TableWriteState *writeState) { - int columnCount = writeState->tupleDescriptor->natts; StripeBuffers *stripeBuffers = writeState->stripeBuffers; if (stripeBuffers != NULL) @@ -289,7 +288,7 @@ CStoreEndWrite(TableWriteState *writeState) MemoryContextDelete(writeState->stripeWriteContext); list_free_deep(writeState->tableMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); - FreeColumnBlockDataArray(writeState->blockDataArray, columnCount); + FreeBlockData(writeState->blockData); pfree(writeState); } @@ -415,6 +414,8 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) if (RelationNeedsWAL(rel)) { + XLogRecPtr recptr = 0; + XLogBeginInsert(); /* @@ -423,7 +424,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) */ XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE); - XLogRecPtr recptr = XLogInsert(RM_GENERIC_ID, 0); + recptr = XLogInsert(RM_GENERIC_ID, 0); PageSetLSN(page, recptr); } @@ -452,14 +453,13 @@ FlushStripe(TableWriteState *writeState) StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; - TableMetadata *tableMetadata = writeState->tableMetadata; StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray; TupleDesc tupleDescriptor = writeState->tupleDescriptor; uint32 columnCount = tupleDescriptor->natts; uint32 blockCount = stripeSkipList->blockCount; - uint32 blockRowCount = tableMetadata->blockRowCount; + uint32 blockRowCount = writeState->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; uint64 initialFileOffset = writeState->currentFileOffset; @@ -565,6 +565,7 @@ FlushStripe(TableWriteState *writeState) 
stripeMetadata.dataLength = dataLength; stripeMetadata.id = writeState->currentStripeId; stripeMetadata.blockCount = blockCount; + stripeMetadata.blockRowCount = writeState->blockRowCount; return stripeMetadata; } @@ -679,7 +680,7 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCou { uint32 columnIndex = 0; StripeBuffers *stripeBuffers = writeState->stripeBuffers; - ColumnBlockData **blockDataArray = writeState->blockDataArray; + BlockData *blockData = writeState->blockData; CompressionType requestedCompressionType = writeState->compressionType; const uint32 columnCount = stripeBuffers->columnCount; StringInfo compressionBuffer = writeState->compressionBuffer; @@ -689,9 +690,9 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCou { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; - ColumnBlockData *blockData = blockDataArray[columnIndex]; - blockBuffers->existsBuffer = SerializeBoolArray(blockData->existsArray, rowCount); + blockBuffers->existsBuffer = + SerializeBoolArray(blockData->existsArray[columnIndex], rowCount); } /* @@ -702,12 +703,11 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCou { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; - ColumnBlockData *blockData = blockDataArray[columnIndex]; StringInfo serializedValueBuffer = NULL; CompressionType actualCompressionType = COMPRESSION_NONE; bool compressed = false; - serializedValueBuffer = blockData->valueBuffer; + serializedValueBuffer = blockData->valueBufferArray[columnIndex]; /* the only other supported compression type is pg_lz for now */ Assert(requestedCompressionType == COMPRESSION_NONE || @@ -730,7 +730,7 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 
rowCou blockBuffers->valueBuffer = CopyStringInfo(serializedValueBuffer); /* valueBuffer needs to be reset for next block's data */ - resetStringInfo(blockData->valueBuffer); + resetStringInfo(blockData->valueBufferArray[columnIndex]); } } From db5287069ff8b765746470e199e25fce3b979a2f Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 21 Sep 2020 22:10:25 -0700 Subject: [PATCH 56/91] Make block offsets relative to stripe start --- cstore.h | 1 + cstore_reader.c | 14 +++++--------- cstore_writer.c | 30 ++++++++++++++++++++++-------- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/cstore.h b/cstore.h index e48bced61..f88ebbdab 100644 --- a/cstore.h +++ b/cstore.h @@ -235,6 +235,7 @@ typedef struct TableWriteState TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; uint64 currentFileOffset; + uint64 currentStripeOffset; Relation relation; MemoryContext stripeWriteContext; diff --git a/cstore_reader.c b/cstore_reader.c index caf07473f..6b5d7ed00 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -49,8 +49,7 @@ static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColum bool *columnNulls); static ColumnBuffers * LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, - uint32 blockCount, uint64 existsFileOffset, - uint64 valueFileOffset, + uint32 blockCount, uint64 stripeOffset, Form_pg_attribute attributeForm); static bool * SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, List *whereClauseList); @@ -365,8 +364,6 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, { uint64 existsSize = stripeFooter->existsSizeArray[columnIndex]; uint64 valueSize = stripeFooter->valueSizeArray[columnIndex]; - uint64 existsFileOffset = currentColumnFileOffset; - uint64 valueFileOffset = currentColumnFileOffset + existsSize; if (projectedColumnMask[columnIndex]) { @@ -377,8 +374,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata 
*stripeMetadata, ColumnBuffers *columnBuffers = LoadColumnBuffers(relation, blockSkipNode, blockCount, - existsFileOffset, - valueFileOffset, + stripeMetadata->fileOffset, attributeForm); columnBuffersArray[columnIndex] = columnBuffers; @@ -434,7 +430,7 @@ ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, */ static ColumnBuffers * LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, - uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, + uint32 blockCount, uint64 stripeOffset, Form_pg_attribute attributeForm) { ColumnBuffers *columnBuffers = NULL; @@ -455,7 +451,7 @@ LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; - uint64 existsOffset = existsFileOffset + blockSkipNode->existsBlockOffset; + uint64 existsOffset = stripeOffset + blockSkipNode->existsBlockOffset; StringInfo rawExistsBuffer = ReadFromSmgr(relation, existsOffset, blockSkipNode->existsLength); @@ -467,7 +463,7 @@ LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, { ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; CompressionType compressionType = blockSkipNode->valueCompressionType; - uint64 valueOffset = valueFileOffset + blockSkipNode->valueBlockOffset; + uint64 valueOffset = stripeOffset + blockSkipNode->valueBlockOffset; StringInfo rawValueBuffer = ReadFromSmgr(relation, valueOffset, blockSkipNode->valueLength); diff --git a/cstore_writer.c b/cstore_writer.c index cf0fa58fe..65871b511 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -242,6 +242,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul MemoryContextReset(writeState->stripeWriteContext); writeState->currentStripeId++; + writeState->currentStripeOffset = 0; /* set stripe data and skip list to NULL so they are recreated next time */ 
writeState->stripeBuffers = NULL; @@ -473,12 +474,10 @@ FlushStripe(TableWriteState *writeState) SerializeBlockData(writeState, lastBlockIndex, lastBlockRowCount); } - /* update buffer sizes and positions in stripe skip list */ + /* update buffer sizes in stripe skip list */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; - uint64 currentExistsBlockOffset = 0; - uint64 currentValueBlockOffset = 0; ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; for (blockIndex = 0; blockIndex < blockCount; blockIndex++) @@ -486,21 +485,36 @@ FlushStripe(TableWriteState *writeState) ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; uint64 existsBufferSize = blockBuffers->existsBuffer->len; + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + + blockSkipNode->existsBlockOffset = writeState->currentStripeOffset; + blockSkipNode->existsLength = existsBufferSize; + writeState->currentStripeOffset += existsBufferSize; + } + } + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; uint64 valueBufferSize = blockBuffers->valueBuffer->len; CompressionType valueCompressionType = blockBuffers->valueCompressionType; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; - blockSkipNode->existsBlockOffset = currentExistsBlockOffset; - blockSkipNode->existsLength = existsBufferSize; - blockSkipNode->valueBlockOffset = currentValueBlockOffset; + blockSkipNode->valueBlockOffset = writeState->currentStripeOffset; blockSkipNode->valueLength = valueBufferSize; 
blockSkipNode->valueCompressionType = valueCompressionType; - currentExistsBlockOffset += existsBufferSize; - currentValueBlockOffset += valueBufferSize; + writeState->currentStripeOffset += valueBufferSize; } } + /* create skip list and footer buffers */ SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, stripeSkipList, tupleDescriptor); From 1b45cfb52e2f6a5dc470cc0abdc77fe7a65d7f7f Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 23 Sep 2020 09:53:32 -0700 Subject: [PATCH 57/91] remove generated sql test files --- sql/am_block_filtering.sql | 73 -------------------------------------- sql/am_copyto.sql | 17 --------- sql/am_create.sql | 20 ----------- sql/am_data_types.sql | 68 ----------------------------------- sql/am_load.sql | 46 ------------------------ 5 files changed, 224 deletions(-) delete mode 100644 sql/am_block_filtering.sql delete mode 100644 sql/am_copyto.sql delete mode 100644 sql/am_create.sql delete mode 100644 sql/am_data_types.sql delete mode 100644 sql/am_load.sql diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql deleted file mode 100644 index 2a45716ed..000000000 --- a/sql/am_block_filtering.sql +++ /dev/null @@ -1,73 +0,0 @@ --- --- Test block filtering in cstore_fdw using min/max values in stripe skip lists. --- - - --- --- filtered_row_count returns number of rows filtered by the WHERE clause. --- If blocks get filtered by cstore_fdw, less rows are passed to WHERE --- clause, so this function should return a lower number. 
--- -CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS -$$ - DECLARE - result bigint; - rec text; - BEGIN - result := 0; - - FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP - IF rec ~ '^\s+Rows Removed by Filter' then - result := regexp_replace(rec, '[^0-9]*', '', 'g'); - END IF; - END LOOP; - - RETURN result; - END; -$$ LANGUAGE PLPGSQL; - - --- Create and load data --- block_row_count '1000', stripe_row_count '2000' -set cstore.stripe_row_count = 2000; -set cstore.block_row_count = 1000; -CREATE TABLE test_block_filtering (a int) - USING cstore_tableam; - -COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; - - --- Verify that filtered_row_count is less than 1000 for the following queries -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); - - --- Verify that filtered_row_count is less than 2000 for the following queries -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); - - --- Load data for second time and verify that filtered_row_count is exactly twice as before -COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); -SELECT filtered_row_count('SELECT count(*) FROM 
test_block_filtering WHERE a < 0'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); - -set cstore.stripe_row_count to default; -set cstore.block_row_count to default; - --- Verify that we are fine with collations which use a different alphabet order -CREATE TABLE collation_block_filtering_test(A text collate "da_DK") - USING cstore_tableam; -COPY collation_block_filtering_test FROM STDIN; -A -Å -B -\. - -SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/sql/am_copyto.sql b/sql/am_copyto.sql deleted file mode 100644 index 7288ff66f..000000000 --- a/sql/am_copyto.sql +++ /dev/null @@ -1,17 +0,0 @@ --- --- Test copying data from cstore_fdw tables. --- -CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; - --- load table data from file -COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; - --- export using COPY table TO ... -COPY test_contestant TO STDOUT; - --- export using COPY (SELECT * FROM table) TO ... -COPY (select * from test_contestant) TO STDOUT; - -DROP TABLE test_contestant CASCADE; diff --git a/sql/am_create.sql b/sql/am_create.sql deleted file mode 100644 index 6d4d5a388..000000000 --- a/sql/am_create.sql +++ /dev/null @@ -1,20 +0,0 @@ --- --- Test the CREATE statements related to cstore. 
--- - - --- Create uncompressed table -CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; - - --- Create compressed table with automatically determined file path --- COMPRESSED -CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; - --- Test that querying an empty table works -ANALYZE contestant; -SELECT count(*) FROM contestant; diff --git a/sql/am_data_types.sql b/sql/am_data_types.sql deleted file mode 100644 index b2668e71f..000000000 --- a/sql/am_data_types.sql +++ /dev/null @@ -1,68 +0,0 @@ --- --- Test loading and reading different data types to/from cstore_fdw foreign tables. --- - - --- Settings to make the result deterministic -SET datestyle = "ISO, YMD"; -SET timezone to 'GMT'; -SET intervalstyle TO 'POSTGRES_VERBOSE'; - - --- Test array types -CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) USING cstore_tableam; - -COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; - -SELECT * FROM test_array_types; - - --- Test date/time types -CREATE TABLE test_datetime_types (timestamp timestamp, - timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) USING cstore_tableam; - -COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; - -SELECT * FROM test_datetime_types; - - --- Test enum and composite types -CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); -CREATE TYPE composite_type AS (a int, b text); - -CREATE TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) USING cstore_tableam; - -COPY test_enum_and_composite_types FROM - '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; - -SELECT * FROM test_enum_and_composite_types; - - --- Test range types -CREATE TABLE 
test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) USING cstore_tableam; - -COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; - -SELECT * FROM test_range_types; - - --- Test other types -CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; - -COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; - -SELECT * FROM test_other_types; - - --- Test null values -CREATE TABLE test_null_values (a int, b int[], c composite_type) - USING cstore_tableam; - -COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; - -SELECT * FROM test_null_values; diff --git a/sql/am_load.sql b/sql/am_load.sql deleted file mode 100644 index edc727b3c..000000000 --- a/sql/am_load.sql +++ /dev/null @@ -1,46 +0,0 @@ --- --- Test loading data into cstore_fdw tables. --- - --- COPY with incorrect delimiter -COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' - WITH DELIMITER '|'; -- ERROR - --- COPY with invalid program -COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR - --- COPY into uncompressed table from file -COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; - --- COPY into uncompressed table from program -COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; - --- COPY into compressed table -set cstore.compression = 'pglz'; -COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; - --- COPY into uncompressed table from program -COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' - WITH CSV; -set cstore.compression to default; - --- Test column list -CREATE TABLE famous_constants (id int, name text, value real) - USING cstore_tableam; -COPY famous_constants (value, 
name, id) FROM STDIN WITH CSV; -3.141,pi,1 -2.718,e,2 -0.577,gamma,3 -5.291e-11,bohr radius,4 -\. - -COPY famous_constants (name, value) FROM STDIN WITH CSV; -avagadro,6.022e23 -electron mass,9.109e-31 -proton mass,1.672e-27 -speed of light,2.997e8 -\. - -SELECT * FROM famous_constants ORDER BY id, name; - -DROP TABLE famous_constants; From a34cdeb83c3b815175d818a980db3c723e0ca984 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 21 Sep 2020 22:59:21 -0700 Subject: [PATCH 58/91] Remove StripeFooter --- cstore.h | 18 +---- cstore_fdw--1.7.sql | 15 +---- cstore_metadata_tables.c | 141 +++------------------------------------ cstore_reader.c | 20 +----- cstore_writer.c | 74 +++----------------- 5 files changed, 24 insertions(+), 244 deletions(-) diff --git a/cstore.h b/cstore.h index f88ebbdab..96fa1ed53 100644 --- a/cstore.h +++ b/cstore.h @@ -77,6 +77,7 @@ typedef struct StripeMetadata { uint64 fileOffset; uint64 dataLength; + uint32 columnCount; uint32 blockCount; uint32 blockRowCount; uint64 rowCount; @@ -186,19 +187,6 @@ typedef struct StripeBuffers } StripeBuffers; -/* - * StripeFooter represents a stripe's footer. In this footer, we keep three - * arrays of sizes. The number of elements in each of the arrays is equal - * to the number of columns. - */ -typedef struct StripeFooter -{ - uint32 columnCount; - uint64 *existsSizeArray; - uint64 *valueSizeArray; -} StripeFooter; - - /* TableReadState represents state of a cstore file read operation. 
*/ typedef struct TableReadState { @@ -235,7 +223,6 @@ typedef struct TableWriteState TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; uint64 currentFileOffset; - uint64 currentStripeOffset; Relation relation; MemoryContext stripeWriteContext; @@ -296,9 +283,6 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); /* cstore_metadata_tables.c */ -extern void SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer); -extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount); - extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); extern TableMetadata * ReadTableMetadata(Oid relid); diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index d98652b6d..84b69be07 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -73,6 +73,7 @@ CREATE TABLE cstore_stripes ( stripe bigint NOT NULL, file_offset bigint NOT NULL, data_length bigint NOT NULL, + column_count int NOT NULL, block_count int NOT NULL, block_row_count int NOT NULL, row_count bigint NOT NULL, @@ -82,18 +83,6 @@ CREATE TABLE cstore_stripes ( COMMENT ON TABLE cstore_tables IS 'CStore per stripe metadata'; -CREATE TABLE cstore_stripe_attr ( - relid oid NOT NULL, - stripe bigint NOT NULL, - attr int NOT NULL, - exists_size bigint NOT NULL, - value_size bigint NOT NULL, - PRIMARY KEY (relid, stripe, attr), - FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED -) WITH (user_catalog_table = true); - -COMMENT ON TABLE cstore_tables IS 'CStore per stripe/column combination metadata'; - CREATE TABLE cstore_skipnodes ( relid oid NOT NULL, stripe bigint NOT NULL, @@ -108,7 +97,7 @@ CREATE TABLE cstore_skipnodes ( exists_stream_length bigint NOT NULL, value_compression_type int NOT NULL, PRIMARY KEY (relid, stripe, 
attr, block), - FOREIGN KEY (relid, stripe, attr) REFERENCES cstore_stripe_attr(relid, stripe, attr) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_tables IS 'CStore per block metadata'; diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 5285295b9..690e9eba9 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -43,8 +43,6 @@ typedef struct EState *estate; } ModifyState; -static Oid CStoreStripeAttrRelationId(void); -static Oid CStoreStripeAttrIndexRelationId(void); static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); static Oid CStoreTablesRelationId(void); @@ -63,14 +61,6 @@ static EState * create_estate_for_relation(Relation rel); static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); -/* constants for cstore_stripe_attr */ -#define Natts_cstore_stripe_attr 5 -#define Anum_cstore_stripe_attr_relid 1 -#define Anum_cstore_stripe_attr_stripe 2 -#define Anum_cstore_stripe_attr_attr 3 -#define Anum_cstore_stripe_attr_exists_size 4 -#define Anum_cstore_stripe_attr_value_size 5 - /* constants for cstore_table */ #define Natts_cstore_tables 4 #define Anum_cstore_tables_relid 1 @@ -79,14 +69,15 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); #define Anum_cstore_tables_version_minor 4 /* constants for cstore_stripe */ -#define Natts_cstore_stripes 7 +#define Natts_cstore_stripes 8 #define Anum_cstore_stripes_relid 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 #define Anum_cstore_stripes_data_length 4 -#define Anum_cstore_stripes_block_count 5 -#define Anum_cstore_stripes_block_row_count 6 -#define Anum_cstore_stripes_row_count 7 +#define Anum_cstore_stripes_column_count 5 +#define 
Anum_cstore_stripes_block_count 6 +#define Anum_cstore_stripes_block_row_count 7 +#define Anum_cstore_stripes_row_count 8 /* constants for cstore_skipnodes */ #define Natts_cstore_skipnodes 12 @@ -328,6 +319,7 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) Int64GetDatum(stripe->id), Int64GetDatum(stripe->fileOffset), Int64GetDatum(stripe->dataLength), + Int32GetDatum(stripe->columnCount), Int32GetDatum(stripe->blockCount), Int32GetDatum(stripe->blockRowCount), Int64GetDatum(stripe->rowCount) @@ -388,6 +380,8 @@ ReadTableMetadata(Oid relid) datumArray[Anum_cstore_stripes_file_offset - 1]); stripeMetadata->dataLength = DatumGetInt64( datumArray[Anum_cstore_stripes_data_length - 1]); + stripeMetadata->columnCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_column_count - 1]); stripeMetadata->blockCount = DatumGetInt32( datumArray[Anum_cstore_stripes_block_count - 1]); stripeMetadata->blockRowCount = DatumGetInt32( @@ -485,103 +479,6 @@ DeleteTableMetadataRowIfExists(Oid relid) } -/* - * SaveStripeFooter stores give StripeFooter as cstore_stripe_attr records. - */ -void -SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer) -{ - Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); - Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, RowExclusiveLock); - - ModifyState *modifyState = StartModifyRelation(cstoreStripeAttrs); - - for (AttrNumber attr = 1; attr <= footer->columnCount; attr++) - { - bool nulls[Natts_cstore_stripe_attr] = { 0 }; - Datum values[Natts_cstore_stripe_attr] = { - ObjectIdGetDatum(relid), - Int64GetDatum(stripe), - Int16GetDatum(attr), - Int64GetDatum(footer->existsSizeArray[attr - 1]), - Int64GetDatum(footer->valueSizeArray[attr - 1]) - }; - - InsertTupleAndEnforceConstraints(modifyState, values, nulls); - } - - FinishModifyRelation(modifyState); - heap_close(cstoreStripeAttrs, NoLock); -} - - -/* - * ReadStripeFooter returns a StripeFooter by reading relevant records from - * cstore_stripe_attr. 
- */ -StripeFooter * -ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) -{ - StripeFooter *footer = NULL; - HeapTuple heapTuple; - - Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); - Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, AccessShareLock); - Relation index = index_open(CStoreStripeAttrIndexRelationId(), AccessShareLock); - TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripeAttrs); - - SysScanDesc scanDescriptor = NULL; - ScanKeyData scanKey[2]; - ScanKeyInit(&scanKey[0], Anum_cstore_stripe_attr_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); - ScanKeyInit(&scanKey[1], Anum_cstore_stripe_attr_stripe, - BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(stripe)); - - scanDescriptor = systable_beginscan_ordered(cstoreStripeAttrs, index, NULL, 2, - scanKey); - - footer = palloc0(sizeof(StripeFooter)); - footer->existsSizeArray = palloc0(relationColumnCount * sizeof(int64)); - footer->valueSizeArray = palloc0(relationColumnCount * sizeof(int64)); - - /* - * Stripe can have less columns than the relation if ALTER TABLE happens - * after stripe is formed. So we calculate column count of a stripe as - * maximum attribute number for that stripe. 
- */ - footer->columnCount = 0; - - while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) - { - Datum datumArray[Natts_cstore_stripe_attr]; - bool isNullArray[Natts_cstore_stripe_attr]; - AttrNumber attr = 0; - - heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - attr = DatumGetInt16(datumArray[2]); - - footer->columnCount = Max(footer->columnCount, attr); - - while (attr > relationColumnCount) - { - ereport(ERROR, (errmsg("unexpected attribute %d for a relation with %d attrs", - attr, relationColumnCount))); - } - - footer->existsSizeArray[attr - 1] = - DatumGetInt64(datumArray[Anum_cstore_stripe_attr_exists_size - 1]); - footer->valueSizeArray[attr - 1] = - DatumGetInt64(datumArray[Anum_cstore_stripe_attr_value_size - 1]); - } - - systable_endscan_ordered(scanDescriptor); - index_close(index, NoLock); - heap_close(cstoreStripeAttrs, NoLock); - - return footer; -} - - /* * StartModifyRelation allocates resources for modifications. */ @@ -760,28 +657,6 @@ ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm) } -/* - * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr. - * TODO: should we cache this similar to citus? - */ -static Oid -CStoreStripeAttrRelationId(void) -{ - return get_relname_relid("cstore_stripe_attr", CStoreNamespaceId()); -} - - -/* - * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr_pkey. - * TODO: should we cache this similar to citus? - */ -static Oid -CStoreStripeAttrIndexRelationId(void) -{ - return get_relname_relid("cstore_stripe_attr_pkey", CStoreNamespaceId()); -} - - /* * CStoreStripesRelationId returns relation id of cstore_stripes. * TODO: should we cache this similar to citus? 
diff --git a/cstore_reader.c b/cstore_reader.c index 6b5d7ed00..25702b272 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -39,7 +39,6 @@ /* static function declarations */ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, - StripeFooter *stripeFooter, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList); @@ -141,7 +140,6 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu StripeMetadata *stripeMetadata = NULL; List *stripeMetadataList = readState->tableMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); - StripeFooter *stripeFooter = NULL; /* if we have read all stripes, return false */ if (readState->readStripeCount == stripeCount) @@ -154,12 +152,8 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu readState->blockData = NULL; stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount); - stripeFooter = ReadStripeFooter(readState->relationId, - stripeMetadata->id, - readState->tupleDescriptor->natts); stripeBuffers = LoadFilteredStripeBuffers(readState->relation, stripeMetadata, - stripeFooter, readState->tupleDescriptor, readState->projectedColumnList, readState->whereClauseList); @@ -333,12 +327,11 @@ CStoreTableRowCount(Relation relation) */ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, - StripeFooter *stripeFooter, TupleDesc tupleDescriptor, - List *projectedColumnList, List *whereClauseList) + TupleDesc tupleDescriptor, List *projectedColumnList, + List *whereClauseList) { StripeBuffers *stripeBuffers = NULL; ColumnBuffers **columnBuffersArray = NULL; - uint64 currentColumnFileOffset = 0; uint32 columnIndex = 0; uint32 columnCount = tupleDescriptor->natts; @@ -358,13 +351,9 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, /* load column data for projected columns */ columnBuffersArray = 
palloc0(columnCount * sizeof(ColumnBuffers *)); - currentColumnFileOffset = stripeMetadata->fileOffset; - for (columnIndex = 0; columnIndex < stripeFooter->columnCount; columnIndex++) + for (columnIndex = 0; columnIndex < stripeMetadata->columnCount; columnIndex++) { - uint64 existsSize = stripeFooter->existsSizeArray[columnIndex]; - uint64 valueSize = stripeFooter->valueSizeArray[columnIndex]; - if (projectedColumnMask[columnIndex]) { ColumnBlockSkipNode *blockSkipNode = @@ -379,9 +368,6 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, columnBuffersArray[columnIndex] = columnBuffers; } - - currentColumnFileOffset += existsSize; - currentColumnFileOffset += valueSize; } stripeBuffers = palloc0(sizeof(StripeBuffers)); diff --git a/cstore_writer.c b/cstore_writer.c index 65871b511..91e73ffa8 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -34,7 +34,6 @@ static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount); static StripeMetadata FlushStripe(TableWriteState *writeState); -static StripeFooter * CreateStripeFooter(StripeSkipList *stripeSkipList); static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, int datumTypeLength, @@ -242,7 +241,6 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul MemoryContextReset(writeState->stripeWriteContext); writeState->currentStripeId++; - writeState->currentStripeOffset = 0; /* set stripe data and skip list to NULL so they are recreated next time */ writeState->stripeBuffers = NULL; @@ -450,8 +448,6 @@ static StripeMetadata FlushStripe(TableWriteState *writeState) { StripeMetadata stripeMetadata = { 0 }; - uint64 dataLength = 0; - StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; StripeBuffers *stripeBuffers = writeState->stripeBuffers; @@ -464,6 +460,7 @@ 
FlushStripe(TableWriteState *writeState) uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; uint64 initialFileOffset = writeState->currentFileOffset; + uint64 stripeSize = 0; /* * check if the last block needs serialization , the last block was not serialized @@ -487,16 +484,10 @@ FlushStripe(TableWriteState *writeState) uint64 existsBufferSize = blockBuffers->existsBuffer->len; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; - blockSkipNode->existsBlockOffset = writeState->currentStripeOffset; + blockSkipNode->existsBlockOffset = stripeSize; blockSkipNode->existsLength = existsBufferSize; - writeState->currentStripeOffset += existsBufferSize; + stripeSize += existsBufferSize; } - } - - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; - ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { @@ -506,20 +497,14 @@ FlushStripe(TableWriteState *writeState) CompressionType valueCompressionType = blockBuffers->valueCompressionType; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; - blockSkipNode->valueBlockOffset = writeState->currentStripeOffset; + blockSkipNode->valueBlockOffset = stripeSize; blockSkipNode->valueLength = valueBufferSize; blockSkipNode->valueCompressionType = valueCompressionType; - writeState->currentStripeOffset += valueBufferSize; + stripeSize += valueBufferSize; } } - - /* create skip list and footer buffers */ - SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, - stripeSkipList, tupleDescriptor); - stripeFooter = CreateStripeFooter(stripeSkipList); - /* * Each stripe has only one section: * Data section, in which we store data for each column continuously. 
@@ -557,17 +542,9 @@ FlushStripe(TableWriteState *writeState) } } - /* finally, we flush the footer buffer */ - SaveStripeFooter(writeState->relationId, - writeState->currentStripeId, - stripeFooter); - - /* set stripe metadata */ - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - dataLength += stripeFooter->existsSizeArray[columnIndex]; - dataLength += stripeFooter->valueSizeArray[columnIndex]; - } + /* create skip list and footer buffers */ + SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, + stripeSkipList, tupleDescriptor); for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { @@ -576,47 +553,16 @@ FlushStripe(TableWriteState *writeState) } stripeMetadata.fileOffset = initialFileOffset; - stripeMetadata.dataLength = dataLength; + stripeMetadata.dataLength = stripeSize; stripeMetadata.id = writeState->currentStripeId; stripeMetadata.blockCount = blockCount; stripeMetadata.blockRowCount = writeState->blockRowCount; + stripeMetadata.columnCount = columnCount; return stripeMetadata; } -/* Creates and returns the footer for given stripe. 
*/ -static StripeFooter * -CreateStripeFooter(StripeSkipList *stripeSkipList) -{ - StripeFooter *stripeFooter = NULL; - uint32 columnIndex = 0; - uint32 columnCount = stripeSkipList->columnCount; - uint64 *existsSizeArray = palloc0(columnCount * sizeof(uint64)); - uint64 *valueSizeArray = palloc0(columnCount * sizeof(uint64)); - - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - ColumnBlockSkipNode *blockSkipNodeArray = - stripeSkipList->blockSkipNodeArray[columnIndex]; - uint32 blockIndex = 0; - - for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) - { - existsSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].existsLength; - valueSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].valueLength; - } - } - - stripeFooter = palloc0(sizeof(StripeFooter)); - stripeFooter->columnCount = columnCount; - stripeFooter->existsSizeArray = existsSizeArray; - stripeFooter->valueSizeArray = valueSizeArray; - - return stripeFooter; -} - - /* * SerializeBoolArray serializes the given boolean array and returns the result * as a StringInfo. This function packs every 8 boolean values into one byte. 
From 7714b60e5e69665b7cc9bf8f14d4fb104c98a3f2 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 23 Sep 2020 22:52:44 -0700 Subject: [PATCH 59/91] reset memory context at end of execution --- cstore_tableam.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 312e10981..0e630d653 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -58,7 +58,7 @@ CStoreTableAMGetOptions(void) static MemoryContext -GetCStoreMemoryContext(void) +GetCStoreMemoryContext() { if (CStoreContext == NULL) { @@ -69,6 +69,16 @@ GetCStoreMemoryContext(void) } +static void +ResetCStoreMemoryContext() +{ + if (CStoreContext != NULL) + { + MemoryContextReset(CStoreContext); + } +} + + static void cstore_init_write_state(Relation relation) { @@ -591,6 +601,7 @@ CStoreExecutorEnd(QueryDesc *queryDesc) { standard_ExecutorEnd(queryDesc); } + ResetCStoreMemoryContext(); } From 1d69519bd88495227d32a8c4309345456866f0fb Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Fri, 25 Sep 2020 13:03:34 -0700 Subject: [PATCH 60/91] Delete autogenerated expected files --- expected/am_block_filtering.out | 120 -------------------------------- expected/am_copyto.out | 23 ------ expected/am_data_types.out | 78 --------------------- expected/am_load.out | 42 ----------- 4 files changed, 263 deletions(-) delete mode 100644 expected/am_block_filtering.out delete mode 100644 expected/am_copyto.out delete mode 100644 expected/am_data_types.out delete mode 100644 expected/am_load.out diff --git a/expected/am_block_filtering.out b/expected/am_block_filtering.out deleted file mode 100644 index 005b42e64..000000000 --- a/expected/am_block_filtering.out +++ /dev/null @@ -1,120 +0,0 @@ --- --- Test block filtering in cstore_fdw using min/max values in stripe skip lists. --- --- --- filtered_row_count returns number of rows filtered by the WHERE clause. 
--- If blocks get filtered by cstore_fdw, less rows are passed to WHERE --- clause, so this function should return a lower number. --- -CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS -$$ - DECLARE - result bigint; - rec text; - BEGIN - result := 0; - - FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP - IF rec ~ '^\s+Rows Removed by Filter' then - result := regexp_replace(rec, '[^0-9]*', '', 'g'); - END IF; - END LOOP; - - RETURN result; - END; -$$ LANGUAGE PLPGSQL; --- Create and load data --- block_row_count '1000', stripe_row_count '2000' -set cstore.stripe_row_count = 2000; -set cstore.block_row_count = 1000; -CREATE TABLE test_block_filtering (a int) - USING cstore_tableam; -COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; --- Verify that filtered_row_count is less than 1000 for the following queries -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); - filtered_row_count --------------------- - 0 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); - filtered_row_count --------------------- - 801 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); - filtered_row_count --------------------- - 200 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); - filtered_row_count --------------------- - 101 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); - filtered_row_count --------------------- - 900 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); - filtered_row_count --------------------- - 0 -(1 row) - --- Verify that filtered_row_count is less than 2000 for the following queries -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); - filtered_row_count --------------------- - 990 -(1 row) - 
-SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); - filtered_row_count --------------------- - 1979 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); - filtered_row_count --------------------- - 0 -(1 row) - --- Load data for second time and verify that filtered_row_count is exactly twice as before -COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); - filtered_row_count --------------------- - 1602 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); - filtered_row_count --------------------- - 0 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); - filtered_row_count --------------------- - 3958 -(1 row) - -set cstore.stripe_row_count to default; -set cstore.block_row_count to default; --- Verify that we are fine with collations which use a different alphabet order -CREATE TABLE collation_block_filtering_test(A text collate "da_DK") - USING cstore_tableam; -COPY collation_block_filtering_test FROM STDIN; -SELECT * FROM collation_block_filtering_test WHERE A > 'B'; - a ---- - Å -(1 row) - diff --git a/expected/am_copyto.out b/expected/am_copyto.out deleted file mode 100644 index c8a5f676b..000000000 --- a/expected/am_copyto.out +++ /dev/null @@ -1,23 +0,0 @@ --- --- Test copying data from cstore_fdw tables. --- -CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; --- load table data from file -COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; --- export using COPY table TO ... 
-COPY test_contestant TO STDOUT; -a 01-10-1990 2090 97.1 XA {a} -b 11-01-1990 2203 98.1 XA {a,b} -c 11-01-1988 2907 99.4 XB {w,y} -d 05-05-1985 2314 98.3 XB {} -e 05-05-1995 2236 98.2 XC {a} --- export using COPY (SELECT * FROM table) TO ... -COPY (select * from test_contestant) TO STDOUT; -a 01-10-1990 2090 97.1 XA {a} -b 11-01-1990 2203 98.1 XA {a,b} -c 11-01-1988 2907 99.4 XB {w,y} -d 05-05-1985 2314 98.3 XB {} -e 05-05-1995 2236 98.2 XC {a} -DROP TABLE test_contestant CASCADE; diff --git a/expected/am_data_types.out b/expected/am_data_types.out deleted file mode 100644 index a597ff8de..000000000 --- a/expected/am_data_types.out +++ /dev/null @@ -1,78 +0,0 @@ --- --- Test loading and reading different data types to/from cstore_fdw foreign tables. --- --- Settings to make the result deterministic -SET datestyle = "ISO, YMD"; -SET timezone to 'GMT'; -SET intervalstyle TO 'POSTGRES_VERBOSE'; --- Test array types -CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) USING cstore_tableam; -COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; -SELECT * FROM test_array_types; - int_array | bigint_array | text_array ---------------------------+--------------------------------------------+------------ - {1,2,3} | {1,2,3} | {a,b,c} - {} | {} | {} - {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} -(3 rows) - --- Test date/time types -CREATE TABLE test_datetime_types (timestamp timestamp, - timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) USING cstore_tableam; -COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; -SELECT * FROM test_datetime_types; - timestamp | timestamp_with_timezone | date | time | interval ----------------------+-------------------------+------------+----------+----------- - 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours - 1970-01-01 
00:00:00 | infinity | -infinity | 00:00:00 | @ 0 -(2 rows) - --- Test enum and composite types -CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); -CREATE TYPE composite_type AS (a int, b text); -CREATE TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) USING cstore_tableam; -COPY test_enum_and_composite_types FROM - '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; -SELECT * FROM test_enum_and_composite_types; - enum | composite -------+----------- - a | (2,b) - b | (3,c) -(2 rows) - --- Test range types -CREATE TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) USING cstore_tableam; -COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; -SELECT * FROM test_range_types; - int4range | int8range | numrange | tsrange ------------+-----------+----------+----------------------------------------------- - [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") - empty | [1,) | (,) | empty -(2 rows) - --- Test other types -CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; -COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; -SELECT * FROM test_other_types; - bool | bytea | money | inet | bitstring | uuid | json -------+------------+-------+-------------+-----------+--------------------------------------+------------------ - f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} - t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] -(2 rows) - --- Test null values -CREATE TABLE test_null_values (a int, b int[], c composite_type) - USING cstore_tableam; -COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; -SELECT * FROM test_null_values; - a | b | c ----+--------+----- - | {NULL} | (,) - | 
| -(2 rows) - diff --git a/expected/am_load.out b/expected/am_load.out deleted file mode 100644 index 02cff343a..000000000 --- a/expected/am_load.out +++ /dev/null @@ -1,42 +0,0 @@ --- --- Test loading data into cstore_fdw tables. --- --- COPY with incorrect delimiter -COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' - WITH DELIMITER '|'; -- ERROR -ERROR: missing data for column "birthdate" -CONTEXT: COPY contestant, line 1: "a,1990-01-10,2090,97.1,XA ,{a}" --- COPY with invalid program -COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR -ERROR: program "invalid_program" failed -DETAIL: command not found --- COPY into uncompressed table from file -COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; --- COPY into uncompressed table from program -COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; --- COPY into compressed table -set cstore.compression = 'pglz'; -COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; --- COPY into uncompressed table from program -COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' - WITH CSV; -set cstore.compression to default; --- Test column list -CREATE TABLE famous_constants (id int, name text, value real) - USING cstore_tableam; -COPY famous_constants (value, name, id) FROM STDIN WITH CSV; -COPY famous_constants (name, value) FROM STDIN WITH CSV; -SELECT * FROM famous_constants ORDER BY id, name; - id | name | value -----+----------------+----------- - 1 | pi | 3.141 - 2 | e | 2.718 - 3 | gamma | 0.577 - 4 | bohr radius | 5.291e-11 - | avagadro | 6.022e+23 - | electron mass | 9.109e-31 - | proton mass | 1.672e-27 - | speed of light | 2.997e+08 -(8 rows) - -DROP TABLE famous_constants; From 5a077f2308e29f616da3f4be0d85026fbf8d3912 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Fri, 25 Sep 2020 13:10:32 -0700 Subject: [PATCH 61/91] Remove 
the unused drop event trigger --- cstore_fdw--1.7.sql | 27 --------------------------- cstore_fdw.c | 25 ------------------------- 2 files changed, 52 deletions(-) diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 84b69be07..fa8b558e0 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -31,33 +31,6 @@ RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C STRICT; -CREATE OR REPLACE FUNCTION cstore_clean_table_resources(oid) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION cstore_drop_trigger() - RETURNS event_trigger - LANGUAGE plpgsql - AS $csdt$ -DECLARE v_obj record; -BEGIN - FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP - - IF v_obj.object_type NOT IN ('table', 'foreign table') THEN - CONTINUE; - END IF; - - PERFORM cstore.cstore_clean_table_resources(v_obj.objid); - - END LOOP; -END; -$csdt$; - -CREATE EVENT TRIGGER cstore_drop_event - ON SQL_DROP - EXECUTE PROCEDURE cstore_drop_trigger(); - CREATE TABLE cstore_tables ( relid oid NOT NULL, block_row_count int NOT NULL, diff --git a/cstore_fdw.c b/cstore_fdw.c index a66ba1d80..9a8882697 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -196,7 +196,6 @@ PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); PG_FUNCTION_INFO_V1(cstore_table_size); PG_FUNCTION_INFO_V1(cstore_fdw_handler); PG_FUNCTION_INFO_V1(cstore_fdw_validator); -PG_FUNCTION_INFO_V1(cstore_clean_table_resources); /* saved hook value in case of unload */ @@ -1179,30 +1178,6 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) } -/* - * cstore_clean_table_resources cleans up table data and metadata with provided - * relation id. The function is meant to be called from drop_event_trigger. It - * has no way of knowing if the provided relation id belongs to a cstore table. - * Therefore it first checks if data file exists at default location before - * attempting to remove data and footer files. If the table is created at a - * custom path than its resources would not be removed. 
- */ -Datum -cstore_clean_table_resources(PG_FUNCTION_ARGS) -{ - /* - * TODO: Event triggers do not offer the relfilenode of the - * dropped table, and by the time the sql_drop event trigger - * is called, the object is already gone so we can't look it - * up. Therefore, we can't drop the Smgr storage here, which - * means that cascaded drops of cstore foreign tables will - * leak storage. - */ - - PG_RETURN_VOID(); -} - - /* * OptionNamesString finds all options that are valid for the current context, * and concatenates these option names in a comma separated string. The function From ec1e277e8ee7bb57b66e57476a8c707e33bec7a9 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Sat, 26 Sep 2020 23:50:23 -0700 Subject: [PATCH 62/91] Initial implementation of ANALYZE --- Makefile | 4 ++-- cstore_tableam.c | 27 ++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index ea5a858bf..483aebc35 100644 --- a/Makefile +++ b/Makefile @@ -48,11 +48,11 @@ ifeq ($(USE_FDW),yes) fdw_copyto fdw_alter fdw_truncate fdw_clean endif -# disabled tests: am_block_filtering am_analyze +# disabled tests: am_block_filtering ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o - REGRESS += am_create am_load am_query am_data_types am_functions \ + REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_truncate am_clean endif diff --git a/cstore_tableam.c b/cstore_tableam.c index 312e10981..3f8c37db9 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -470,8 +470,13 @@ static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) { - /* TODO */ - return false; + /* + * Our access method is not pages based, i.e. tuples are not confined + * to pages boundaries. So not much to do here. 
We return true anyway + * so acquire_sample_rows() in analyze.c would call our + * cstore_scan_analyze_next_tuple() callback. + */ + return true; } @@ -480,7 +485,23 @@ cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) { - /* TODO */ + /* + * Currently we don't do anything smart to reduce number of rows returned + * for ANALYZE. The TableAM API's ANALYZE functions are designed for page + * based access methods where it chooses random pages, and then reads + * tuples from those pages. + * + * We could do something like that here by choosing sample stripes or blocks, + * but getting that correct might need quite some work. Since cstore_fdw's + * ANALYZE scanned all rows, as a starter we do the same here and scan all + * rows. + */ + if (cstore_getnextslot(scan, ForwardScanDirection, slot)) + { + (*liverows)++; + return true; + } + return false; } From cf0ba6103ed1d8e91a6976b5c001be5d8a7b6f7e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Sat, 26 Sep 2020 12:23:14 -0700 Subject: [PATCH 63/91] Associate metadata with rel filenode --- cstore.c | 23 ++------- cstore.h | 26 +++++----- cstore_fdw--1.7.sql | 16 +++--- cstore_fdw.c | 47 +++++++++--------- cstore_metadata_tables.c | 103 +++++++++++++++++++++++++-------------- cstore_reader.c | 11 +++-- cstore_tableam.c | 13 ++--- cstore_writer.c | 16 +++--- 8 files changed, 134 insertions(+), 121 deletions(-) diff --git a/cstore.c b/cstore.c index f5846a029..d6b6751e2 100644 --- a/cstore.c +++ b/cstore.c @@ -102,26 +102,11 @@ ParseCompressionType(const char *compressionTypeString) /* - * InitializeCStoreTableFile creates data and footer file for a cstore table. - * The function assumes data and footer files do not exist, therefore - * it should be called on empty or non-existing table. Notice that the caller - * is expected to acquire AccessExclusiveLock on the relation. 
+ * InitializeCStoreTableFile initializes metadata for the given relation + * file node. */ void -InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions) +InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions) { - TableWriteState *writeState = NULL; - TupleDesc tupleDescriptor = RelationGetDescr(relation); - - InitCStoreTableMetadata(relationId, cstoreOptions->blockRowCount); - - /* - * Initialize state to write to the cstore file. This creates an - * empty data file and a valid footer file for the table. - */ - writeState = CStoreBeginWrite(relationId, - cstoreOptions->compressionType, - cstoreOptions->stripeRowCount, - cstoreOptions->blockRowCount, tupleDescriptor); - CStoreEndWrite(writeState); + InitCStoreTableMetadata(relNode, cstoreOptions->blockRowCount); } diff --git a/cstore.h b/cstore.h index 96fa1ed53..dd5f9e6e1 100644 --- a/cstore.h +++ b/cstore.h @@ -16,7 +16,9 @@ #include "fmgr.h" #include "lib/stringinfo.h" +#include "nodes/parsenodes.h" #include "storage/bufpage.h" +#include "storage/lockdefs.h" #include "utils/relcache.h" /* Defines for valid option names */ @@ -190,8 +192,6 @@ typedef struct StripeBuffers /* TableReadState represents state of a cstore file read operation. */ typedef struct TableReadState { - Oid relationId; - TableMetadata *tableMetadata; StripeMetadata *currentStripeMetadata; TupleDesc tupleDescriptor; @@ -217,7 +217,6 @@ typedef struct TableReadState /* TableWriteState represents state of a cstore file write operation. 
*/ typedef struct TableWriteState { - Oid relationId; TableMetadata *tableMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; @@ -249,11 +248,12 @@ extern int cstore_block_row_count; extern void cstore_init(void); extern CompressionType ParseCompressionType(const char *compressionTypeString); -extern void InitializeCStoreTableFile(Oid relationId, Relation relation, - CStoreOptions *cstoreOptions); +extern void InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions); +extern bool IsCStoreFdwTable(Oid relationId); +extern Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); /* Function declarations for writing to a cstore file */ -extern TableWriteState * CStoreBeginWrite(Oid relationId, +extern TableWriteState * CStoreBeginWrite(Relation relation, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, @@ -263,7 +263,7 @@ extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, extern void CStoreEndWrite(TableWriteState *state); /* Function declarations for reading from a cstore file */ -extern TableReadState * CStoreBeginRead(Oid relationId, +extern TableReadState * CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *qualConditions); extern bool CStoreReadFinished(TableReadState *state); @@ -283,12 +283,14 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); /* cstore_metadata_tables.c */ -extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); -extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); -extern TableMetadata * ReadTableMetadata(Oid relid); -extern void SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, +extern void DeleteTableMetadataRowIfExists(Oid relfilenode); +extern void InitCStoreTableMetadata(Oid relfilenode, int blockRowCount); +extern void 
InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); +extern TableMetadata * ReadTableMetadata(Oid relfilenode); +extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, + StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); -extern StripeSkipList * ReadStripeSkipList(Oid relid, uint64 stripe, +extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount); diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index fa8b558e0..b3470b6a5 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -32,17 +32,17 @@ AS 'MODULE_PATHNAME' LANGUAGE C STRICT; CREATE TABLE cstore_tables ( - relid oid NOT NULL, + relfilenode oid NOT NULL, block_row_count int NOT NULL, version_major bigint NOT NULL, version_minor bigint NOT NULL, - PRIMARY KEY (relid) + PRIMARY KEY (relfilenode) ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_tables IS 'CStore table wide metadata'; CREATE TABLE cstore_stripes ( - relid oid NOT NULL, + relfilenode oid NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, data_length bigint NOT NULL, @@ -50,14 +50,14 @@ CREATE TABLE cstore_stripes ( block_count int NOT NULL, block_row_count int NOT NULL, row_count bigint NOT NULL, - PRIMARY KEY (relid, stripe), - FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED + PRIMARY KEY (relfilenode, stripe), + FOREIGN KEY (relfilenode) REFERENCES cstore_tables(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_tables IS 'CStore per stripe metadata'; CREATE TABLE cstore_skipnodes ( - relid oid NOT NULL, + relfilenode oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, block int NOT NULL, @@ -69,8 +69,8 @@ CREATE TABLE cstore_skipnodes ( exists_stream_offset bigint NOT NULL, exists_stream_length bigint NOT NULL, value_compression_type int NOT NULL, - PRIMARY KEY (relid, stripe, attr, block), - FOREIGN KEY (relid, 
stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED + PRIMARY KEY (relfilenode, stripe, attr, block), + FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_tables IS 'CStore per block metadata'; diff --git a/cstore_fdw.c b/cstore_fdw.c index 9a8882697..f9f886f79 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -131,7 +131,6 @@ static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void FdwNewRelFileNode(Relation relation); static void TruncateCStoreTables(List *cstoreRelationList); -static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); static bool DistributedTable(Oid relationId); static bool DistributedWorkerCopy(CopyStmt *copyStatement); @@ -189,7 +188,6 @@ static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); #endif static void cstore_fdw_initrel(Relation rel); -static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode); PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); @@ -267,7 +265,8 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) * We have no chance to hook into server creation to create data * directory for it during database creation time. 
*/ - InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); + InitializeCStoreTableFile(relation->rd_node.relNode, + CStoreGetOptions(relationId)); heap_close(relation, AccessExclusiveLock); } } @@ -403,7 +402,7 @@ CopyCStoreTableStatement(CopyStmt *copyStatement) { Oid relationId = RangeVarGetRelid(copyStatement->relation, AccessShareLock, true); - bool cstoreTable = CStoreTable(relationId); + bool cstoreTable = IsCStoreFdwTable(relationId); if (cstoreTable) { bool distributedTable = DistributedTable(relationId); @@ -558,12 +557,11 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) #endif /* init state to write to the cstore file */ - writeState = CStoreBeginWrite(relationId, + writeState = CStoreBeginWrite(relation, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); - writeState->relation = relation; while (nextRowFound) { @@ -686,7 +684,7 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) } relationId = RangeVarGetRelid(relationRangeVar, AccessShareLock, true); - if (!CStoreTable(relationId)) + if (!IsCStoreFdwTable(relationId)) { return; } @@ -765,7 +763,7 @@ FindCStoreTables(List *tableList) { RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); - if (CStoreTable(relationId) && !DistributedTable(relationId)) + if (IsCStoreFdwTable(relationId) && !DistributedTable(relationId)) { cstoreTableList = lappend(cstoreTableList, rangeVar); } @@ -825,10 +823,11 @@ TruncateCStoreTables(List *cstoreRelationList) Relation relation = (Relation) lfirst(relationCell); Oid relationId = relation->rd_id; - Assert(CStoreTable(relationId)); + Assert(IsCStoreFdwTable(relationId)); FdwNewRelFileNode(relation); - InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); + InitializeCStoreTableFile(relation->rd_node.relNode, + CStoreGetOptions(relationId)); } } @@ 
-861,7 +860,6 @@ FdwNewRelFileNode(Relation relation) Relation tmprel; Oid tablespace; Oid filenode; - RelFileNode newrnode; /* * Upgrade to AccessExclusiveLock, and hold until the end of the @@ -887,10 +885,6 @@ FdwNewRelFileNode(Relation relation) filenode = GetNewRelFileNode(tablespace, NULL, persistence); - newrnode.spcNode = tablespace; - newrnode.dbNode = MyDatabaseId; - newrnode.relNode = filenode; - classform->relfilenode = filenode; classform->relpages = 0; /* it's empty until further notice */ classform->reltuples = 0; @@ -900,6 +894,10 @@ FdwNewRelFileNode(Relation relation) CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); CommandCounterIncrement(); + + relation->rd_node.spcNode = tablespace; + relation->rd_node.dbNode = MyDatabaseId; + relation->rd_node.relNode = filenode; } heap_freetuple(tuple); @@ -928,11 +926,11 @@ FdwCreateStorage(Relation relation) /* - * CStoreTable checks if the given table name belongs to a foreign columnar store + * IsCStoreFdwTable checks if the given table name belongs to a foreign columnar store * table. If it does, the function returns true. Otherwise, it returns false. 
*/ -static bool -CStoreTable(Oid relationId) +bool +IsCStoreFdwTable(Oid relationId) { bool cstoreTable = false; char relationKind = 0; @@ -1055,7 +1053,7 @@ Datum cstore_table_size(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); - bool cstoreTable = CStoreTable(relationId); + bool cstoreTable = IsCStoreFdwTable(relationId); Relation relation; BlockNumber nblocks; @@ -1705,6 +1703,7 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) ForeignScan *foreignScan = NULL; List *foreignPrivateList = NIL; List *whereClauseList = NIL; + Relation relation = NULL; cstore_fdw_initrel(currentRelation); @@ -1721,9 +1720,8 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) whereClauseList = foreignScan->scan.plan.qual; columnList = (List *) linitial(foreignPrivateList); - readState = CStoreBeginRead(foreignTableId, - tupleDescriptor, columnList, whereClauseList); - readState->relation = cstore_fdw_open(foreignTableId, AccessShareLock); + relation = cstore_fdw_open(foreignTableId, AccessShareLock); + readState = CStoreBeginRead(relation, tupleDescriptor, columnList, whereClauseList); scanState->fdw_state = (void *) readState; } @@ -2067,13 +2065,12 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - writeState = CStoreBeginWrite(foreignTableOid, + writeState = CStoreBeginWrite(relation, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); - writeState->relation = relation; relationInfo->ri_FdwState = (void *) writeState; } @@ -2196,7 +2193,7 @@ cstore_fdw_initrel(Relation rel) } -static Relation +Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode) { Relation rel = heap_open(relationId, lockmode); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 690e9eba9..4459d3009 100644 --- 
a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -50,8 +50,7 @@ static Oid CStoreTablesIndexRelationId(void); static Oid CStoreSkipNodesRelationId(void); static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static int TableBlockRowCount(Oid relid); -static void DeleteTableMetadataRowIfExists(Oid relid); +static bool ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount); static ModifyState * StartModifyRelation(Relation rel); static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls); @@ -63,14 +62,14 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_table */ #define Natts_cstore_tables 4 -#define Anum_cstore_tables_relid 1 +#define Anum_cstore_tables_relfilenode 1 #define Anum_cstore_tables_block_row_count 2 #define Anum_cstore_tables_version_major 3 #define Anum_cstore_tables_version_minor 4 /* constants for cstore_stripe */ #define Natts_cstore_stripes 8 -#define Anum_cstore_stripes_relid 1 +#define Anum_cstore_stripes_relfilenode 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 #define Anum_cstore_stripes_data_length 4 @@ -81,7 +80,7 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_skipnodes */ #define Natts_cstore_skipnodes 12 -#define Anum_cstore_skipnodes_relid 1 +#define Anum_cstore_skipnodes_relfilenode 1 #define Anum_cstore_skipnodes_stripe 2 #define Anum_cstore_skipnodes_attr 3 #define Anum_cstore_skipnodes_block 4 @@ -99,7 +98,7 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); * InitCStoreTableMetadata adds a record for the given relation in cstore_table. 
*/ void -InitCStoreTableMetadata(Oid relid, int blockRowCount) +InitCStoreTableMetadata(Oid relfilenode, int blockRowCount) { Oid cstoreTablesOid = InvalidOid; Relation cstoreTables = NULL; @@ -107,13 +106,13 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) bool nulls[Natts_cstore_tables] = { 0 }; Datum values[Natts_cstore_tables] = { - ObjectIdGetDatum(relid), + ObjectIdGetDatum(relfilenode), Int32GetDatum(blockRowCount), Int32GetDatum(CSTORE_VERSION_MAJOR), Int32GetDatum(CSTORE_VERSION_MINOR) }; - DeleteTableMetadataRowIfExists(relid); + DeleteTableMetadataRowIfExists(relfilenode); cstoreTablesOid = CStoreTablesRelationId(); cstoreTables = heap_open(cstoreTablesOid, RowExclusiveLock); @@ -133,7 +132,7 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) * of cstore_skipnodes. */ void -SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, +SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor) { uint32 columnIndex = 0; @@ -155,7 +154,7 @@ SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, &stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; Datum values[Natts_cstore_skipnodes] = { - ObjectIdGetDatum(relid), + ObjectIdGetDatum(relfilenode), Int64GetDatum(stripe), Int32GetDatum(columnIndex + 1), Int32GetDatum(blockIndex), @@ -201,7 +200,7 @@ SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, * ReadStripeSkipList fetches StripeSkipList for a given stripe. 
*/ StripeSkipList * -ReadStripeSkipList(Oid relid, uint64 stripe, TupleDesc tupleDescriptor, +ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount) { StripeSkipList *skipList = NULL; @@ -218,8 +217,8 @@ ReadStripeSkipList(Oid relid, uint64 stripe, TupleDesc tupleDescriptor, cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); - ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); @@ -311,11 +310,11 @@ ReadStripeSkipList(Oid relid, uint64 stripe, TupleDesc tupleDescriptor, * InsertStripeMetadataRow adds a row to cstore_stripes. */ void -InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) +InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) { bool nulls[Natts_cstore_stripes] = { 0 }; Datum values[Natts_cstore_stripes] = { - ObjectIdGetDatum(relid), + ObjectIdGetDatum(relfilenode), Int64GetDatum(stripe->id), Int64GetDatum(stripe->fileOffset), Int64GetDatum(stripe->dataLength), @@ -339,11 +338,11 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) /* - * ReadTableMetadata constructs TableMetadata for a given relid by reading + * ReadTableMetadata constructs TableMetadata for a given relfilenode by reading * from cstore_tables and cstore_stripes. 
*/ TableMetadata * -ReadTableMetadata(Oid relid) +ReadTableMetadata(Oid relfilenode) { Oid cstoreStripesOid = InvalidOid; Relation cstoreStripes = NULL; @@ -352,12 +351,18 @@ ReadTableMetadata(Oid relid) ScanKeyData scanKey[1]; SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple; + bool found = false; TableMetadata *tableMetadata = palloc0(sizeof(TableMetadata)); - tableMetadata->blockRowCount = TableBlockRowCount(relid); + found = ReadCStoreTables(relfilenode, &tableMetadata->blockRowCount); + if (!found) + { + ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", + relfilenode))); + } - ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); cstoreStripesOid = CStoreStripesRelationId(); cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); @@ -402,12 +407,13 @@ ReadTableMetadata(Oid relid) /* - * TableBlockRowCount returns block_row_count column from cstore_tables for a given relid. + * ReadCStoreTables reads corresponding record from cstore_tables. Returns false if + * table was not found in cstore_tables. 
*/ -static int -TableBlockRowCount(Oid relid) +static bool +ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount) { - int blockRowCount = 0; + bool found = false; Oid cstoreTablesOid = InvalidOid; Relation cstoreTables = NULL; Relation index = NULL; @@ -416,12 +422,29 @@ TableBlockRowCount(Oid relid) SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; - ScanKeyInit(&scanKey[0], Anum_cstore_tables_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_cstore_tables_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = heap_open(cstoreTablesOid, AccessShareLock); - index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); + cstoreTables = try_relation_open(cstoreTablesOid, AccessShareLock); + if (cstoreTables == NULL) + { + /* + * Extension has been dropped. This can be called while + * dropping extension or database via ObjectAccess(). + */ + return false; + } + + index = try_relation_open(CStoreTablesIndexRelationId(), AccessShareLock); + if (index == NULL) + { + heap_close(cstoreTables, NoLock); + + /* extension has been dropped */ + return false; + } + tupleDescriptor = RelationGetDescr(cstoreTables); scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); @@ -432,22 +455,24 @@ TableBlockRowCount(Oid relid) Datum datumArray[Natts_cstore_tables]; bool isNullArray[Natts_cstore_tables]; heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - blockRowCount = DatumGetInt32(datumArray[Anum_cstore_tables_block_row_count - 1]); + *blockRowCount = DatumGetInt32(datumArray[Anum_cstore_tables_block_row_count - + 1]); + found = true; } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); heap_close(cstoreTables, NoLock); - return blockRowCount; + return found; } /* - * DeleteTableMetadataRowIfExists removes the row with given relid from cstore_stripes. 
+ * DeleteTableMetadataRowIfExists removes the row with given relfilenode from cstore_stripes. */ -static void -DeleteTableMetadataRowIfExists(Oid relid) +void +DeleteTableMetadataRowIfExists(Oid relfilenode) { Oid cstoreTablesOid = InvalidOid; Relation cstoreTables = NULL; @@ -456,11 +481,17 @@ DeleteTableMetadataRowIfExists(Oid relid) SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; - ScanKeyInit(&scanKey[0], Anum_cstore_tables_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_cstore_tables_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = heap_open(cstoreTablesOid, AccessShareLock); + cstoreTables = try_relation_open(cstoreTablesOid, AccessShareLock); + if (cstoreTables == NULL) + { + /* extension has been dropped */ + return; + } + index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); diff --git a/cstore_reader.c b/cstore_reader.c index 25702b272..2ee4101c0 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -80,14 +80,15 @@ static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); * read handle that's used during reading rows and finishing the read operation. 
*/ TableReadState * -CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, +CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; TableMetadata *tableMetadata = NULL; MemoryContext stripeReadContext = NULL; + Oid relNode = relation->rd_node.relNode; - tableMetadata = ReadTableMetadata(relationId); + tableMetadata = ReadTableMetadata(relNode); /* * We allocate all stripe specific data in the stripeReadContext, and reset @@ -99,7 +100,7 @@ CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, ALLOCSET_DEFAULT_SIZES); readState = palloc0(sizeof(TableReadState)); - readState->relationId = relationId; + readState->relation = relation; readState->tableMetadata = tableMetadata; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; @@ -308,7 +309,7 @@ CStoreTableRowCount(Relation relation) ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - tableMetadata = ReadTableMetadata(relation->rd_id); + tableMetadata = ReadTableMetadata(relation->rd_node.relNode); foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) { @@ -337,7 +338,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - StripeSkipList *stripeSkipList = ReadStripeSkipList(RelationGetRelid(relation), + StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node.relNode, stripeMetadata->id, tupleDescriptor, stripeMetadata->blockCount); diff --git a/cstore_tableam.c b/cstore_tableam.c index 243cbcb3e..d091916cf 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -97,13 +97,11 @@ cstore_init_write_state(Relation relation) TupleDesc tupdesc = RelationGetDescr(relation); elog(LOG, "initializing write state for relation %d", relation->rd_id); - CStoreWriteState = CStoreBeginWrite(relation->rd_id, + CStoreWriteState = 
CStoreBeginWrite(relation, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupdesc); - - CStoreWriteState->relation = relation; } } @@ -134,16 +132,12 @@ cstore_beginscan(Relation relation, Snapshot snapshot, ParallelTableScanDesc parallel_scan, uint32 flags) { - Oid relid = relation->rd_id; TupleDesc tupdesc = relation->rd_att; - CStoreOptions *cstoreOptions = NULL; TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); - cstoreOptions = CStoreTableAMGetOptions(); - scan->cs_base.rs_rd = relation; scan->cs_base.rs_snapshot = snapshot; scan->cs_base.rs_nkeys = nkeys; @@ -171,8 +165,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, columnList = lappend(columnList, var); } - readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); - readState->relation = relation; + readState = CStoreBeginRead(relation, tupdesc, columnList, NULL); scan->cs_readState = readState; @@ -443,7 +436,7 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitializeCStoreTableFile(rel->rd_id, rel, CStoreTableAMGetOptions()); + InitializeCStoreTableFile(newrnode->relNode, CStoreTableAMGetOptions()); smgrclose(srel); } diff --git a/cstore_writer.c b/cstore_writer.c index 91e73ffa8..728c855b4 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -58,7 +58,7 @@ static StringInfo CopyStringInfo(StringInfo sourceString); * will be added. 
*/ TableWriteState * -CStoreBeginWrite(Oid relationId, +CStoreBeginWrite(Relation relation, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) @@ -73,8 +73,9 @@ CStoreBeginWrite(Oid relationId, bool *columnMaskArray = NULL; BlockData *blockData = NULL; uint64 currentStripeId = 0; + Oid relNode = relation->rd_node.relNode; - tableMetadata = ReadTableMetadata(relationId); + tableMetadata = ReadTableMetadata(relNode); /* * If stripeMetadataList is not empty, jump to the position right after @@ -127,7 +128,7 @@ CStoreBeginWrite(Oid relationId, blockData = CreateEmptyBlockData(columnCount, columnMaskArray, blockRowCount); writeState = palloc0(sizeof(TableWriteState)); - writeState->relationId = relationId; + writeState->relation = relation; writeState->tableMetadata = tableMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; @@ -251,7 +252,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul * doesn't free it. 
*/ MemoryContextSwitchTo(oldContext); - InsertStripeMetadataRow(writeState->relationId, &stripeMetadata); + InsertStripeMetadataRow(writeState->relation->rd_node.relNode, + &stripeMetadata); AppendStripeMetadata(tableMetadata, stripeMetadata); } else @@ -280,7 +282,8 @@ CStoreEndWrite(TableWriteState *writeState) MemoryContextReset(writeState->stripeWriteContext); MemoryContextSwitchTo(oldContext); - InsertStripeMetadataRow(writeState->relationId, &stripeMetadata); + InsertStripeMetadataRow(writeState->relation->rd_node.relNode, + &stripeMetadata); AppendStripeMetadata(writeState->tableMetadata, stripeMetadata); } @@ -543,7 +546,8 @@ FlushStripe(TableWriteState *writeState) } /* create skip list and footer buffers */ - SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, + SaveStripeSkipList(writeState->relation->rd_node.relNode, + writeState->currentStripeId, stripeSkipList, tupleDescriptor); for (blockIndex = 0; blockIndex < blockCount; blockIndex++) From d37c717e143fdd07275393f5e81ebbc6780fc069 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Sat, 26 Sep 2020 12:39:16 -0700 Subject: [PATCH 64/91] Clean-up resources on drop --- cstore.c | 80 ++++++++++++++++++++++++++++++++++++++++ cstore.h | 1 + cstore_fdw.c | 50 ------------------------- cstore_metadata_tables.c | 11 ++++++ expected/am_drop.out | 15 ++++++++ expected/fdw_drop.out | 15 ++++++++ sql/am_drop.sql | 8 ++++ sql/fdw_drop.sql | 8 ++++ 8 files changed, 138 insertions(+), 50 deletions(-) diff --git a/cstore.c b/cstore.c index d6b6751e2..1d6e414ae 100644 --- a/cstore.c +++ b/cstore.c @@ -16,9 +16,13 @@ #include #include +#include "access/heapam.h" +#include "catalog/objectaccess.h" +#include "catalog/storage.h" #include "miscadmin.h" #include "utils/guc.h" #include "utils/rel.h" +#include "utils/relcache.h" #include "cstore.h" @@ -38,6 +42,11 @@ static const struct config_enum_entry cstore_compression_options[] = { NULL, 0, false } }; +static object_access_hook_type prevObjectAccess 
= NULL; + +static void ObjectAccess(ObjectAccessType access, Oid classId, Oid objectId, int subId, + void *arg); + void cstore_init() { @@ -78,6 +87,9 @@ cstore_init() NULL, NULL, NULL); + + prevObjectAccess = object_access_hook; + object_access_hook = ObjectAccess; } @@ -110,3 +122,71 @@ InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions) { InitCStoreTableMetadata(relNode, cstoreOptions->blockRowCount); } + + +/* + * Implements object_access_hook. One of the places this is called is just + * before dropping an object, which allows us to clean-up resources for + * cstore tables while the pg_class record for the table is still there. + */ +static void +ObjectAccess(ObjectAccessType access, Oid classId, Oid objectId, int subId, void *arg) +{ + if (prevObjectAccess) + { + prevObjectAccess(access, classId, objectId, subId, arg); + } + + /* + * Do nothing if this is not a DROP relation command. + */ + if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) + { + return; + } + + if (IsCStoreFdwTable(objectId)) + { + /* + * Drop both metadata and storage. We need to drop storage here since + * we manage relfilenode for FDW tables in the extension. + */ + Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); + RelationOpenSmgr(rel); + RelationDropStorage(rel); + DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + + /* keep the lock since we did physical changes to the relation */ + relation_close(rel, NoLock); + } + else + { + Oid relNode = InvalidOid; + Relation rel = try_relation_open(objectId, AccessExclusiveLock); + if (rel == NULL) + { + return; + } + + relNode = rel->rd_node.relNode; + if (IsCStoreStorage(relNode)) + { + /* + * Drop only metadata for table am cstore tables. Postgres manages + * storage for these tables, so we don't need to drop that. 
+ */ + DeleteTableMetadataRowIfExists(relNode); + + /* keep the lock since we did physical changes to the relation */ + relation_close(rel, NoLock); + } + else + { + /* + * For non-cstore tables, we do nothing. + * Release the lock since we haven't changed the relation. + */ + relation_close(rel, AccessExclusiveLock); + } + } +} diff --git a/cstore.h b/cstore.h index dd5f9e6e1..919352c6c 100644 --- a/cstore.h +++ b/cstore.h @@ -283,6 +283,7 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); /* cstore_metadata_tables.c */ +extern bool IsCStoreStorage(Oid relfilenode); extern void DeleteTableMetadataRowIfExists(Oid relfilenode); extern void InitCStoreTableMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); diff --git a/cstore_fdw.c b/cstore_fdw.c index f9f886f79..d4c5c1ec1 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -126,7 +126,6 @@ static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString); static uint64 CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString); static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); -static List * DroppedCStoreRelidList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void FdwNewRelFileNode(Relation relation); @@ -315,25 +314,6 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, destReceiver, completionTag); } } - else if (nodeTag(parseTree) == T_DropStmt) - { - List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); - ListCell *lc = NULL; - - /* drop smgr storage */ - foreach(lc, dropRelids) - { - Oid relid = lfirst_oid(lc); - Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); - - RelationOpenSmgr(relation); - RelationDropStorage(relation); - 
heap_close(relation, AccessExclusiveLock); - } - - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); - } else if (nodeTag(parseTree) == T_TruncateStmt) { TruncateStmt *truncateStatement = (TruncateStmt *) parseTree; @@ -723,36 +703,6 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) } -/* - * DropppedCStoreRelidList extracts and returns the list of cstore relids - * from DROP table statement - */ -static List * -DroppedCStoreRelidList(DropStmt *dropStatement) -{ - List *droppedCStoreRelidList = NIL; - - if (dropStatement->removeType == OBJECT_FOREIGN_TABLE) - { - ListCell *dropObjectCell = NULL; - foreach(dropObjectCell, dropStatement->objects) - { - List *tableNameList = (List *) lfirst(dropObjectCell); - RangeVar *rangeVar = makeRangeVarFromNameList(tableNameList); - - Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); - if (CStoreTable(relationId)) - { - droppedCStoreRelidList = lappend_oid(droppedCStoreRelidList, - relationId); - } - } - } - - return droppedCStoreRelidList; -} - - /* FindCStoreTables returns list of CStore tables from given table list */ static List * FindCStoreTables(List *tableList) diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 4459d3009..e1f1caedf 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -94,6 +94,17 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); #define Anum_cstore_skipnodes_value_compression_type 12 +/* + * IsCStoreStorage returns if relfilenode belongs to a cstore table. + */ +bool +IsCStoreStorage(Oid relfilenode) +{ + uint64 blockRowCount = 0; + return ReadCStoreTables(relfilenode, &blockRowCount); +} + + /* * InitCStoreTableMetadata adds a record for the given relation in cstore_table. 
*/ diff --git a/expected/am_drop.out b/expected/am_drop.out index e1c634d7f..c1fc60519 100644 --- a/expected/am_drop.out +++ b/expected/am_drop.out @@ -12,14 +12,29 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + ?column? +---------- + 2 +(1 row) + -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to table test_schema.test_table +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + ?column? +---------- + 1 +(1 row) + SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop diff --git a/expected/fdw_drop.out b/expected/fdw_drop.out index 926f69337..24c0f518d 100644 --- a/expected/fdw_drop.out +++ b/expected/fdw_drop.out @@ -12,14 +12,29 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + ?column? +---------- + 2 +(1 row) + -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to foreign table test_schema.test_table +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + ?column? +---------- + 1 +(1 row) + SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop diff --git a/sql/am_drop.sql b/sql/am_drop.sql index f92f90b9d..06873aa6e 100644 --- a/sql/am_drop.sql +++ b/sql/am_drop.sql @@ -15,14 +15,22 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset + -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; + +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset DROP SCHEMA test_schema CASCADE; +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; SELECT current_database() datname \gset diff --git a/sql/fdw_drop.sql b/sql/fdw_drop.sql index c64b5c99b..7c6dd5c6e 100644 --- a/sql/fdw_drop.sql +++ b/sql/fdw_drop.sql @@ -15,14 +15,22 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset + -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; + +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset DROP SCHEMA test_schema CASCADE; +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; SELECT current_database() datname \gset From a87c15a1e1078343e6cef85127b0e3eff7f4d1ca Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 1 Oct 2020 21:09:47 -0700 Subject: [PATCH 65/91] Address feedback --- cstore.c | 80 ------------------------------------ cstore.h | 2 - cstore_fdw.c | 82 +++++++++++++++++++++++++++++++++---- cstore_metadata_tables.c | 11 ----- cstore_tableam.c | 87 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 162 insertions(+), 100 deletions(-) diff --git a/cstore.c b/cstore.c index 1d6e414ae..d6b6751e2 100644 --- a/cstore.c +++ b/cstore.c @@ -16,13 +16,9 @@ #include #include -#include "access/heapam.h" -#include "catalog/objectaccess.h" -#include "catalog/storage.h" #include "miscadmin.h" #include "utils/guc.h" #include "utils/rel.h" -#include "utils/relcache.h" #include "cstore.h" @@ -42,11 +38,6 @@ static const struct config_enum_entry cstore_compression_options[] = { NULL, 0, false } }; -static object_access_hook_type prevObjectAccess = NULL; - -static void ObjectAccess(ObjectAccessType access, Oid classId, Oid objectId, int subId, - void *arg); - void cstore_init() { @@ -87,9 +78,6 @@ cstore_init() NULL, NULL, NULL); - - prevObjectAccess = object_access_hook; - object_access_hook = ObjectAccess; } @@ -122,71 +110,3 @@ InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions) { InitCStoreTableMetadata(relNode, cstoreOptions->blockRowCount); } - - -/* - * Implements object_access_hook. One of the places this is called is just - * before dropping an object, which allows us to clean-up resources for - * cstore tables while the pg_class record for the table is still there. 
- */ -static void -ObjectAccess(ObjectAccessType access, Oid classId, Oid objectId, int subId, void *arg) -{ - if (prevObjectAccess) - { - prevObjectAccess(access, classId, objectId, subId, arg); - } - - /* - * Do nothing if this is not a DROP relation command. - */ - if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) - { - return; - } - - if (IsCStoreFdwTable(objectId)) - { - /* - * Drop both metadata and storage. We need to drop storage here since - * we manage relfilenode for FDW tables in the extension. - */ - Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); - RelationOpenSmgr(rel); - RelationDropStorage(rel); - DeleteTableMetadataRowIfExists(rel->rd_node.relNode); - - /* keep the lock since we did physical changes to the relation */ - relation_close(rel, NoLock); - } - else - { - Oid relNode = InvalidOid; - Relation rel = try_relation_open(objectId, AccessExclusiveLock); - if (rel == NULL) - { - return; - } - - relNode = rel->rd_node.relNode; - if (IsCStoreStorage(relNode)) - { - /* - * Drop only metadata for table am cstore tables. Postgres manages - * storage for these tables, so we don't need to drop that. - */ - DeleteTableMetadataRowIfExists(relNode); - - /* keep the lock since we did physical changes to the relation */ - relation_close(rel, NoLock); - } - else - { - /* - * For non-cstore tables, we do nothing. - * Release the lock since we haven't changed the relation. 
- */ - relation_close(rel, AccessExclusiveLock); - } - } -} diff --git a/cstore.h b/cstore.h index 919352c6c..ef937ba3c 100644 --- a/cstore.h +++ b/cstore.h @@ -249,8 +249,6 @@ extern void cstore_init(void); extern CompressionType ParseCompressionType(const char *compressionTypeString); extern void InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions); -extern bool IsCStoreFdwTable(Oid relationId); -extern Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(Relation relation, diff --git a/cstore_fdw.c b/cstore_fdw.c index d4c5c1ec1..406a153c4 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -25,6 +25,7 @@ #include "catalog/catalog.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/objectaccess.h" #include "catalog/pg_foreign_table.h" #include "catalog/pg_namespace.h" #include "catalog/storage.h" @@ -54,6 +55,7 @@ #include "parser/parser.h" #include "parser/parse_coerce.h" #include "parser/parse_type.h" +#include "storage/lmgr.h" #include "storage/smgr.h" #include "tcop/utility.h" #include "utils/builtins.h" @@ -105,6 +107,8 @@ static const CStoreValidOption ValidOptionArray[] = { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } }; +static object_access_hook_type prevObjectAccessHook = NULL; + /* local functions forward declarations */ #if PG_VERSION_NUM >= 100000 static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, @@ -130,7 +134,8 @@ static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void FdwNewRelFileNode(Relation relation); static void TruncateCStoreTables(List *cstoreRelationList); -static bool CStoreServer(ForeignServer *server); +static bool IsCStoreFdwTable(Oid relationId); +static bool IsCStoreServer(ForeignServer *server); static bool DistributedTable(Oid relationId); static bool 
DistributedWorkerCopy(CopyStmt *copyStatement); static StringInfo OptionNamesString(Oid currentContextId); @@ -187,7 +192,11 @@ static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); #endif static void cstore_fdw_initrel(Relation rel); +static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode); +static void CStoreFdwObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, + int subId, + void *arg); PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); PG_FUNCTION_INFO_V1(cstore_table_size); @@ -209,6 +218,8 @@ cstore_fdw_init() { PreviousProcessUtilityHook = ProcessUtility_hook; ProcessUtility_hook = CStoreProcessUtility; + prevObjectAccessHook = object_access_hook; + object_access_hook = CStoreFdwObjectAccessHook; } @@ -251,7 +262,7 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) bool missingOK = false; ForeignServer *server = GetForeignServerByName(serverName, missingOK); - if (CStoreServer(server)) + if (IsCStoreServer(server)) { Oid relationId = RangeVarGetRelid(createStatement->base.relation, AccessShareLock, false); @@ -358,7 +369,6 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); } - /* handle other utility statements */ else { @@ -895,7 +905,7 @@ IsCStoreFdwTable(Oid relationId) { ForeignTable *foreignTable = GetForeignTable(relationId); ForeignServer *server = GetForeignServer(foreignTable->serverid); - if (CStoreServer(server)) + if (IsCStoreServer(server)) { cstoreTable = true; } @@ -906,11 +916,11 @@ IsCStoreFdwTable(Oid relationId) /* - * CStoreServer checks if the given foreign server belongs to cstore_fdw. If it + * IsCStoreServer checks if the given foreign server belongs to cstore_fdw. If it * does, the function returns true. Otherwise, it returns false. 
*/ static bool -CStoreServer(ForeignServer *server) +IsCStoreServer(ForeignServer *server) { ForeignDataWrapper *foreignDataWrapper = GetForeignDataWrapper(server->fdwid); bool cstoreServer = false; @@ -2143,7 +2153,7 @@ cstore_fdw_initrel(Relation rel) } -Relation +static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode) { Relation rel = heap_open(relationId, lockmode); @@ -2163,3 +2173,61 @@ cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode) return rel; } + + +/* + * Implements object_access_hook. One of the places this is called is just + * before dropping an object, which allows us to clean-up resources for + * cstore tables. + * + * When cleaning up resources, we need to have access to the pg_class record + * for the table so we can indentify the relfilenode belonging to the relation. + * We don't have access to this information in sql_drop event triggers, since + * the relation has already been dropped there. object_access_hook is called + * __before__ dropping tables, so we still have access to the pg_class + * entry here. + * + * Note that the utility hook is called once per __command__, and not for + * every object dropped, and since a drop can cascade to other objects, it + * is difficult to get full set of dropped objects in the utility hook. + * But object_access_hook is called once per dropped object, so it is + * much easier to clean-up all dropped objects here. + */ +static void +CStoreFdwObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, + int subId, void *arg) +{ + if (prevObjectAccessHook) + { + prevObjectAccessHook(access, classId, objectId, subId, arg); + } + + /* + * Do nothing if this is not a DROP relation command. + */ + if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) + { + return; + } + + /* + * Lock relation to prevent it from being dropped and to avoid + * race conditions in the next if block. 
+ */ + LockRelationOid(objectId, AccessShareLock); + + if (IsCStoreFdwTable(objectId)) + { + /* + * Drop both metadata and storage. We need to drop storage here since + * we manage relfilenode for FDW tables in the extension. + */ + Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); + RelationOpenSmgr(rel); + RelationDropStorage(rel); + DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + + /* keep the lock since we did physical changes to the relation */ + relation_close(rel, NoLock); + } +} diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index e1f1caedf..4459d3009 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -94,17 +94,6 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); #define Anum_cstore_skipnodes_value_compression_type 12 -/* - * IsCStoreStorage returns if relfilenode belongs to a cstore table. - */ -bool -IsCStoreStorage(Oid relfilenode) -{ - uint64 blockRowCount = 0; - return ReadCStoreTables(relfilenode, &blockRowCount); -} - - /* * InitCStoreTableMetadata adds a record for the given relation in cstore_table. 
*/ diff --git a/cstore_tableam.c b/cstore_tableam.c index d091916cf..6d02ebe24 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -14,6 +14,8 @@ #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/index.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" #include "catalog/storage.h" #include "catalog/storage_xlog.h" #include "commands/progress.h" @@ -30,10 +32,13 @@ #include "storage/smgr.h" #include "utils/builtins.h" #include "utils/rel.h" +#include "utils/syscache.h" #include "cstore.h" #include "cstore_tableam.h" +#define CSTORE_TABLEAM_NAME "cstore_tableam" + typedef struct CStoreScanDescData { TableScanDescData cs_base; @@ -45,6 +50,13 @@ typedef struct CStoreScanDescData *CStoreScanDesc; static TableWriteState *CStoreWriteState = NULL; static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; static MemoryContext CStoreContext = NULL; +static object_access_hook_type prevObjectAccessHook = NULL; + +/* forward declaration for static functions */ +static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid + objectId, int subId, + void *arg); +static bool IsCStoreTableAmTable(Oid relationId); static CStoreOptions * CStoreTableAMGetOptions(void) @@ -624,6 +636,8 @@ cstore_tableam_init() { PreviousExecutorEndHook = ExecutorEnd_hook; ExecutorEnd_hook = CStoreExecutorEnd; + prevObjectAccessHook = object_access_hook; + object_access_hook = CStoreTableAMObjectAccessHook; } @@ -634,6 +648,79 @@ cstore_tableam_finish() } +/* + * Implements object_access_hook. One of the places this is called is just + * before dropping an object, which allows us to clean-up resources for + * cstore tables. + * + * See the comments for CStoreFdwObjectAccessHook for more details. 
+ */ +static void +CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, int + subId, + void *arg) +{ + if (prevObjectAccessHook) + { + prevObjectAccessHook(access, classId, objectId, subId, arg); + } + + /* + * Do nothing if this is not a DROP relation command. + */ + if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) + { + return; + } + + /* + * Lock relation to prevent it from being dropped and to avoid + * race conditions in the next if block. + */ + LockRelationOid(objectId, AccessShareLock); + + if (IsCStoreTableAmTable(objectId)) + { + /* + * Drop metadata. No need to drop storage here since for + * tableam tables storage is managed by postgres. + */ + Relation rel = table_open(objectId, AccessExclusiveLock); + DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + + /* keep the lock since we did physical changes to the relation */ + table_close(rel, NoLock); + } +} + + +/* + * IsCStoreTableAmTable returns true if relation has cstore_tableam + * access method. This can be called before extension creation. + */ +static bool +IsCStoreTableAmTable(Oid relationId) +{ + bool result; + Relation rel; + + if (!OidIsValid(relationId)) + { + return false; + } + + /* + * Lock relation to prevent it from being dropped & + * avoid race conditions. 
+ */ + rel = relation_open(relationId, AccessShareLock); + result = rel->rd_tableam == GetCstoreTableAmRoutine(); + relation_close(rel, NoLock); + + return result; +} + + static const TableAmRoutine cstore_am_methods = { .type = T_TableAmRoutine, From a70b0c362e71d06db62bcc65e0ae973dbcb0278e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 1 Oct 2020 21:23:06 -0700 Subject: [PATCH 66/91] Rename cstore_tables to cstore_data_files --- cstore.c | 11 ---- cstore.h | 18 +++--- cstore_fdw--1.7.sql | 10 +-- cstore_fdw.c | 19 ++---- cstore_metadata_tables.c | 128 ++++++++++++++++++++------------------- cstore_reader.c | 18 +++--- cstore_tableam.c | 5 +- cstore_writer.c | 26 ++++---- expected/am_drop.out | 8 +-- expected/fdw_drop.out | 8 +-- sql/am_drop.sql | 8 +-- sql/fdw_drop.sql | 8 +-- 12 files changed, 125 insertions(+), 142 deletions(-) diff --git a/cstore.c b/cstore.c index d6b6751e2..a724a62a0 100644 --- a/cstore.c +++ b/cstore.c @@ -99,14 +99,3 @@ ParseCompressionType(const char *compressionTypeString) return compressionType; } - - -/* - * InitializeCStoreTableFile initializes metadata for the given relation - * file node. - */ -void -InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions) -{ - InitCStoreTableMetadata(relNode, cstoreOptions->blockRowCount); -} diff --git a/cstore.h b/cstore.h index ef937ba3c..8efb0e6af 100644 --- a/cstore.h +++ b/cstore.h @@ -87,12 +87,12 @@ typedef struct StripeMetadata } StripeMetadata; -/* TableMetadata represents the metadata of a cstore file. */ -typedef struct TableMetadata +/* DataFileMetadata represents the metadata of a cstore file. */ +typedef struct DataFileMetadata { List *stripeMetadataList; uint64 blockRowCount; -} TableMetadata; +} DataFileMetadata; /* ColumnBlockSkipNode contains statistics for a ColumnBlockData. */ @@ -192,7 +192,7 @@ typedef struct StripeBuffers /* TableReadState represents state of a cstore file read operation. 
*/ typedef struct TableReadState { - TableMetadata *tableMetadata; + DataFileMetadata *datafileMetadata; StripeMetadata *currentStripeMetadata; TupleDesc tupleDescriptor; Relation relation; @@ -217,7 +217,7 @@ typedef struct TableReadState /* TableWriteState represents state of a cstore file write operation. */ typedef struct TableWriteState { - TableMetadata *tableMetadata; + DataFileMetadata *datafileMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; @@ -248,7 +248,6 @@ extern int cstore_block_row_count; extern void cstore_init(void); extern CompressionType ParseCompressionType(const char *compressionTypeString); -extern void InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(Relation relation, @@ -281,11 +280,10 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); /* cstore_metadata_tables.c */ -extern bool IsCStoreStorage(Oid relfilenode); -extern void DeleteTableMetadataRowIfExists(Oid relfilenode); -extern void InitCStoreTableMetadata(Oid relfilenode, int blockRowCount); +extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); +extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); -extern TableMetadata * ReadTableMetadata(Oid relfilenode); +extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index b3470b6a5..c19bb1449 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -31,7 +31,7 @@ RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C STRICT; -CREATE TABLE cstore_tables ( +CREATE 
TABLE cstore_data_files ( relfilenode oid NOT NULL, block_row_count int NOT NULL, version_major bigint NOT NULL, @@ -39,7 +39,7 @@ CREATE TABLE cstore_tables ( PRIMARY KEY (relfilenode) ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore_tables IS 'CStore table wide metadata'; +COMMENT ON TABLE cstore_data_files IS 'CStore data file wide metadata'; CREATE TABLE cstore_stripes ( relfilenode oid NOT NULL, @@ -51,10 +51,10 @@ CREATE TABLE cstore_stripes ( block_row_count int NOT NULL, row_count bigint NOT NULL, PRIMARY KEY (relfilenode, stripe), - FOREIGN KEY (relfilenode) REFERENCES cstore_tables(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relfilenode) REFERENCES cstore_data_files(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore_tables IS 'CStore per stripe metadata'; +COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata'; CREATE TABLE cstore_skipnodes ( relfilenode oid NOT NULL, @@ -73,4 +73,4 @@ CREATE TABLE cstore_skipnodes ( FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore_tables IS 'CStore per block metadata'; +COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata'; diff --git a/cstore_fdw.c b/cstore_fdw.c index 406a153c4..33a29ad32 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -267,16 +267,8 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) Oid relationId = RangeVarGetRelid(createStatement->base.relation, AccessShareLock, false); Relation relation = cstore_fdw_open(relationId, AccessExclusiveLock); - - /* - * Make sure database directory exists before creating a table. - * This is necessary when a foreign server is created inside - * a template database and a new database is created out of it. - * We have no chance to hook into server creation to create data - * directory for it during database creation time. 
- */ - InitializeCStoreTableFile(relation->rd_node.relNode, - CStoreGetOptions(relationId)); + CStoreOptions *options = CStoreGetOptions(relationId); + InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount); heap_close(relation, AccessExclusiveLock); } } @@ -369,6 +361,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); } + /* handle other utility statements */ else { @@ -782,12 +775,12 @@ TruncateCStoreTables(List *cstoreRelationList) { Relation relation = (Relation) lfirst(relationCell); Oid relationId = relation->rd_id; + CStoreOptions *options = CStoreGetOptions(relationId); Assert(IsCStoreFdwTable(relationId)); FdwNewRelFileNode(relation); - InitializeCStoreTableFile(relation->rd_node.relNode, - CStoreGetOptions(relationId)); + InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount); } } @@ -2225,7 +2218,7 @@ CStoreFdwObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); RelationOpenSmgr(rel); RelationDropStorage(rel); - DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); /* keep the lock since we did physical changes to the relation */ relation_close(rel, NoLock); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 4459d3009..3ebee02e7 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -45,12 +45,12 @@ typedef struct static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); -static Oid CStoreTablesRelationId(void); -static Oid CStoreTablesIndexRelationId(void); +static Oid CStoreDataFilesRelationId(void); +static Oid CStoreDataFilesIndexRelationId(void); static Oid CStoreSkipNodesRelationId(void); static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static bool 
ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount); +static bool ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount); static ModifyState * StartModifyRelation(Relation rel); static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls); @@ -61,11 +61,11 @@ static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_table */ -#define Natts_cstore_tables 4 -#define Anum_cstore_tables_relfilenode 1 -#define Anum_cstore_tables_block_row_count 2 -#define Anum_cstore_tables_version_major 3 -#define Anum_cstore_tables_version_minor 4 +#define Natts_cstore_data_files 4 +#define Anum_cstore_data_files_relfilenode 1 +#define Anum_cstore_data_files_block_row_count 2 +#define Anum_cstore_data_files_version_major 3 +#define Anum_cstore_data_files_version_minor 4 /* constants for cstore_stripe */ #define Natts_cstore_stripes 8 @@ -95,35 +95,36 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* - * InitCStoreTableMetadata adds a record for the given relation in cstore_table. + * InitCStoreDataFileMetadata adds a record for the given relfilenode + * in cstore_data_files. 
*/ void -InitCStoreTableMetadata(Oid relfilenode, int blockRowCount) +InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount) { - Oid cstoreTablesOid = InvalidOid; - Relation cstoreTables = NULL; + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; ModifyState *modifyState = NULL; - bool nulls[Natts_cstore_tables] = { 0 }; - Datum values[Natts_cstore_tables] = { + bool nulls[Natts_cstore_data_files] = { 0 }; + Datum values[Natts_cstore_data_files] = { ObjectIdGetDatum(relfilenode), Int32GetDatum(blockRowCount), Int32GetDatum(CSTORE_VERSION_MAJOR), Int32GetDatum(CSTORE_VERSION_MINOR) }; - DeleteTableMetadataRowIfExists(relfilenode); + DeleteDataFileMetadataRowIfExists(relfilenode); - cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = heap_open(cstoreTablesOid, RowExclusiveLock); + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock); - modifyState = StartModifyRelation(cstoreTables); + modifyState = StartModifyRelation(cstoreDataFiles); InsertTupleAndEnforceConstraints(modifyState, values, nulls); FinishModifyRelation(modifyState); CommandCounterIncrement(); - heap_close(cstoreTables, NoLock); + heap_close(cstoreDataFiles, NoLock); } @@ -338,11 +339,11 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) /* - * ReadTableMetadata constructs TableMetadata for a given relfilenode by reading - * from cstore_tables and cstore_stripes. + * ReadDataFileMetadata constructs DataFileMetadata for a given relfilenode by reading + * from cstore_data_files and cstore_stripes. 
*/ -TableMetadata * -ReadTableMetadata(Oid relfilenode) +DataFileMetadata * +ReadDataFileMetadata(Oid relfilenode) { Oid cstoreStripesOid = InvalidOid; Relation cstoreStripes = NULL; @@ -353,8 +354,8 @@ ReadTableMetadata(Oid relfilenode) HeapTuple heapTuple; bool found = false; - TableMetadata *tableMetadata = palloc0(sizeof(TableMetadata)); - found = ReadCStoreTables(relfilenode, &tableMetadata->blockRowCount); + DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); + found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); if (!found) { ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", @@ -394,40 +395,41 @@ ReadTableMetadata(Oid relfilenode) stripeMetadata->rowCount = DatumGetInt64( datumArray[Anum_cstore_stripes_row_count - 1]); - tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, - stripeMetadata); + datafileMetadata->stripeMetadataList = lappend( + datafileMetadata->stripeMetadataList, + stripeMetadata); } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); heap_close(cstoreStripes, NoLock); - return tableMetadata; + return datafileMetadata; } /* - * ReadCStoreTables reads corresponding record from cstore_tables. Returns false if - * table was not found in cstore_tables. + * ReadCStoreDataFiles reads corresponding record from cstore_data_files. Returns + * false if table was not found in cstore_data_files. 
*/ static bool -ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount) +ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount) { bool found = false; - Oid cstoreTablesOid = InvalidOid; - Relation cstoreTables = NULL; + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; Relation index = NULL; TupleDesc tupleDescriptor = NULL; ScanKeyData scanKey[1]; SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; - ScanKeyInit(&scanKey[0], Anum_cstore_tables_relfilenode, + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); - cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = try_relation_open(cstoreTablesOid, AccessShareLock); - if (cstoreTables == NULL) + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); + if (cstoreDataFiles == NULL) { /* * Extension has been dropped. This can be called while @@ -436,77 +438,77 @@ ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount) return false; } - index = try_relation_open(CStoreTablesIndexRelationId(), AccessShareLock); + index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock); if (index == NULL) { - heap_close(cstoreTables, NoLock); + heap_close(cstoreDataFiles, NoLock); /* extension has been dropped */ return false; } - tupleDescriptor = RelationGetDescr(cstoreTables); + tupleDescriptor = RelationGetDescr(cstoreDataFiles); - scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); + scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, 1, scanKey); heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { - Datum datumArray[Natts_cstore_tables]; - bool isNullArray[Natts_cstore_tables]; + Datum datumArray[Natts_cstore_data_files]; + bool isNullArray[Natts_cstore_data_files]; heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - 
*blockRowCount = DatumGetInt32(datumArray[Anum_cstore_tables_block_row_count - + *blockRowCount = DatumGetInt32(datumArray[Anum_cstore_data_files_block_row_count - 1]); found = true; } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); - heap_close(cstoreTables, NoLock); + heap_close(cstoreDataFiles, NoLock); return found; } /* - * DeleteTableMetadataRowIfExists removes the row with given relfilenode from cstore_stripes. + * DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes. */ void -DeleteTableMetadataRowIfExists(Oid relfilenode) +DeleteDataFileMetadataRowIfExists(Oid relfilenode) { - Oid cstoreTablesOid = InvalidOid; - Relation cstoreTables = NULL; + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; Relation index = NULL; ScanKeyData scanKey[1]; SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; - ScanKeyInit(&scanKey[0], Anum_cstore_tables_relfilenode, + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); - cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = try_relation_open(cstoreTablesOid, AccessShareLock); - if (cstoreTables == NULL) + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); + if (cstoreDataFiles == NULL) { /* extension has been dropped */ return; } - index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); + index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock); - scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); + scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, 1, scanKey); heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { - ModifyState *modifyState = StartModifyRelation(cstoreTables); + ModifyState *modifyState = StartModifyRelation(cstoreDataFiles); 
DeleteTupleAndEnforceConstraints(modifyState, heapTuple); FinishModifyRelation(modifyState); } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); - heap_close(cstoreTables, NoLock); + heap_close(cstoreDataFiles, NoLock); } @@ -711,24 +713,24 @@ CStoreStripesIndexRelationId(void) /* - * CStoreTablesRelationId returns relation id of cstore_tables. + * CStoreDataFilesRelationId returns relation id of cstore_data_files. * TODO: should we cache this similar to citus? */ static Oid -CStoreTablesRelationId(void) +CStoreDataFilesRelationId(void) { - return get_relname_relid("cstore_tables", CStoreNamespaceId()); + return get_relname_relid("cstore_data_files", CStoreNamespaceId()); } /* - * CStoreTablesIndexRelationId returns relation id of cstore_tables_idx. + * CStoreDataFilesIndexRelationId returns relation id of cstore_data_files_pkey. * TODO: should we cache this similar to citus? */ static Oid -CStoreTablesIndexRelationId(void) +CStoreDataFilesIndexRelationId(void) { - return get_relname_relid("cstore_tables_pkey", CStoreNamespaceId()); + return get_relname_relid("cstore_data_files_pkey", CStoreNamespaceId()); } diff --git a/cstore_reader.c b/cstore_reader.c index 2ee4101c0..66807ad08 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -84,11 +84,11 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; - TableMetadata *tableMetadata = NULL; + DataFileMetadata *datafileMetadata = NULL; MemoryContext stripeReadContext = NULL; Oid relNode = relation->rd_node.relNode; - tableMetadata = ReadTableMetadata(relNode); + datafileMetadata = ReadDataFileMetadata(relNode); /* * We allocate all stripe specific data in the stripeReadContext, and reset @@ -101,7 +101,7 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, readState = palloc0(sizeof(TableReadState)); readState->relation = relation; - readState->tableMetadata = tableMetadata; + 
readState->datafileMetadata = datafileMetadata; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; readState->stripeBuffers = NULL; @@ -139,7 +139,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { StripeBuffers *stripeBuffers = NULL; StripeMetadata *stripeMetadata = NULL; - List *stripeMetadataList = readState->tableMetadata->stripeMetadataList; + List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); /* if we have read all stripes, return false */ @@ -229,8 +229,8 @@ void CStoreEndRead(TableReadState *readState) { MemoryContextDelete(readState->stripeReadContext); - list_free_deep(readState->tableMetadata->stripeMetadataList); - pfree(readState->tableMetadata); + list_free_deep(readState->datafileMetadata->stripeMetadataList); + pfree(readState->datafileMetadata); pfree(readState); } @@ -305,13 +305,13 @@ FreeBlockData(BlockData *blockData) uint64 CStoreTableRowCount(Relation relation) { - TableMetadata *tableMetadata = NULL; + DataFileMetadata *datafileMetadata = NULL; ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - tableMetadata = ReadTableMetadata(relation->rd_node.relNode); + datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode); - foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) + foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); totalRowCount += stripeMetadata->rowCount; diff --git a/cstore_tableam.c b/cstore_tableam.c index 6d02ebe24..4e7f22c31 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -443,12 +443,13 @@ cstore_relation_set_new_filenode(Relation rel, MultiXactId *minmulti) { SMgrRelation srel; + CStoreOptions *options = CStoreTableAMGetOptions(); Assert(persistence == RELPERSISTENCE_PERMANENT); *freezeXid = RecentXmin; *minmulti = 
GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitializeCStoreTableFile(newrnode->relNode, CStoreTableAMGetOptions()); + InitCStoreDataFileMetadata(newrnode->relNode, options->blockRowCount); smgrclose(srel); } @@ -686,7 +687,7 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId * tableam tables storage is managed by postgres. */ Relation rel = table_open(objectId, AccessExclusiveLock); - DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); /* keep the lock since we did physical changes to the relation */ table_close(rel, NoLock); diff --git a/cstore_writer.c b/cstore_writer.c index 728c855b4..c70b448c7 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -45,7 +45,7 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, int columnTypeLength, Oid columnCollation, FmgrInfo *comparisonFunction); static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); -static void AppendStripeMetadata(TableMetadata *tableMetadata, +static void AppendStripeMetadata(DataFileMetadata *datafileMetadata, StripeMetadata stripeMetadata); static StringInfo CopyStringInfo(StringInfo sourceString); @@ -64,7 +64,7 @@ CStoreBeginWrite(Relation relation, TupleDesc tupleDescriptor) { TableWriteState *writeState = NULL; - TableMetadata *tableMetadata = NULL; + DataFileMetadata *datafileMetadata = NULL; FmgrInfo **comparisonFunctionArray = NULL; MemoryContext stripeWriteContext = NULL; uint64 currentFileOffset = 0; @@ -75,18 +75,18 @@ CStoreBeginWrite(Relation relation, uint64 currentStripeId = 0; Oid relNode = relation->rd_node.relNode; - tableMetadata = ReadTableMetadata(relNode); + datafileMetadata = ReadDataFileMetadata(relNode); /* * If stripeMetadataList is not empty, jump to the position right after * the last position. 
*/ - if (tableMetadata->stripeMetadataList != NIL) + if (datafileMetadata->stripeMetadataList != NIL) { StripeMetadata *lastStripe = NULL; uint64 lastStripeSize = 0; - lastStripe = llast(tableMetadata->stripeMetadataList); + lastStripe = llast(datafileMetadata->stripeMetadataList); lastStripeSize += lastStripe->dataLength; currentFileOffset = lastStripe->fileOffset + lastStripeSize; @@ -129,7 +129,7 @@ CStoreBeginWrite(Relation relation, writeState = palloc0(sizeof(TableWriteState)); writeState->relation = relation; - writeState->tableMetadata = tableMetadata; + writeState->datafileMetadata = datafileMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; writeState->blockRowCount = blockRowCount; @@ -164,7 +164,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; - TableMetadata *tableMetadata = writeState->tableMetadata; + DataFileMetadata *datafileMetadata = writeState->datafileMetadata; const uint32 blockRowCount = writeState->blockRowCount; BlockData *blockData = writeState->blockData; MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); @@ -254,7 +254,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul MemoryContextSwitchTo(oldContext); InsertStripeMetadataRow(writeState->relation->rd_node.relNode, &stripeMetadata); - AppendStripeMetadata(tableMetadata, stripeMetadata); + AppendStripeMetadata(datafileMetadata, stripeMetadata); } else { @@ -284,11 +284,11 @@ CStoreEndWrite(TableWriteState *writeState) MemoryContextSwitchTo(oldContext); InsertStripeMetadataRow(writeState->relation->rd_node.relNode, &stripeMetadata); - AppendStripeMetadata(writeState->tableMetadata, stripeMetadata); + AppendStripeMetadata(writeState->datafileMetadata, 
stripeMetadata); } MemoryContextDelete(writeState->stripeWriteContext); - list_free_deep(writeState->tableMetadata->stripeMetadataList); + list_free_deep(writeState->datafileMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); FreeBlockData(writeState->blockData); pfree(writeState); @@ -791,13 +791,13 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) * table footer's stripeMetadataList. */ static void -AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata) +AppendStripeMetadata(DataFileMetadata *datafileMetadata, StripeMetadata stripeMetadata) { StripeMetadata *stripeMetadataCopy = palloc0(sizeof(StripeMetadata)); memcpy(stripeMetadataCopy, &stripeMetadata, sizeof(StripeMetadata)); - tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, - stripeMetadataCopy); + datafileMetadata->stripeMetadataList = lappend(datafileMetadata->stripeMetadataList, + stripeMetadataCopy); } diff --git a/expected/am_drop.out b/expected/am_drop.out index c1fc60519..26de328f6 100644 --- a/expected/am_drop.out +++ b/expected/am_drop.out @@ -12,12 +12,12 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; ?column? ---------- 2 @@ -26,10 +26,10 @@ SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to table test_schema.test_table -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; ?column? ---------- 1 diff --git a/expected/fdw_drop.out b/expected/fdw_drop.out index 24c0f518d..e1ddf0fd0 100644 --- a/expected/fdw_drop.out +++ b/expected/fdw_drop.out @@ -12,12 +12,12 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; ?column? ---------- 2 @@ -26,10 +26,10 @@ SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to foreign table test_schema.test_table -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; ?column? ---------- 1 diff --git a/sql/am_drop.sql b/sql/am_drop.sql index 06873aa6e..080712881 100644 --- a/sql/am_drop.sql +++ b/sql/am_drop.sql @@ -15,22 +15,22 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset DROP SCHEMA test_schema CASCADE; -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; SELECT current_database() datname \gset diff --git a/sql/fdw_drop.sql b/sql/fdw_drop.sql index 7c6dd5c6e..f89374a5a 100644 --- a/sql/fdw_drop.sql +++ b/sql/fdw_drop.sql @@ -15,22 +15,22 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset DROP SCHEMA test_schema CASCADE; -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; SELECT current_database() datname \gset From a8da9acc634bc8ed0e47ca1cf0fb2c6edd9fcffc Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 09:51:24 -0700 Subject: [PATCH 67/91] Fix writes after rollback --- Makefile | 4 +- cstore_writer.c | 14 ++++++- expected/am_rollback.out | 77 +++++++++++++++++++++++++++++++++++++++ expected/fdw_rollback.out | 77 +++++++++++++++++++++++++++++++++++++++ sql/am_rollback.sql | 41 +++++++++++++++++++++ sql/fdw_rollback.sql | 41 +++++++++++++++++++++ 6 files changed, 251 insertions(+), 3 deletions(-) create mode 100644 expected/am_rollback.out create mode 100644 expected/fdw_rollback.out create mode 100644 sql/am_rollback.sql create mode 100644 sql/fdw_rollback.sql diff --git a/Makefile b/Makefile index 483aebc35..ac7e15037 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ ifeq ($(USE_FDW),yes) OBJS += cstore_fdw.o REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types \ fdw_functions fdw_block_filtering fdw_drop fdw_insert \ - fdw_copyto fdw_alter fdw_truncate fdw_clean + fdw_copyto fdw_alter fdw_rollback fdw_truncate fdw_clean endif # disabled tests: am_block_filtering @@ -53,7 +53,7 @@ ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ - am_drop am_insert am_copyto am_alter am_truncate am_clean + am_drop am_insert am_copyto am_alter am_rollback am_truncate am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore_writer.c b/cstore_writer.c 
index c70b448c7..1146bd0a3 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -403,7 +403,19 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) PageInit(page, BLCKSZ, 0); } - /* always appending */ + /* + * After a transaction has been rolled-back, we might be + * over-writing the rolledback write, so phdr->pd_lower can be + * different from addr.offset. + * + * We reset pd_lower to reset the rolledback write. + */ + if (phdr->pd_lower > addr.offset) + { + ereport(DEBUG1, (errmsg("over-writing page %u", addr.blockno), + errdetail("This can happen after a roll-back."))); + phdr->pd_lower = addr.offset; + } Assert(phdr->pd_lower == addr.offset); START_CRIT_SECTION(); diff --git a/expected/am_rollback.out b/expected/am_rollback.out new file mode 100644 index 000000000..130baaa3a --- /dev/null +++ b/expected/am_rollback.out @@ -0,0 +1,77 @@ +-- +-- Testing we handle rollbacks properly +-- +CREATE TABLE t(a int, b int) USING cstore_tableam; +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + count +------- + 0 +(1 row) + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 1 +(1 row) + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 30 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +INSERT 
INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 2 +(1 row) + +DROP TABLE t; diff --git a/expected/fdw_rollback.out b/expected/fdw_rollback.out new file mode 100644 index 000000000..f50f9fd19 --- /dev/null +++ b/expected/fdw_rollback.out @@ -0,0 +1,77 @@ +-- +-- Testing we handle rollbacks properly +-- +CREATE FOREIGN TABLE t(a int, b int) SERVER cstore_server; +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + count +------- + 0 +(1 row) + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 1 +(1 row) + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 30 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 2 +(1 row) + +DROP FOREIGN TABLE t; diff --git a/sql/am_rollback.sql b/sql/am_rollback.sql new file mode 100644 index 000000000..da1cc8ce4 --- /dev/null +++ 
b/sql/am_rollback.sql @@ -0,0 +1,41 @@ +-- +-- Testing we handle rollbacks properly +-- + +CREATE TABLE t(a int, b int) USING cstore_tableam; + +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; + +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +DROP TABLE t; diff --git a/sql/fdw_rollback.sql b/sql/fdw_rollback.sql new file mode 100644 index 000000000..804868ac9 --- /dev/null +++ b/sql/fdw_rollback.sql @@ -0,0 +1,41 @@ +-- +-- Testing we handle rollbacks properly +-- + +CREATE FOREIGN TABLE t(a int, b int) SERVER cstore_server; + +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 
FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; + +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +DROP FOREIGN TABLE t; From b72a4d8d1964401e02dbda8d8da3be2f2ce2df6a Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:08:26 -0700 Subject: [PATCH 68/91] Clean-up old metadata on TRUNCATE --- cstore_fdw.c | 1 + cstore_tableam.c | 3 +++ expected/am_truncate.out | 8 ++++++++ expected/fdw_truncate.out | 8 ++++++++ sql/am_truncate.sql | 5 +++++ sql/fdw_truncate.sql | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/cstore_fdw.c b/cstore_fdw.c index 33a29ad32..2790efaca 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -825,6 +825,7 @@ FdwNewRelFileNode(Relation relation) if (OidIsValid(relation->rd_rel->relfilenode)) { RelationDropStorage(relation); + DeleteDataFileMetadataRowIfExists(relation->rd_rel->relfilenode); } if (OidIsValid(relation->rd_rel->reltablespace)) diff --git a/cstore_tableam.c b/cstore_tableam.c index 4e7f22c31..2744a9a76 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -445,6 +445,9 @@ cstore_relation_set_new_filenode(Relation rel, SMgrRelation srel; CStoreOptions *options = CStoreTableAMGetOptions(); + /* delete old relfilenode metadata */ + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + Assert(persistence == RELPERSISTENCE_PERMANENT); *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); diff --git a/expected/am_truncate.out b/expected/am_truncate.out index 99db7fe72..951a77f04 100644 --- a/expected/am_truncate.out +++ b/expected/am_truncate.out @@ -15,6 +15,7 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; -- COMPRESSED 
CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; set cstore.compression = 'pglz'; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -145,6 +146,13 @@ SELECT * from cstore_truncate_test; ---+--- (0 rows) +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + ?column? +---------- + 0 +(1 row) + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/expected/fdw_truncate.out b/expected/fdw_truncate.out index c92c15559..f357c6358 100644 --- a/expected/fdw_truncate.out +++ b/expected/fdw_truncate.out @@ -14,6 +14,7 @@ CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -142,6 +143,13 @@ SELECT * from cstore_truncate_test; ---+--- (0 rows) +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + ?column? 
+---------- + 0 +(1 row) + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index 3fdce1d82..ae86098ee 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -13,6 +13,8 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset + INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; set cstore.compression = 'pglz'; @@ -60,6 +62,9 @@ SELECT * from cstore_truncate_test_regular; TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/sql/fdw_truncate.sql b/sql/fdw_truncate.sql index a1849045e..b82e7fc7b 100644 --- a/sql/fdw_truncate.sql +++ b/sql/fdw_truncate.sql @@ -12,6 +12,8 @@ CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_se CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset + INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -57,6 +59,9 @@ SELECT * from cstore_truncate_test_regular; TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; +-- make sure TRUNATE 
deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN From 62fc59202ce052159e7faeb29b53e8c70fbd61b5 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:09:19 -0700 Subject: [PATCH 69/91] Implement nontransactional truncate --- cstore.h | 2 +- cstore_metadata_tables.c | 13 ++++++++++--- cstore_reader.c | 4 ++-- cstore_tableam.c | 32 +++++++++++++++++++++++++++++--- cstore_writer.c | 2 +- expected/am_truncate.out | 24 ++++++++++++++++++++++++ expected/fdw_truncate.out | 24 ++++++++++++++++++++++++ sql/am_truncate.sql | 14 ++++++++++++++ sql/fdw_truncate.sql | 14 ++++++++++++++ 9 files changed, 119 insertions(+), 10 deletions(-) diff --git a/cstore.h b/cstore.h index 8efb0e6af..489a4839b 100644 --- a/cstore.h +++ b/cstore.h @@ -283,7 +283,7 @@ extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressio extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); -extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode); +extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 3ebee02e7..7c214eed5 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -343,7 +343,7 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) * from cstore_data_files and cstore_stripes. 
*/ DataFileMetadata * -ReadDataFileMetadata(Oid relfilenode) +ReadDataFileMetadata(Oid relfilenode, bool missingOk) { Oid cstoreStripesOid = InvalidOid; Relation cstoreStripes = NULL; @@ -358,8 +358,15 @@ ReadDataFileMetadata(Oid relfilenode) found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); if (!found) { - ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", - relfilenode))); + if (!missingOk) + { + ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", + relfilenode))); + } + else + { + return NULL; + } } ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, diff --git a/cstore_reader.c b/cstore_reader.c index 66807ad08..e51695353 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -88,7 +88,7 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, MemoryContext stripeReadContext = NULL; Oid relNode = relation->rd_node.relNode; - datafileMetadata = ReadDataFileMetadata(relNode); + datafileMetadata = ReadDataFileMetadata(relNode, false); /* * We allocate all stripe specific data in the stripeReadContext, and reset @@ -309,7 +309,7 @@ CStoreTableRowCount(Relation relation) ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode); + datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode, false); foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) { diff --git a/cstore_tableam.c b/cstore_tableam.c index 2744a9a76..0369ca15a 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -443,7 +443,20 @@ cstore_relation_set_new_filenode(Relation rel, MultiXactId *minmulti) { SMgrRelation srel; - CStoreOptions *options = CStoreTableAMGetOptions(); + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + uint64 blockRowCount = 0; + + if (metadata != NULL) + { + /* existing table (e.g. 
TRUNCATE), use existing blockRowCount */ + blockRowCount = metadata->blockRowCount; + } + else + { + /* new table, use options */ + CStoreOptions *options = CStoreTableAMGetOptions(); + blockRowCount = options->blockRowCount; + } /* delete old relfilenode metadata */ DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); @@ -452,7 +465,7 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitCStoreDataFileMetadata(newrnode->relNode, options->blockRowCount); + InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount); smgrclose(srel); } @@ -460,7 +473,20 @@ cstore_relation_set_new_filenode(Relation rel, static void cstore_relation_nontransactional_truncate(Relation rel) { - elog(ERROR, "cstore_relation_nontransactional_truncate not implemented"); + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + + /* + * No need to set new relfilenode, since the table was created in this + * transaction and no other transaction can see this relation yet. We + * can just truncate the relation. + * + * This is similar to what is done in heapam_relation_nontransactional_truncate. 
+ */ + RelationTruncate(rel, 0); + + /* Delete old relfilenode metadata and recreate it */ + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount); } diff --git a/cstore_writer.c b/cstore_writer.c index 1146bd0a3..8069f4aba 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -75,7 +75,7 @@ CStoreBeginWrite(Relation relation, uint64 currentStripeId = 0; Oid relNode = relation->rd_node.relNode; - datafileMetadata = ReadDataFileMetadata(relNode); + datafileMetadata = ReadDataFileMetadata(relNode, false); /* * If stripeMetadataList is not empty, jump to the position right after diff --git a/expected/am_truncate.out b/expected/am_truncate.out index 951a77f04..245c72062 100644 --- a/expected/am_truncate.out +++ b/expected/am_truncate.out @@ -153,6 +153,30 @@ SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_fil 0 (1 row) +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE TABLE cstore_same_transaction_truncate(a int) USING cstore_tableam; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; + ?column? 
+---------- + 1 +(1 row) + +SELECT * FROM cstore_same_transaction_truncate; + a +---- + 20 + 21 + 22 + 23 +(4 rows) + +DROP TABLE cstore_same_transaction_truncate; -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/expected/fdw_truncate.out b/expected/fdw_truncate.out index f357c6358..6192c704c 100644 --- a/expected/fdw_truncate.out +++ b/expected/fdw_truncate.out @@ -150,6 +150,30 @@ SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_fil 0 (1 row) +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE FOREIGN TABLE cstore_same_transaction_truncate(a int) SERVER cstore_server; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; + ?column? 
+---------- + 1 +(1 row) + +SELECT * FROM cstore_same_transaction_truncate; + a +---- + 20 + 21 + 22 + 23 +(4 rows) + +DROP FOREIGN TABLE cstore_same_transaction_truncate; -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index ae86098ee..5d27a69fb 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -65,6 +65,20 @@ SELECT * from cstore_truncate_test; -- make sure TRUNATE deletes metadata for old relfilenode SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE TABLE cstore_same_transaction_truncate(a int) USING cstore_tableam; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; + +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; +SELECT * FROM cstore_same_transaction_truncate; + +DROP TABLE cstore_same_transaction_truncate; + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/sql/fdw_truncate.sql b/sql/fdw_truncate.sql index b82e7fc7b..ed2aaa04a 100644 --- a/sql/fdw_truncate.sql +++ b/sql/fdw_truncate.sql @@ -62,6 +62,20 @@ SELECT * from cstore_truncate_test; -- make sure TRUNATE deletes metadata for old relfilenode SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE FOREIGN TABLE cstore_same_transaction_truncate(a int) SERVER cstore_server; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM 
generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; + +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; +SELECT * FROM cstore_same_transaction_truncate; + +DROP FOREIGN TABLE cstore_same_transaction_truncate; + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN From 9b9b9e2cf052457ef96444c5c222647e5e4672fd Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Thu, 8 Oct 2020 19:07:18 +0200 Subject: [PATCH 70/91] remove double declaration of stripeMetadata (#20) Compilers seem to behave differently with variable shadowing as both I and the marlin deployment have segfaults when querying a cstore table today, however, CI seem to not care :D This removes a double declaration that was not caught in #10 --- cstore_reader.c | 1 - 1 file changed, 1 deletion(-) diff --git a/cstore_reader.c b/cstore_reader.c index e51695353..499c990d4 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -138,7 +138,6 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu while (readState->stripeBuffers == NULL) { StripeBuffers *stripeBuffers = NULL; - StripeMetadata *stripeMetadata = NULL; List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); From 92e160344321e2ac26ea782ff3ccfc37eaf0703f Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 8 Oct 2020 11:03:07 -0700 Subject: [PATCH 71/91] Remove shadowed variables --- Makefile | 2 +- cstore_reader.c | 7 +++---- cstore_writer.c | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index ac7e15037..461c33280 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ else $(error version $(VER) is not supported) endif -PG_CPPFLAGS = 
-std=c11 +PG_CPPFLAGS = -std=c11 -Wshadow OBJS = cstore.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o diff --git a/cstore_reader.c b/cstore_reader.c index 499c990d4..69a115ed9 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -179,7 +179,6 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu uint32 lastBlockIndex = 0; uint32 blockRowCount = 0; uint32 stripeRowCount = 0; - StripeMetadata *stripeMetadata = readState->currentStripeMetadata; stripeRowCount = stripeMetadata->rowCount; lastBlockIndex = stripeRowCount / stripeMetadata->blockRowCount; @@ -995,10 +994,10 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor for (defValIndex = 0; defValIndex < tupleConstraints->num_defval; defValIndex++) { - AttrDefault defaultValue = tupleConstraints->defval[defValIndex]; - if (defaultValue.adnum == attributeForm->attnum) + AttrDefault attrDefault = tupleConstraints->defval[defValIndex]; + if (attrDefault.adnum == attributeForm->attnum) { - defaultValueNode = stringToNode(defaultValue.adbin); + defaultValueNode = stringToNode(attrDefault.adbin); break; } } diff --git a/cstore_writer.c b/cstore_writer.c index 8069f4aba..9d57ec275 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -386,7 +386,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) while (addr.blockno >= nblocks) { - Buffer buffer = ReadBuffer(rel, P_NEW); + buffer = ReadBuffer(rel, P_NEW); ReleaseBuffer(buffer); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); } @@ -534,7 +534,6 @@ FlushStripe(TableWriteState *writeState) for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; - uint32 blockIndex = 0; for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { From d1c7d9f09d6a5eb8405c3807f4a6b02b86019227 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 8 Oct 2020 
11:29:27 -0700 Subject: [PATCH 72/91] address feedback --- Makefile | 2 +- cstore_reader.c | 5 +---- cstore_writer.c | 4 ++-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 461c33280..6ef8431c8 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ else $(error version $(VER) is not supported) endif -PG_CPPFLAGS = -std=c11 -Wshadow +PG_CFLAGS = -std=c11 -Wshadow OBJS = cstore.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o diff --git a/cstore_reader.c b/cstore_reader.c index 69a115ed9..cf2d0b171 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -988,7 +988,6 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm) { - Datum defaultValue = 0; Node *defaultValueNode = NULL; int defValIndex = 0; @@ -1009,7 +1008,7 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor if (IsA(defaultValueNode, Const)) { Const *constNode = (Const *) defaultValueNode; - defaultValue = constNode->constvalue; + return constNode->constvalue; } else { @@ -1018,8 +1017,6 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor errhint("Expression is either mutable or " "does not evaluate to constant value"))); } - - return defaultValue; } diff --git a/cstore_writer.c b/cstore_writer.c index 9d57ec275..2c0ca541e 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -386,8 +386,8 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) while (addr.blockno >= nblocks) { - buffer = ReadBuffer(rel, P_NEW); - ReleaseBuffer(buffer); + Buffer newBuffer = ReadBuffer(rel, P_NEW); + ReleaseBuffer(newBuffer); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); } From 7cc8c8c155ad07e8a9be7a844dc332a5c2a9ffda Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:27:58 -0700 Subject: [PATCH 73/91] Support VACUUM FULL --- 
Makefile | 2 +- cstore_tableam.c | 122 ++++++++++++++++++++++++++++++++++------- expected/am_vacuum.out | 89 ++++++++++++++++++++++++++++++ sql/am_vacuum.sql | 37 +++++++++++++ 4 files changed, 229 insertions(+), 21 deletions(-) create mode 100644 expected/am_vacuum.out create mode 100644 sql/am_vacuum.sql diff --git a/Makefile b/Makefile index 6ef8431c8..60d8855f8 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ - am_drop am_insert am_copyto am_alter am_rollback am_truncate am_clean + am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore_tableam.c b/cstore_tableam.c index 0369ca15a..337dbe06f 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -19,6 +19,7 @@ #include "catalog/storage.h" #include "catalog/storage_xlog.h" #include "commands/progress.h" +#include "commands/vacuum.h" #include "executor/executor.h" #include "nodes/makefuncs.h" #include "optimizer/plancat.h" @@ -131,6 +132,36 @@ cstore_free_write_state() } +static List * +RelationColumnList(Relation rel) +{ + List *columnList = NIL; + TupleDesc tupdesc = RelationGetDescr(rel); + + for (int i = 0; i < tupdesc->natts; i++) + { + Index varno = 0; + AttrNumber varattno = i + 1; + Oid vartype = tupdesc->attrs[i].atttypid; + int32 vartypmod = 0; + Oid varcollid = 0; + Index varlevelsup = 0; + Var *var; + + if (tupdesc->attrs[i].attisdropped) + { + continue; + } + + var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); + columnList = lappend(columnList, var); + } + + return columnList; +} + + static const TupleTableSlotOps * cstore_slot_callbacks(Relation relation) { @@ -157,25 +188,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, scan->cs_base.rs_flags = flags; scan->cs_base.rs_parallel = parallel_scan; - for (int i = 0; i < 
tupdesc->natts; i++) - { - Index varno = 0; - AttrNumber varattno = i + 1; - Oid vartype = tupdesc->attrs[i].atttypid; - int32 vartypmod = 0; - Oid varcollid = 0; - Index varlevelsup = 0; - Var *var; - - if (tupdesc->attrs[i].attisdropped) - { - continue; - } - - var = makeVar(varno, varattno, vartype, vartypmod, - varcollid, varlevelsup); - columnList = lappend(columnList, var); - } + columnList = RelationColumnList(relation); readState = CStoreBeginRead(relation, tupdesc, columnList, NULL); @@ -497,6 +510,13 @@ cstore_relation_copy_data(Relation rel, const RelFileNode *newrnode) } +/* + * cstore_relation_copy_for_cluster is called on VACUUM FULL, at which + * we should copy data from OldHeap to NewHeap. + * + * In general TableAM case this can also be called for the CLUSTER command + * which is not applicable for cstore since it doesn't support indexes. + */ static void cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, bool use_sort, @@ -507,7 +527,69 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, double *tups_vacuumed, double *tups_recently_dead) { - elog(ERROR, "cstore_relation_copy_for_cluster not implemented"); + TableWriteState *writeState = NULL; + TableReadState *readState = NULL; + CStoreOptions *cstoreOptions = NULL; + Datum *sourceValues = NULL; + bool *sourceNulls = NULL; + Datum *targetValues = NULL; + bool *targetNulls = NULL; + TupleDesc sourceDesc = RelationGetDescr(OldHeap); + TupleDesc targetDesc = RelationGetDescr(NewHeap); + + if (OldIndex != NULL || use_sort) + { + ereport(ERROR, (errmsg("cstore_am doesn't support indexes"))); + } + + /* + * copy_table_data in cluster.c assumes tuple descriptors are exactly + * the same. Even dropped columns exist and are marked as attisdropped + * in the target relation. 
+ */ + Assert(sourceDesc->natts == targetDesc->natts); + + cstoreOptions = CStoreTableAMGetOptions(); + + writeState = CStoreBeginWrite(NewHeap, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, + targetDesc); + + readState = CStoreBeginRead(OldHeap, sourceDesc, RelationColumnList(OldHeap), NULL); + + sourceValues = palloc0(sourceDesc->natts * sizeof(Datum)); + sourceNulls = palloc0(sourceDesc->natts * sizeof(bool)); + + targetValues = palloc0(targetDesc->natts * sizeof(Datum)); + targetNulls = palloc0(targetDesc->natts * sizeof(bool)); + + *num_tuples = 0; + + while (CStoreReadNextRow(readState, sourceValues, sourceNulls)) + { + memset(targetNulls, true, targetDesc->natts * sizeof(bool)); + + for (int attrIndex = 0; attrIndex < sourceDesc->natts; attrIndex++) + { + FormData_pg_attribute *sourceAttr = TupleDescAttr(sourceDesc, attrIndex); + + if (!sourceAttr->attisdropped) + { + targetNulls[attrIndex] = sourceNulls[attrIndex]; + targetValues[attrIndex] = sourceValues[attrIndex]; + } + } + + CStoreWriteRow(writeState, targetValues, targetNulls); + (*num_tuples)++; + } + + *tups_vacuumed = *num_tuples; + + CStoreEndWrite(writeState); + CStoreEndRead(readState); } diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out new file mode 100644 index 000000000..d689be800 --- /dev/null +++ b/expected/am_vacuum.out @@ -0,0 +1,89 @@ +CREATE TABLE t(a int, b int) USING cstore_tableam; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; +INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; +INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; +SELECT sum(a), sum(b) FROM t; + sum | sum +-----+------ + 465 | 9455 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 3 +(1 
row) + +-- vacuum full should merge stripes together +VACUUM FULL t; +SELECT sum(a), sum(b) FROM t; + sum | sum +-----+------ + 465 | 9455 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 1 +(1 row) + +-- test the case when all data cannot fit into a single stripe +SET cstore.stripe_row_count TO 1000; +INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; +SELECT sum(a), sum(b) FROM t; + sum | sum +---------+--------- + 3126715 | 6261955 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 4 +(1 row) + +VACUUM FULL t; +SELECT sum(a), sum(b) FROM t; + sum | sum +---------+--------- + 3126715 | 6261955 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 3 +(1 row) + +-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs +ALTER TABLE t DROP COLUMN a; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + stripe | attr | block | ?column? | ?column? +--------+------+-------+----------+---------- + 0 | 1 | 0 | f | f + 0 | 2 | 0 | f | f + 1 | 1 | 0 | f | f + 1 | 2 | 0 | f | f + 2 | 1 | 0 | f | f + 2 | 2 | 0 | f | f +(6 rows) + +VACUUM FULL t; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + stripe | attr | block | ?column? | ?column? 
+--------+------+-------+----------+---------- + 0 | 1 | 0 | t | t + 0 | 2 | 0 | f | f + 1 | 1 | 0 | t | t + 1 | 2 | 0 | f | f + 2 | 1 | 0 | t | t + 2 | 2 | 0 | f | f +(6 rows) + diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql new file mode 100644 index 000000000..070a13b05 --- /dev/null +++ b/sql/am_vacuum.sql @@ -0,0 +1,37 @@ +CREATE TABLE t(a int, b int) USING cstore_tableam; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; +INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; +INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- vacuum full should merge stripes together +VACUUM FULL t; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- test the case when all data cannot fit into a single stripe +SET cstore.stripe_row_count TO 1000; +INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +VACUUM FULL t; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs +ALTER TABLE t DROP COLUMN a; + +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + +VACUUM FULL t; + +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' 
ORDER BY 1, 2, 3; + From eeb25aca856142b6395a515730863ea0f32a80fe Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:28:50 -0700 Subject: [PATCH 74/91] Add a test which checks for resource clean-up --- expected/am_vacuum.out | 16 ++++++++++++++++ sql/am_vacuum.sql | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index d689be800..3abb3c668 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -1,3 +1,4 @@ +SELECT count(*) AS columnar_table_count FROM cstore.cstore_tables \gset CREATE TABLE t(a int, b int) USING cstore_tableam; SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; count @@ -87,3 +88,18 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs 2 | 2 | 0 | f | f (6 rows) +-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands +SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; + ?column? +---------- + 1 +(1 row) + +DROP TABLE t; +-- Make sure we cleaned the metadata for t too +SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; + ?column? 
+---------- + 0 +(1 row) + diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 070a13b05..6a5e0687e 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -1,4 +1,7 @@ +SELECT count(*) AS columnar_table_count FROM cstore.cstore_tables \gset + CREATE TABLE t(a int, b int) USING cstore_tableam; + SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; @@ -35,3 +38,10 @@ VACUUM FULL t; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; +-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands +SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; + +DROP TABLE t; + +-- Make sure we cleaned the metadata for t too +SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; From aa3032cfdd90c1edab6393c234174828a8238007 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:30:02 -0700 Subject: [PATCH 75/91] Address feedback --- cstore_tableam.c | 38 ++++++++++---------------------------- expected/am_vacuum.out | 6 +++--- sql/am_vacuum.sql | 6 +++--- 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 337dbe06f..39a0695e2 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -143,8 +143,8 @@ RelationColumnList(Relation rel) Index varno = 0; AttrNumber varattno = i + 1; Oid vartype = tupdesc->attrs[i].atttypid; - int32 vartypmod = 0; - Oid varcollid = 0; + int32 vartypmod = tupdesc->attrs[i].atttypmod; + Oid varcollid = tupdesc->attrs[i].attcollation; Index varlevelsup = 0; Var *var; @@ -530,16 +530,14 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, TableWriteState *writeState = NULL; TableReadState *readState = NULL; CStoreOptions *cstoreOptions = NULL; - Datum *sourceValues = 
NULL; - bool *sourceNulls = NULL; - Datum *targetValues = NULL; - bool *targetNulls = NULL; + Datum *values = NULL; + bool *nulls = NULL; TupleDesc sourceDesc = RelationGetDescr(OldHeap); TupleDesc targetDesc = RelationGetDescr(NewHeap); if (OldIndex != NULL || use_sort) { - ereport(ERROR, (errmsg("cstore_am doesn't support indexes"))); + ereport(ERROR, (errmsg(CSTORE_TABLEAM_NAME " doesn't support indexes"))); } /* @@ -559,34 +557,18 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, readState = CStoreBeginRead(OldHeap, sourceDesc, RelationColumnList(OldHeap), NULL); - sourceValues = palloc0(sourceDesc->natts * sizeof(Datum)); - sourceNulls = palloc0(sourceDesc->natts * sizeof(bool)); - - targetValues = palloc0(targetDesc->natts * sizeof(Datum)); - targetNulls = palloc0(targetDesc->natts * sizeof(bool)); + values = palloc0(sourceDesc->natts * sizeof(Datum)); + nulls = palloc0(sourceDesc->natts * sizeof(bool)); *num_tuples = 0; - while (CStoreReadNextRow(readState, sourceValues, sourceNulls)) + while (CStoreReadNextRow(readState, values, nulls)) { - memset(targetNulls, true, targetDesc->natts * sizeof(bool)); - - for (int attrIndex = 0; attrIndex < sourceDesc->natts; attrIndex++) - { - FormData_pg_attribute *sourceAttr = TupleDescAttr(sourceDesc, attrIndex); - - if (!sourceAttr->attisdropped) - { - targetNulls[attrIndex] = sourceNulls[attrIndex]; - targetValues[attrIndex] = sourceValues[attrIndex]; - } - } - - CStoreWriteRow(writeState, targetValues, targetNulls); + CStoreWriteRow(writeState, values, nulls); (*num_tuples)++; } - *tups_vacuumed = *num_tuples; + *tups_vacuumed = 0; CStoreEndWrite(writeState); CStoreEndRead(readState); diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 3abb3c668..dbeddca2b 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -1,4 +1,4 @@ -SELECT count(*) AS columnar_table_count FROM cstore.cstore_tables \gset +SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files 
\gset CREATE TABLE t(a int, b int) USING cstore_tableam; SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; count @@ -89,7 +89,7 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs (6 rows) -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands -SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; ?column? ---------- 1 @@ -97,7 +97,7 @@ SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; DROP TABLE t; -- Make sure we cleaned the metadata for t too -SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; ?column? ---------- 0 diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 6a5e0687e..8cb70167d 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -1,4 +1,4 @@ -SELECT count(*) AS columnar_table_count FROM cstore.cstore_tables \gset +SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset CREATE TABLE t(a int, b int) USING cstore_tableam; @@ -39,9 +39,9 @@ VACUUM FULL t; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands -SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; DROP TABLE t; -- Make sure we cleaned the metadata for t too -SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; From 2ede755107fc8389ef3b38a0e62f67ce4ae2fc93 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:34:52 -0700 Subject: [PATCH 76/91] Initial version of 
VACUUM --- cstore_tableam.c | 137 ++++++++++++++++++++++++++++++++++++++++- expected/am_vacuum.out | 52 ++++++++++++++++ sql/am_vacuum.sql | 20 ++++++ 3 files changed, 208 insertions(+), 1 deletion(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 39a0695e2..59df86fb2 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -32,6 +32,7 @@ #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/builtins.h" +#include "utils/pg_rusage.h" #include "utils/rel.h" #include "utils/syscache.h" @@ -40,6 +41,15 @@ #define CSTORE_TABLEAM_NAME "cstore_tableam" +/* + * Timing parameters for truncate locking heuristics. + * + * These are the same values from src/backend/access/heap/vacuumlazy.c + */ +#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */ +#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ +#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ + typedef struct CStoreScanDescData { TableScanDescData cs_base; @@ -59,6 +69,9 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, void *arg); static bool IsCStoreTableAmTable(Oid relationId); + +static void TruncateCStore(Relation rel, int elevel); + static CStoreOptions * CStoreTableAMGetOptions(void) { @@ -575,6 +588,128 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, } +/* + * cstore_vacuum_rel implements VACUUM without FULL option. + */ +static void +cstore_vacuum_rel(Relation rel, VacuumParams *params, + BufferAccessStrategy bstrategy) +{ + int elevel = (params->options & VACOPT_VERBOSE) ? INFO : DEBUG2; + + /* this should have been resolved by vacuum.c until now */ + Assert(params->truncate != VACOPT_TERNARY_DEFAULT); + + /* + * We don't have updates, deletes, or concurrent updates, so all we + * care for now is truncating the unused space at the end of storage. 
+ */ + if (params->truncate == VACOPT_TERNARY_ENABLED) + { + TruncateCStore(rel, elevel); + } +} + + +/* + * TruncateCStore truncates the unused space at the end of main fork for + * a cstore table. This unused space can be created by aborted transactions. + * + * This implementation is based on heap_vacuum_rel in vacuumlazy.c with some + * changes so it suits columnar store relations. + */ +static void +TruncateCStore(Relation rel, int elevel) +{ + PGRUsage ru0; + int lock_retry = 0; + BlockNumber old_rel_pages = 0; + BlockNumber new_rel_pages = 0; + DataFileMetadata *metadata = NULL; + ListCell *stripeMetadataCell = NULL; + + pg_rusage_init(&ru0); + + /* Report that we are now truncating */ + pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, + PROGRESS_VACUUM_PHASE_TRUNCATE); + + /* + * We need an ExclusiveLock to do the truncation. + * Loop until we acquire a lock or retry threshold is reached. + */ + while (true) + { + if (ConditionalLockRelation(rel, AccessExclusiveLock)) + { + break; + } + + /* + * Check for interrupts while trying to (re-)acquire the exclusive + * lock. + */ + CHECK_FOR_INTERRUPTS(); + + if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT / + VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) + { + /* + * We failed to establish the lock in the specified number of + * retries. This means we give up truncating. 
+ */ + ereport(elevel, + (errmsg("\"%s\": stopping truncate due to conflicting lock request", + RelationGetRelationName(rel)))); + return; + } + + pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L); + } + + RelationOpenSmgr(rel); + old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + /* loop over stripes and find max used block */ + metadata = ReadDataFileMetadata(rel->rd_node.relNode); + foreach(stripeMetadataCell, metadata->stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; + SmgrAddr addr = logical_to_smgr(lastByte); + new_rel_pages = Max(new_rel_pages, addr.blockno + 1); + } + + if (new_rel_pages == old_rel_pages) + { + UnlockRelation(rel, AccessExclusiveLock); + return; + } + + /* + * Truncate the storage. Note that RelationTruncate() takes care of + * Write Ahead Logging. + */ + RelationTruncate(rel, new_rel_pages); + + /* + * We can release the exclusive lock as soon as we have truncated. + * Other backends can't safely access the relation until they have + * processed the smgr invalidation that smgrtruncate sent out ... but + * that should happen as part of standard invalidation processing once + * they acquire lock on the relation. 
+ */ + UnlockRelation(rel, AccessExclusiveLock); + + ereport(elevel, + (errmsg("\"%s\": truncated %u to %u pages", + RelationGetRelationName(rel), + old_rel_pages, new_rel_pages), + errdetail_internal("%s", pg_rusage_show(&ru0)))); +} + + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) @@ -853,7 +988,7 @@ static const TableAmRoutine cstore_am_methods = { .relation_nontransactional_truncate = cstore_relation_nontransactional_truncate, .relation_copy_data = cstore_relation_copy_data, .relation_copy_for_cluster = cstore_relation_copy_for_cluster, - .relation_vacuum = heap_vacuum_rel, + .relation_vacuum = cstore_vacuum_rel, .scan_analyze_next_block = cstore_scan_analyze_next_block, .scan_analyze_next_tuple = cstore_scan_analyze_next_tuple, .index_build_range_scan = cstore_index_build_range_scan, diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index dbeddca2b..7a1ff2777 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -95,6 +95,58 @@ SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; 1 (1 row) +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 16 kB +(1 row) + +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 56 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 12530 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. 
+SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 56 kB +(1 row) + +COMMIT; +-- vacuum should truncate the relation to the usable space +VACUUM t; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 16 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 8cb70167d..10d1c7f6c 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -41,6 +41,26 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; +SELECT pg_size_pretty(pg_relation_size('t')); +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; + +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. 
+SELECT pg_size_pretty(pg_relation_size('t')); +COMMIT; + +-- vacuum should truncate the relation to the usable space +VACUUM t; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; + DROP TABLE t; -- Make sure we cleaned the metadata for t too From 74dd1facf36250e661e8b79f05bc562549ad9da3 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:43:03 -0700 Subject: [PATCH 77/91] add isolation tests --- Makefile | 18 ++++++++++ expected/am_vacuum_vs_insert.out | 58 ++++++++++++++++++++++++++++++++ expected/create.out | 6 ++++ specs/am_vacuum_vs_insert.spec | 47 ++++++++++++++++++++++++++ specs/create.spec | 8 +++++ 5 files changed, 137 insertions(+) create mode 100644 expected/am_vacuum_vs_insert.out create mode 100644 expected/create.out create mode 100644 specs/am_vacuum_vs_insert.spec create mode 100644 specs/create.spec diff --git a/Makefile b/Makefile index 60d8855f8..58340450f 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,7 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql REGRESS = extension_create +ISOLATION = create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ @@ -54,6 +55,7 @@ ifeq ($(USE_TABLEAM),yes) OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean + ISOLATION += am_vacuum_vs_insert endif ifeq ($(enable_coverage),yes) @@ -76,6 +78,22 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# command for getting postgres source directory is taken from citus/configure.in +POSTGRES_SRCDIR=$(shell grep ^abs_top_srcdir $(shell dirname $(shell $(PG_CONFIG) --pgxs))/../Makefile.global|cut -d ' ' -f3-) 
+PGXS_ISOLATION_TESTER=$(top_builddir)/src/test/isolation/pg_isolation_regress + +# If postgres installation doesn't include pg_isolation_regress, try using the +# one in postgres source directory. +ifeq (,$(wildcard $(PGXS_ISOLATION_TESTER))) + pg_isolation_regress_installcheck = \ + $(POSTGRES_SRCDIR)/src/test/isolation/pg_isolation_regress \ + --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) +else + pg_isolation_regress_installcheck = \ + $(PGXS_ISOLATION_TESTER) \ + --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) +endif + installcheck: reindent: diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out new file mode 100644 index 000000000..8ef78bfa4 --- /dev/null +++ b/expected/am_vacuum_vs_insert.out @@ -0,0 +1,58 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-insert s1-begin s1-insert s2-vacuum s1-commit s2-select +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s1-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-vacuum: + VACUUM test_vacuum_vs_insert; + +step s1-commit: + COMMIT; + +step s2-select: + SELECT * FROM test_vacuum_vs_insert; + +a b + +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 + +starting permutation: s1-insert s1-begin s1-insert s2-vacuum-full s1-commit s2-select +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s1-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-vacuum-full: + VACUUM FULL test_vacuum_vs_insert; + +step s1-commit: + COMMIT; + +step s2-vacuum-full: <... 
completed> +step s2-select: + SELECT * FROM test_vacuum_vs_insert; + +a b + +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 diff --git a/expected/create.out b/expected/create.out new file mode 100644 index 000000000..39b477c81 --- /dev/null +++ b/expected/create.out @@ -0,0 +1,6 @@ +Parsed test spec with 1 sessions + +starting permutation: s1a +step s1a: + CREATE EXTENSION cstore_fdw; + diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec new file mode 100644 index 000000000..57105e1dd --- /dev/null +++ b/specs/am_vacuum_vs_insert.spec @@ -0,0 +1,47 @@ +setup +{ + CREATE TABLE test_vacuum_vs_insert (a int, b int) USING cstore_tableam; +} + +teardown +{ + DROP TABLE IF EXISTS test_vacuum_vs_insert CASCADE; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-insert" +{ + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-vacuum" +{ + VACUUM test_vacuum_vs_insert; +} + +step "s2-vacuum-full" +{ + VACUUM FULL test_vacuum_vs_insert; +} + +step "s2-select" +{ + SELECT * FROM test_vacuum_vs_insert; +} + +permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum" "s1-commit" "s2-select" +permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum-full" "s1-commit" "s2-select" + diff --git a/specs/create.spec b/specs/create.spec new file mode 100644 index 000000000..f8e874678 --- /dev/null +++ b/specs/create.spec @@ -0,0 +1,8 @@ +session "s1" +step "s1a" +{ + CREATE EXTENSION cstore_fdw; +} + +permutation "s1a" + From 37e3845e6afdb91bfd77940770da74ecd07ca698 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:47:09 -0700 Subject: [PATCH 78/91] Address Nils feedback --- cstore_tableam.c | 96 ++++++++++++++++++++++---------- expected/am_vacuum_vs_insert.out | 9 ++- specs/am_vacuum_vs_insert.spec | 4 +- 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 59df86fb2..f6b51b770 100644 --- 
a/cstore_tableam.c +++ b/cstore_tableam.c @@ -46,7 +46,6 @@ * * These are the same values from src/backend/access/heap/vacuumlazy.c */ -#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */ #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ @@ -68,6 +67,8 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, objectId, int subId, void *arg); static bool IsCStoreTableAmTable(Oid relationId); +static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, + int timeout, int retryInterval); static void TruncateCStore(Relation rel, int elevel); @@ -622,7 +623,6 @@ static void TruncateCStore(Relation rel, int elevel) { PGRUsage ru0; - int lock_retry = 0; BlockNumber old_rel_pages = 0; BlockNumber new_rel_pages = 0; DataFileMetadata *metadata = NULL; @@ -634,45 +634,46 @@ TruncateCStore(Relation rel, int elevel) pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE); + /* - * We need an ExclusiveLock to do the truncation. - * Loop until we acquire a lock or retry threshold is reached. + * We need access exclusive lock on the relation in order to do + * truncation. If we can't get it, give up rather than waiting --- we + * don't want to block other backends, and we don't want to deadlock + * (which is quite possible considering we already hold a lower-grade + * lock). + * + * The decisions for AccessExclusiveLock and conditional lock with + * a timeout is based on lazy_truncate_heap in vacuumlazy.c. */ - while (true) + if (!ConditionalLockRelationWithTimeout(rel, AccessExclusiveLock, + VACUUM_TRUNCATE_LOCK_TIMEOUT, + VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) { - if (ConditionalLockRelation(rel, AccessExclusiveLock)) - { - break; - } - /* - * Check for interrupts while trying to (re-)acquire the exclusive - * lock. + * We failed to establish the lock in the specified number of + * retries. This means we give up truncating. 
*/ - CHECK_FOR_INTERRUPTS(); - - if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT / - VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) - { - /* - * We failed to establish the lock in the specified number of - * retries. This means we give up truncating. - */ - ereport(elevel, - (errmsg("\"%s\": stopping truncate due to conflicting lock request", - RelationGetRelationName(rel)))); - return; - } - - pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L); + ereport(elevel, + (errmsg("\"%s\": stopping truncate due to conflicting lock request", + RelationGetRelationName(rel)))); + return; } RelationOpenSmgr(rel); old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - /* loop over stripes and find max used block */ + /* + * Get metadata as viewed in latest snapshot. Reading metadata in transaction + * snapshot is not enough, since stripes could have been created between + * current transaction start and lock acquisition time. Ignoring those + * stripes can destory data. + */ + PushActiveSnapshot(GetLatestSnapshot()); metadata = ReadDataFileMetadata(rel->rd_node.relNode); + PopActiveSnapshot(); + + /* loop over stripes and find max used block */ foreach(stripeMetadataCell, metadata->stripeMetadataList) { StripeMetadata *stripe = lfirst(stripeMetadataCell); @@ -710,6 +711,43 @@ TruncateCStore(Relation rel, int elevel) } +/* + * ConditionalLockRelationWithTimeout tries to acquire a relation lock until + * it either succeeds or timesout. It doesn't enter wait queue and instead it + * sleeps between lock tries. + * + * This is based on the lock loop in lazy_truncate_heap(). 
+ */ +static bool +ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, + int retryInterval) +{ + int lock_retry = 0; + + while (true) + { + if (ConditionalLockRelation(rel, lockMode)) + { + break; + } + + /* + * Check for interrupts while trying to (re-)acquire the lock + */ + CHECK_FOR_INTERRUPTS(); + + if (++lock_retry > (timeout / retryInterval)) + { + return false; + } + + pg_usleep(retryInterval * 1000L); + } + + return true; +} + + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index 8ef78bfa4..ae23d9a26 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -10,8 +10,9 @@ step s1-begin: step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; +s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request step s2-vacuum: - VACUUM test_vacuum_vs_insert; + VACUUM VERBOSE test_vacuum_vs_insert; step s1-commit: COMMIT; @@ -39,11 +40,15 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; step s2-vacuum-full: - VACUUM FULL test_vacuum_vs_insert; + VACUUM FULL VERBOSE test_vacuum_vs_insert; step s1-commit: COMMIT; +s2: INFO: vacuuming "public.test_vacuum_vs_insert" +s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 1 pages +DETAIL: 0 dead row versions cannot be removed yet. +CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s. step s2-vacuum-full: <... 
completed> step s2-select: SELECT * FROM test_vacuum_vs_insert; diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec index 57105e1dd..ac2d83667 100644 --- a/specs/am_vacuum_vs_insert.spec +++ b/specs/am_vacuum_vs_insert.spec @@ -29,12 +29,12 @@ session "s2" step "s2-vacuum" { - VACUUM test_vacuum_vs_insert; + VACUUM VERBOSE test_vacuum_vs_insert; } step "s2-vacuum-full" { - VACUUM FULL test_vacuum_vs_insert; + VACUUM FULL VERBOSE test_vacuum_vs_insert; } step "s2-select" From 55885c81dd5cdb9c60cd0e23a27d681a4df97034 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:48:34 -0700 Subject: [PATCH 79/91] log stats on verbose --- cstore_tableam.c | 106 ++++++++++++++++++++++++++++++- expected/am_vacuum.out | 58 ++++++++++++++++- expected/am_vacuum_vs_insert.out | 5 ++ specs/am_vacuum_vs_insert.spec | 1 - specs/create.spec | 1 - sql/am_vacuum.sql | 38 ++++++++++- 6 files changed, 203 insertions(+), 6 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index f6b51b770..fa3cd8739 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -69,8 +69,8 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); - - +static void LogRelationStats(Relation rel, int elevel); +static char * CompressionTypeStr(CompressionType type); static void TruncateCStore(Relation rel, int elevel); static CStoreOptions * @@ -609,6 +609,108 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, { TruncateCStore(rel, elevel); } + + LogRelationStats(rel, elevel); +} + + +static void +LogRelationStats(Relation rel, int elevel) +{ + DataFileMetadata *datafileMetadata = NULL; + ListCell *stripeMetadataCell = NULL; + Oid relfilenode = rel->rd_node.relNode; + StringInfo infoBuf = makeStringInfo(); + + int compressionStats[COMPRESSION_COUNT] = { 0 }; + uint64 
totalStripeLength = 0; + uint64 tupleCount = 0; + uint64 blockCount = 0; + uint64 relPages = 0; + int stripeCount = 0; + TupleDesc tupdesc = RelationGetDescr(rel); + uint64 droppedBlocksWithData = 0; + + datafileMetadata = ReadDataFileMetadata(relfilenode); + stripeCount = list_length(datafileMetadata->stripeMetadataList); + + foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id, + RelationGetDescr(rel), + stripe->blockCount); + for (uint32 column = 0; column < skiplist->columnCount; column++) + { + bool attrDropped = tupdesc->attrs[column].attisdropped; + for (uint32 block = 0; block < skiplist->blockCount; block++) + { + ColumnBlockSkipNode *skipnode = + &skiplist->blockSkipNodeArray[column][block]; + + /* ignore zero length blocks for dropped attributes */ + if (skipnode->valueLength > 0) + { + compressionStats[skipnode->valueCompressionType]++; + blockCount++; + + if (attrDropped) + { + droppedBlocksWithData++; + } + } + } + } + + tupleCount += stripe->rowCount; + totalStripeLength += stripe->dataLength; + } + + RelationOpenSmgr(rel); + relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n", + relPages * BLCKSZ, totalStripeLength); + appendStringInfo(infoBuf, + "total row count: %ld, stripe count: %d, " + "average rows per stripe: %ld\n", + tupleCount, stripeCount, tupleCount / stripeCount); + appendStringInfo(infoBuf, + "block count: %ld" + ", containing data for dropped columns: %ld", + blockCount, droppedBlocksWithData); + for (int compressionType = 0; compressionType < COMPRESSION_COUNT; compressionType++) + { + appendStringInfo(infoBuf, + ", %s compressed: %d", + CompressionTypeStr(compressionType), + compressionStats[compressionType]); + } + appendStringInfoString(infoBuf, "\n"); + + ereport(elevel, 
(errmsg("statistics for \"%s\":\n%s", RelationGetRelationName(rel), + infoBuf->data))); +} + + +/* + * CompressionTypeStr returns string representation of a compression type. + */ +static char * +CompressionTypeStr(CompressionType type) +{ + switch (type) + { + case COMPRESSION_NONE: + return "none"; + + case COMPRESSION_PG_LZ: + return "pglz"; + + default: + return "unknown"; + } } diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 7a1ff2777..9552f6ade 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -134,7 +134,14 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space -VACUUM t; +VACUUM VERBOSE t; +INFO: "t": truncated 7 to 2 pages +DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +INFO: statistics for "t": +total file size: 16384, total data size: 10754 +total row count: 2530, stripe count: 3, average rows per stripe: 843 +block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 + SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- @@ -147,6 +154,55 @@ SELECT count(*) FROM t; 2530 (1 row) +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats +BEGIN; +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 2000; +SET cstore.compression TO "pglz"; +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SET cstore.compression TO "none"; +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 24576, total data size: 18808 +total row count: 5530, stripe count: 5, average rows per stripe: 1106 +block count: 7, containing data for dropped columns: 0, none 
compressed: 5, pglz compressed: 2 + +SELECT count(*) FROM t; + count +------- + 5530 +(1 row) + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 32768, total data size: 31372 +total row count: 7030, stripe count: 6, average rows per stripe: 1171 +block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed for if compression +-- doesn't reduce its size. +SET cstore.compression TO "pglz"; +VACUUM FULL t; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 16384, total data size: 15728 +total row count: 7030, stripe count: 4, average rows per stripe: 1757 +block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 + DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index ae23d9a26..767604251 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -11,6 +11,11 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request +s2: INFO: statistics for "test_vacuum_vs_insert": +total file size: 8192, total data size: 26 +total row count: 3, stripe count: 1, average rows per stripe: 3 +block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 + step s2-vacuum: VACUUM VERBOSE test_vacuum_vs_insert; diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec index ac2d83667..59c7274d5 100644 --- 
a/specs/am_vacuum_vs_insert.spec +++ b/specs/am_vacuum_vs_insert.spec @@ -44,4 +44,3 @@ step "s2-select" permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum" "s1-commit" "s2-select" permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum-full" "s1-commit" "s2-select" - diff --git a/specs/create.spec b/specs/create.spec index f8e874678..09fc32131 100644 --- a/specs/create.spec +++ b/specs/create.spec @@ -5,4 +5,3 @@ step "s1a" } permutation "s1a" - diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 10d1c7f6c..f7f9d77bd 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -57,10 +57,46 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space -VACUUM t; +VACUUM VERBOSE t; SELECT pg_size_pretty(pg_relation_size('t')); SELECT count(*) FROM t; +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats + +BEGIN; +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 2000; +SET cstore.compression TO "pglz"; +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SET cstore.compression TO "none"; +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; + +VACUUM VERBOSE t; + +SELECT count(*) FROM t; + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; + +VACUUM VERBOSE t; + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed for if compression +-- doesn't reduce its size. 
+SET cstore.compression TO "pglz"; +VACUUM FULL t; +VACUUM VERBOSE t; + DROP TABLE t; -- Make sure we cleaned the metadata for t too From 76a71aa61a283e2e973d235d211d9db328fab425 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 14:18:29 -0700 Subject: [PATCH 80/91] Use SnapshotDirty for reading metadata in truncation --- cstore.h | 1 + cstore_metadata_tables.c | 3 ++- cstore_tableam.c | 12 ++---------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/cstore.h b/cstore.h index 489a4839b..c84588627 100644 --- a/cstore.h +++ b/cstore.h @@ -20,6 +20,7 @@ #include "storage/bufpage.h" #include "storage/lockdefs.h" #include "utils/relcache.h" +#include "utils/snapmgr.h" /* Defines for valid option names */ #define OPTION_NAME_COMPRESSION_TYPE "compression" diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 7c214eed5..565a37b07 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -377,7 +377,8 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); tupleDescriptor = RelationGetDescr(cstoreStripes); - scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, scanKey); + scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, + scanKey); while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { diff --git a/cstore_tableam.c b/cstore_tableam.c index fa3cd8739..4e9d47260 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -631,7 +631,7 @@ LogRelationStats(Relation rel, int elevel) TupleDesc tupdesc = RelationGetDescr(rel); uint64 droppedBlocksWithData = 0; - datafileMetadata = ReadDataFileMetadata(relfilenode); + datafileMetadata = ReadDataFileMetadata(relfilenode, false); stripeCount = list_length(datafileMetadata->stripeMetadataList); foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) @@ -765,15 +765,7 @@ TruncateCStore(Relation rel, int elevel) old_rel_pages = 
smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - /* - * Get metadata as viewed in latest snapshot. Reading metadata in transaction - * snapshot is not enough, since stripes could have been created between - * current transaction start and lock acquisition time. Ignoring those - * stripes can destory data. - */ - PushActiveSnapshot(GetLatestSnapshot()); - metadata = ReadDataFileMetadata(rel->rd_node.relNode); - PopActiveSnapshot(); + metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); /* loop over stripes and find max used block */ foreach(stripeMetadataCell, metadata->stripeMetadataList) From e481e73d18722121c67a625b31e7732f01f545c6 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 8 Oct 2020 16:02:45 -0700 Subject: [PATCH 81/91] Encapsulate snapshot used for reading stripes in cstore_metadata_tables --- cstore.h | 1 + cstore_metadata_tables.c | 68 +++++++++++++++++++++++++------- cstore_tableam.c | 25 ++++++------ expected/am_vacuum.out | 6 +-- expected/am_vacuum_vs_insert.out | 2 +- 5 files changed, 70 insertions(+), 32 deletions(-) diff --git a/cstore.h b/cstore.h index c84588627..8a64730c8 100644 --- a/cstore.h +++ b/cstore.h @@ -285,6 +285,7 @@ extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); +extern uint64 GetHighestUsedAddress(Oid relfilenode); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 565a37b07..d5ad28388 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -43,6 +43,7 @@ typedef struct EState *estate; } ModifyState; +static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot); static Oid 
CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); static Oid CStoreDataFilesRelationId(void); @@ -345,17 +346,8 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk) { - Oid cstoreStripesOid = InvalidOid; - Relation cstoreStripes = NULL; - Relation index = NULL; - TupleDesc tupleDescriptor = NULL; - ScanKeyData scanKey[1]; - SysScanDesc scanDescriptor = NULL; - HeapTuple heapTuple; - bool found = false; - DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); - found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); + bool found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); if (!found) { if (!missingOk) @@ -369,6 +361,56 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) } } + datafileMetadata->stripeMetadataList = + ReadDataFileStripeList(relfilenode, GetTransactionSnapshot()); + + return datafileMetadata; +} + + +/* + * GetHighestUsedAddress returns the highest used address for the given + * relfilenode across all active and inactive transactions. + */ +uint64 +GetHighestUsedAddress(Oid relfilenode) +{ + uint64 highestUsedAddress = 0; + ListCell *stripeMetadataCell = NULL; + List *stripeMetadataList = NIL; + + SnapshotData SnapshotDirty; + InitDirtySnapshot(SnapshotDirty); + + stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); + + foreach(stripeMetadataCell, stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; + highestUsedAddress = Max(highestUsedAddress, lastByte); + } + + return highestUsedAddress; +} + + +/* + * ReadDataFileStripeList reads the stripe list for a given relfilenode + * in the given snapshot. 
+ */ +static List * +ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) +{ + List *stripeMetadataList = NIL; + Oid cstoreStripesOid = InvalidOid; + Relation cstoreStripes = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple; + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); @@ -403,16 +445,14 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) stripeMetadata->rowCount = DatumGetInt64( datumArray[Anum_cstore_stripes_row_count - 1]); - datafileMetadata->stripeMetadataList = lappend( - datafileMetadata->stripeMetadataList, - stripeMetadata); + stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); heap_close(cstoreStripes, NoLock); - return datafileMetadata; + return stripeMetadataList; } diff --git a/cstore_tableam.c b/cstore_tableam.c index 4e9d47260..0840436ec 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -601,6 +601,8 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, /* this should have been resolved by vacuum.c until now */ Assert(params->truncate != VACOPT_TERNARY_DEFAULT); + LogRelationStats(rel, elevel); + /* * We don't have updates, deletes, or concurrent updates, so all we * care for now is truncating the unused space at the end of storage. 
@@ -609,8 +611,6 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, { TruncateCStore(rel, elevel); } - - LogRelationStats(rel, elevel); } @@ -727,8 +727,7 @@ TruncateCStore(Relation rel, int elevel) PGRUsage ru0; BlockNumber old_rel_pages = 0; BlockNumber new_rel_pages = 0; - DataFileMetadata *metadata = NULL; - ListCell *stripeMetadataCell = NULL; + SmgrAddr highestPhysicalAddress; pg_rusage_init(&ru0); @@ -765,17 +764,15 @@ TruncateCStore(Relation rel, int elevel) old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); - - /* loop over stripes and find max used block */ - foreach(stripeMetadataCell, metadata->stripeMetadataList) - { - StripeMetadata *stripe = lfirst(stripeMetadataCell); - uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; - SmgrAddr addr = logical_to_smgr(lastByte); - new_rel_pages = Max(new_rel_pages, addr.blockno + 1); - } + /* + * Due to the AccessExclusive lock there's no danger that + * new stripes be added beyond highestPhysicalAddress while + * we're truncating. 
+ */ + highestPhysicalAddress = + logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode)); + new_rel_pages = highestPhysicalAddress.blockno + 1; if (new_rel_pages == old_rel_pages) { UnlockRelation(rel, AccessExclusiveLock); diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 9552f6ade..3db30a761 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -135,13 +135,13 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space VACUUM VERBOSE t; -INFO: "t": truncated 7 to 2 pages -DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s INFO: statistics for "t": -total file size: 16384, total data size: 10754 +total file size: 57344, total data size: 10754 total row count: 2530, stripe count: 3, average rows per stripe: 843 block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 +INFO: "t": truncated 7 to 2 pages +DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index 767604251..a3eb0fb89 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -10,12 +10,12 @@ step s1-begin: step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; -s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request s2: INFO: statistics for "test_vacuum_vs_insert": total file size: 8192, total data size: 26 total row count: 3, stripe count: 1, average rows per stripe: 3 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 +s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request step s2-vacuum: VACUUM VERBOSE test_vacuum_vs_insert; From 102b7670d40cba7731ce2f4763965d757db28a3e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Fri, 9 
Oct 2020 12:35:42 -0700 Subject: [PATCH 82/91] Fix tautological compare issue (#19) --- .gitignore | 1 + cstore_metadata_tables.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index aa7be0e36..6b3554f3b 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ *.ko *.obj *.elf +*.bc # Libraries *.lib diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 7c214eed5..171860762 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -205,7 +205,7 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount) { StripeSkipList *skipList = NULL; - uint32 columnIndex = 0; + int32 columnIndex = 0; Oid cstoreSkipNodesOid = InvalidOid; Relation cstoreSkipNodes = NULL; Relation index = NULL; @@ -237,8 +237,8 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { - uint32 attr = 0; - uint32 blockIndex = 0; + int32 attr = 0; + int32 blockIndex = 0; ColumnBlockSkipNode *skipNode = NULL; Datum datumArray[Natts_cstore_skipnodes]; @@ -253,13 +253,13 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, if (attr <= 0 || attr > columnCount) { ereport(ERROR, (errmsg("invalid stripe skipnode entry"), - errdetail("Attribute number out of range: %u", attr))); + errdetail("Attribute number out of range: %d", attr))); } if (blockIndex < 0 || blockIndex >= blockCount) { ereport(ERROR, (errmsg("invalid stripe skipnode entry"), - errdetail("Block number out of range: %u", blockIndex))); + errdetail("Block number out of range: %d", blockIndex))); } columnIndex = attr - 1; From 5fc7f61936367dce31179b5b6dc2b83eb61e61ba Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 13 Oct 2020 13:36:02 +0200 Subject: [PATCH 83/91] Projection pushdown (#11) DESCRIPTION: add pushdown support for projections and quals in table access method scan This implementation 
uses custom scans to push projections into the scans on a columnar table. The custom scan replaces all access paths to a table to force the projection of the columns. --- Makefile | 6 +- cstore.h | 1 + cstore_customscan.c | 426 +++++++++++++++++++++++++++++++++++++++++++ cstore_customscan.h | 19 ++ cstore_reader.c | 14 ++ cstore_tableam.c | 46 ++++- cstore_tableam.h | 8 + expected/am_join.out | 37 ++++ sql/am_join.sql | 28 +++ 9 files changed, 579 insertions(+), 6 deletions(-) create mode 100644 cstore_customscan.c create mode 100644 cstore_customscan.h create mode 100644 expected/am_join.out create mode 100644 sql/am_join.sql diff --git a/Makefile b/Makefile index 58340450f..7e8bee13a 100644 --- a/Makefile +++ b/Makefile @@ -49,12 +49,12 @@ ifeq ($(USE_FDW),yes) fdw_copyto fdw_alter fdw_rollback fdw_truncate fdw_clean endif -# disabled tests: am_block_filtering ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM - OBJS += cstore_tableam.o + OBJS += cstore_tableam.o cstore_customscan.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ - am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean + am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean \ + am_block_filtering am_join ISOLATION += am_vacuum_vs_insert endif diff --git a/cstore.h b/cstore.h index 8a64730c8..f5e0590a8 100644 --- a/cstore.h +++ b/cstore.h @@ -267,6 +267,7 @@ extern TableReadState * CStoreBeginRead(Relation relation, extern bool CStoreReadFinished(TableReadState *state); extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, bool *columnNulls); +extern void CStoreRescan(TableReadState *readState); extern void CStoreEndRead(TableReadState *state); /* Function declarations for common functions */ diff --git a/cstore_customscan.c b/cstore_customscan.c new file mode 100644 index 000000000..0dcdff111 --- /dev/null +++ b/cstore_customscan.c @@ -0,0 +1,426 @@ 
+/*------------------------------------------------------------------------- + * + * cstore_customscan.c + * + * This file contains the implementation of a postgres custom scan that + * we use to push down the projections into the table access methods. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/skey.h" +#include "nodes/extensible.h" +#include "nodes/pg_list.h" +#include "nodes/plannodes.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/restrictinfo.h" +#include "utils/relcache.h" + +#include "cstore.h" +#include "cstore_customscan.h" +#include "cstore_tableam.h" + +typedef struct CStoreScanPath +{ + CustomPath custom_path; + + /* place for local state during planning */ +} CStoreScanPath; + +typedef struct CStoreScanScan +{ + CustomScan custom_scan; + + /* place for local state during execution */ +} CStoreScanScan; + +typedef struct CStoreScanState +{ + CustomScanState custom_scanstate; + + List *qual; +} CStoreScanState; + + +static void CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, + RangeTblEntry *rte); +static Path * CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte); +static Cost CStoreScanCost(RangeTblEntry *rte); +static Plan * CStoreScanPath_PlanCustomPath(PlannerInfo *root, + RelOptInfo *rel, + struct CustomPath *best_path, + List *tlist, + List *clauses, + List *custom_plans); + +static Node * CStoreScan_CreateCustomScanState(CustomScan *cscan); + +static void CStoreScan_BeginCustomScan(CustomScanState *node, EState *estate, int eflags); +static TupleTableSlot * CStoreScan_ExecCustomScan(CustomScanState *node); +static void CStoreScan_EndCustomScan(CustomScanState *node); +static void CStoreScan_ReScanCustomScan(CustomScanState *node); + +/* saved hook value in case of unload */ +static set_rel_pathlist_hook_type PreviousSetRelPathlistHook = NULL; + 
+static bool EnableCStoreCustomScan = true; + + +const struct CustomPathMethods CStoreScanPathMethods = { + .CustomName = "CStoreScan", + .PlanCustomPath = CStoreScanPath_PlanCustomPath, +}; + +const struct CustomScanMethods CStoreScanScanMethods = { + .CustomName = "CStoreScan", + .CreateCustomScanState = CStoreScan_CreateCustomScanState, +}; + +const struct CustomExecMethods CStoreExecuteMethods = { + .CustomName = "CStoreScan", + + .BeginCustomScan = CStoreScan_BeginCustomScan, + .ExecCustomScan = CStoreScan_ExecCustomScan, + .EndCustomScan = CStoreScan_EndCustomScan, + .ReScanCustomScan = CStoreScan_ReScanCustomScan, + + .ExplainCustomScan = NULL, +}; + + +/* + * cstore_customscan_init installs the hook required to intercept the postgres planner and + * provide extra paths for cstore tables + */ +void +cstore_customscan_init() +{ + PreviousSetRelPathlistHook = set_rel_pathlist_hook; + set_rel_pathlist_hook = CStoreSetRelPathlistHook; + + /* register customscan specific GUC's */ + DefineCustomBoolVariable( + "cstore.enable_custom_scan", + gettext_noop("Enables the use of a custom scan to push projections and quals " + "into the storage layer"), + NULL, + &EnableCStoreCustomScan, + true, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); +} + + +static void +clear_paths(RelOptInfo *rel) +{ + rel->pathlist = NULL; + rel->partial_pathlist = NULL; + rel->cheapest_startup_path = NULL; + rel->cheapest_total_path = NULL; + rel->cheapest_unique_path = NULL; +} + + +static void +CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, + RangeTblEntry *rte) +{ + /* call into previous hook if assigned */ + if (PreviousSetRelPathlistHook) + { + PreviousSetRelPathlistHook(root, rel, rti, rte); + } + + if (!EnableCStoreCustomScan) + { + /* custon scans are disabled, use normal table access method api instead */ + return; + } + + if (!OidIsValid(rte->relid)) + { + /* some calls to the pathlist hook don't have a valid relation set. 
Do nothing */ + return; + } + + /* + * Here we want to inspect if this relation pathlist hook is accessing a cstore table. + * If that is the case we want to insert an extra path that pushes down the projection + * into the scan of the table to minimize the data read. + */ + Relation relation = RelationIdGetRelation(rte->relid); + if (relation->rd_tableam == GetCstoreTableAmRoutine()) + { + ereport(DEBUG1, (errmsg("pathlist hook for cstore table am"))); + + /* we propose a new path that will be the only path for scanning this relation */ + Path *customPath = CreateCStoreScanPath(rel, rte); + clear_paths(rel); + add_path(rel, customPath); + } + RelationClose(relation); +} + + +static Path * +CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte) +{ + CStoreScanPath *cspath = (CStoreScanPath *) newNode(sizeof(CStoreScanPath), + T_CustomPath); + + /* + * popuate custom path information + */ + CustomPath *cpath = &cspath->custom_path; + cpath->methods = &CStoreScanPathMethods; + + /* + * populate generic path information + */ + Path *path = &cpath->path; + path->pathtype = T_CustomScan; + path->parent = rel; + path->pathtarget = rel->reltarget; + + /* + * Add cost estimates for a cstore table scan, row count is the rows estimated by + * postgres' planner. + */ + path->rows = rel->rows; + path->startup_cost = 0; + path->total_cost = path->startup_cost + CStoreScanCost(rte); + + return (Path *) cspath; +} + + +/* + * CStoreScanCost calculates the cost of scanning the cstore table. The cost is estimated + * by using all stripe metadata to estimate based on the columns to read how many pages + * need to be read. 
+ */ +static Cost +CStoreScanCost(RangeTblEntry *rte) +{ + Relation rel = RelationIdGetRelation(rte->relid); + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + RelationClose(rel); + rel = NULL; + + uint32 maxColumnCount = 0; + uint64 totalStripeSize = 0; + ListCell *stripeMetadataCell = NULL; + foreach(stripeMetadataCell, metadata->stripeMetadataList) + { + StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); + totalStripeSize += stripeMetadata->dataLength; + maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount); + } + + Bitmapset *attr_needed = rte->selectedCols; + double numberOfColumnsRead = bms_num_members(attr_needed); + double selectionRatio = numberOfColumnsRead / (double) maxColumnCount; + Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio; + + return scanCost; +} + + +static Plan * +CStoreScanPath_PlanCustomPath(PlannerInfo *root, + RelOptInfo *rel, + struct CustomPath *best_path, + List *tlist, + List *clauses, + List *custom_plans) +{ + CStoreScanScan *plan = (CStoreScanScan *) newNode(sizeof(CStoreScanScan), + T_CustomScan); + + CustomScan *cscan = &plan->custom_scan; + cscan->methods = &CStoreScanScanMethods; + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + clauses = extract_actual_clauses(clauses, false); + + cscan->scan.plan.targetlist = list_copy(tlist); + cscan->scan.plan.qual = clauses; + cscan->scan.scanrelid = best_path->path.parent->relid; + + return (Plan *) plan; +} + + +static Node * +CStoreScan_CreateCustomScanState(CustomScan *cscan) +{ + CStoreScanState *cstorescanstate = (CStoreScanState *) newNode( + sizeof(CStoreScanState), T_CustomScanState); + + CustomScanState *cscanstate = &cstorescanstate->custom_scanstate; + cscanstate->methods = &CStoreExecuteMethods; + + cstorescanstate->qual = cscan->scan.plan.qual; + + return (Node *) cscanstate; +} + + +static void +CStoreScan_BeginCustomScan(CustomScanState *cscanstate, 
EState *estate, int eflags) +{ + /* scan slot is already initialized */ +} + + +static Bitmapset * +CStoreAttrNeeded(ScanState *ss) +{ + TupleTableSlot *slot = ss->ss_ScanTupleSlot; + int natts = slot->tts_tupleDescriptor->natts; + Bitmapset *attr_needed = NULL; + Plan *plan = ss->ps.plan; + int flags = PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS; + List *vars = list_concat(pull_var_clause((Node *) plan->targetlist, flags), + pull_var_clause((Node *) plan->qual, flags)); + ListCell *lc; + + foreach(lc, vars) + { + Var *var = lfirst(lc); + + if (var->varattno == 0) + { + elog(DEBUG1, "Need attribute: all"); + + /* all attributes are required, we don't need to add more so break*/ + attr_needed = bms_add_range(attr_needed, 0, natts - 1); + break; + } + + elog(DEBUG1, "Need attribute: %d", var->varattno); + attr_needed = bms_add_member(attr_needed, var->varattno - 1); + } + + return attr_needed; +} + + +static TupleTableSlot * +CStoreScanNext(CStoreScanState *cstorescanstate) +{ + CustomScanState *node = (CustomScanState *) cstorescanstate; + TableScanDesc scandesc; + EState *estate; + ScanDirection direction; + TupleTableSlot *slot; + + /* + * get information from the estate and scan state + */ + scandesc = node->ss.ss_currentScanDesc; + estate = node->ss.ps.state; + direction = estate->es_direction; + slot = node->ss.ss_ScanTupleSlot; + + if (scandesc == NULL) + { + /* the cstore access method does not use the flags, they are specific to heap */ + uint32 flags = 0; + Bitmapset *attr_needed = CStoreAttrNeeded(&node->ss); + + /* + * We reach here if the scan is not parallel, or if we're serially + * executing a scan that was planned to be parallel. 
+ */ + scandesc = cstore_beginscan_extended(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL, NULL, flags, attr_needed, + cstorescanstate->qual); + bms_free(attr_needed); + + node->ss.ss_currentScanDesc = scandesc; + } + + /* + * get the next tuple from the table + */ + if (table_scan_getnextslot(scandesc, direction, slot)) + { + return slot; + } + return NULL; +} + + +/* + * SeqRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +CStoreScanRecheck(CStoreScanState *node, TupleTableSlot *slot) +{ + return true; +} + + +static TupleTableSlot * +CStoreScan_ExecCustomScan(CustomScanState *node) +{ + return ExecScan(&node->ss, + (ExecScanAccessMtd) CStoreScanNext, + (ExecScanRecheckMtd) CStoreScanRecheck); +} + + +static void +CStoreScan_EndCustomScan(CustomScanState *node) +{ + TableScanDesc scanDesc; + + /* + * get information from node + */ + scanDesc = node->ss.ss_currentScanDesc; + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * clean out the tuple table + */ + if (node->ss.ps.ps_ResultTupleSlot) + { + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + } + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * close heap scan + */ + if (scanDesc != NULL) + { + table_endscan(scanDesc); + } +} + + +static void +CStoreScan_ReScanCustomScan(CustomScanState *node) +{ + TableScanDesc scanDesc = node->ss.ss_currentScanDesc; + if (scanDesc != NULL) + { + table_rescan(node->ss.ss_currentScanDesc, NULL); + } +} diff --git a/cstore_customscan.h b/cstore_customscan.h new file mode 100644 index 000000000..9e388e13f --- /dev/null +++ b/cstore_customscan.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * + * cstore_customscan.h + * + * Forward declarations of functions to hookup the custom scan feature of + * cstore. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_FDW_CSTORE_CUSTOMSCAN_H +#define CSTORE_FDW_CSTORE_CUSTOMSCAN_H + +void cstore_customscan_init(void); + + +#endif /*CSTORE_FDW_CSTORE_CUSTOMSCAN_H */ diff --git a/cstore_reader.c b/cstore_reader.c index cf2d0b171..c86021f7e 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -29,6 +29,7 @@ #endif #include "optimizer/restrictinfo.h" #include "storage/fd.h" +#include "utils/guc.h" #include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -222,6 +223,19 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu } +/* + * CStoreRescan clears the position where we were scanning so that the next read starts at + * the beginning again + */ +void +CStoreRescan(TableReadState *readState) +{ + readState->stripeBuffers = NULL; + readState->readStripeCount = 0; + readState->stripeReadRowCount = 0; +} + + /* Finishes a cstore read operation. 
*/ void CStoreEndRead(TableReadState *readState) diff --git a/cstore_tableam.c b/cstore_tableam.c index 0840436ec..eae806e59 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -37,6 +37,7 @@ #include "utils/syscache.h" #include "cstore.h" +#include "cstore_customscan.h" #include "cstore_tableam.h" #define CSTORE_TABLEAM_NAME "cstore_tableam" @@ -154,7 +155,7 @@ RelationColumnList(Relation rel) for (int i = 0; i < tupdesc->natts; i++) { - Index varno = 0; + Index varno = 1; AttrNumber varattno = i + 1; Oid vartype = tupdesc->attrs[i].atttypid; int32 vartypmod = tupdesc->attrs[i].atttypmod; @@ -188,11 +189,36 @@ cstore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags) +{ + TableScanDesc scandesc; + int natts = relation->rd_att->natts; + Bitmapset *attr_needed = NULL; + + attr_needed = bms_add_range(attr_needed, 0, natts - 1); + + /* the cstore access method does not use the flags, they are specific to heap */ + flags = 0; + + scandesc = cstore_beginscan_extended(relation, snapshot, nkeys, key, parallel_scan, + flags, attr_needed, NULL); + + pfree(attr_needed); + + return scandesc; +} + + +TableScanDesc +cstore_beginscan_extended(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags, Bitmapset *attr_needed, List *scanQual) { TupleDesc tupdesc = relation->rd_att; TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; + List *neededColumnList = NIL; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); scan->cs_base.rs_rd = relation; @@ -204,7 +230,18 @@ cstore_beginscan(Relation relation, Snapshot snapshot, columnList = RelationColumnList(relation); - readState = CStoreBeginRead(relation, tupdesc, columnList, NULL); + /* only collect columns that we need for the scan */ + ListCell *columnCell = NULL; + foreach(columnCell, columnList) + { + Var *var 
= castNode(Var, lfirst(columnCell)); + if (bms_is_member(var->varattno - 1, attr_needed)) + { + neededColumnList = lappend(neededColumnList, var); + } + } + + readState = CStoreBeginRead(relation, tupdesc, neededColumnList, scanQual); scan->cs_readState = readState; @@ -226,7 +263,8 @@ static void cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode) { - elog(ERROR, "cstore_rescan not implemented"); + CStoreScanDesc scan = (CStoreScanDesc) sscan; + CStoreRescan(scan->cs_readState); } @@ -996,6 +1034,8 @@ cstore_tableam_init() ExecutorEnd_hook = CStoreExecutorEnd; prevObjectAccessHook = object_access_hook; object_access_hook = CStoreTableAMObjectAccessHook; + + cstore_customscan_init(); } diff --git a/cstore_tableam.h b/cstore_tableam.h index bdf7f96c0..557506b9f 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -1,7 +1,15 @@ #include "postgres.h" #include "fmgr.h" #include "access/tableam.h" +#include "access/skey.h" +#include "nodes/bitmapset.h" const TableAmRoutine * GetCstoreTableAmRoutine(void); extern void cstore_tableam_init(void); extern void cstore_tableam_finish(void); + +extern TableScanDesc cstore_beginscan_extended(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags, Bitmapset *attr_needed, + List *scanQual); diff --git a/expected/am_join.out b/expected/am_join.out new file mode 100644 index 000000000..fbb628187 --- /dev/null +++ b/expected/am_join.out @@ -0,0 +1,37 @@ +CREATE SCHEMA am_cstore_join; +SET search_path TO am_cstore_join; +CREATE TABLE users (id int, name text) USING cstore_tableam; +INSERT INTO users SELECT a, 'name' || a FROM generate_series(0,30-1) AS a; +CREATE TABLE things (id int, user_id int, name text) USING cstore_tableam; +INSERT INTO things SELECT a, a % 30, 'thing' || a FROM generate_series(1,300) AS a; +-- force the nested loop to rescan the table +SET enable_material TO off; +SET 
enable_hashjoin TO off; +SET enable_mergejoin TO off; +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 290; + count +------- + 10 +(1 row) + +-- verify the join uses a nested loop to trigger the rescan behaviour +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 299990; + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (users.id = things.user_id) + -> Custom Scan (CStoreScan) on things + Filter: (id > 299990) + -> Custom Scan (CStoreScan) on users +(6 rows) + +SET client_min_messages TO warning; +DROP SCHEMA am_cstore_join CASCADE; diff --git a/sql/am_join.sql b/sql/am_join.sql new file mode 100644 index 000000000..4d78dfe5b --- /dev/null +++ b/sql/am_join.sql @@ -0,0 +1,28 @@ +CREATE SCHEMA am_cstore_join; +SET search_path TO am_cstore_join; + +CREATE TABLE users (id int, name text) USING cstore_tableam; +INSERT INTO users SELECT a, 'name' || a FROM generate_series(0,30-1) AS a; + +CREATE TABLE things (id int, user_id int, name text) USING cstore_tableam; +INSERT INTO things SELECT a, a % 30, 'thing' || a FROM generate_series(1,300) AS a; + +-- force the nested loop to rescan the table +SET enable_material TO off; +SET enable_hashjoin TO off; +SET enable_mergejoin TO off; + +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 290; + +-- verify the join uses a nested loop to trigger the rescan behaviour +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 299990; + +SET client_min_messages TO warning; +DROP SCHEMA am_cstore_join CASCADE; From 4355ca494541903fe8b1f2abaed29c4f8357f959 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 14 Oct 2020 13:56:58 -0700 Subject: [PATCH 84/91] trigger fix and tests --- Makefile | 2 +- cstore_customscan.c | 2 +- cstore_tableam.c | 59 
+++++++++++++++++++++++++++++++++++++ expected/am_trigger.out | 65 +++++++++++++++++++++++++++++++++++++++++ sql/am_trigger.sql | 61 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 expected/am_trigger.out create mode 100644 sql/am_trigger.sql diff --git a/Makefile b/Makefile index 7e8bee13a..0d581f145 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ ifeq ($(USE_TABLEAM),yes) OBJS += cstore_tableam.o cstore_customscan.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean \ - am_block_filtering am_join + am_block_filtering am_join am_trigger ISOLATION += am_vacuum_vs_insert endif diff --git a/cstore_customscan.c b/cstore_customscan.c index 0dcdff111..d7e6eb667 100644 --- a/cstore_customscan.c +++ b/cstore_customscan.c @@ -145,7 +145,7 @@ CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, return; } - if (!OidIsValid(rte->relid)) + if (!OidIsValid(rte->relid) || rte->rtekind != RTE_RELATION) { /* some calls to the pathlist hook don't have a valid relation set. 
Do nothing */ return; diff --git a/cstore_tableam.c b/cstore_tableam.c index eae806e59..09a65d75b 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -16,6 +16,7 @@ #include "catalog/index.h" #include "catalog/objectaccess.h" #include "catalog/pg_am.h" +#include "catalog/pg_trigger.h" #include "catalog/storage.h" #include "catalog/storage_xlog.h" #include "commands/progress.h" @@ -31,6 +32,7 @@ #include "storage/predicate.h" #include "storage/procarray.h" #include "storage/smgr.h" +#include "tcop/utility.h" #include "utils/builtins.h" #include "utils/pg_rusage.h" #include "utils/rel.h" @@ -62,11 +64,19 @@ static TableWriteState *CStoreWriteState = NULL; static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; static MemoryContext CStoreContext = NULL; static object_access_hook_type prevObjectAccessHook = NULL; +static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; /* forward declaration for static functions */ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, int subId, void *arg); +static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + char *completionTag); static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); @@ -1027,11 +1037,60 @@ CStoreExecutorEnd(QueryDesc *queryDesc) } +static void +CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + char *completionTag) +{ + Node *parseTree = plannedStatement->utilityStmt; + + if (nodeTag(parseTree) == T_CreateTrigStmt) + { + CreateTrigStmt *createTrigStmt = (CreateTrigStmt *) parseTree; + Relation rel; + bool 
isCStore; + + rel = relation_openrv(createTrigStmt->relation, AccessShareLock); + isCStore = rel->rd_tableam == GetCstoreTableAmRoutine(); + relation_close(rel, AccessShareLock); + + if (isCStore && + createTrigStmt->row && + createTrigStmt->timing == TRIGGER_TYPE_AFTER) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "AFTER ROW triggers are not supported for columnstore access method"), + errhint("Consider an AFTER STATEMENT trigger instead."))); + } + } + + if (PreviousProcessUtilityHook != NULL) + { + PreviousProcessUtilityHook(plannedStatement, queryString, context, + paramListInfo, queryEnvironment, + destReceiver, completionTag); + } + else + { + standard_ProcessUtility(plannedStatement, queryString, context, + paramListInfo, queryEnvironment, + destReceiver, completionTag); + } +} + + void cstore_tableam_init() { PreviousExecutorEndHook = ExecutorEnd_hook; ExecutorEnd_hook = CStoreExecutorEnd; + PreviousProcessUtilityHook = ProcessUtility_hook; + ProcessUtility_hook = CStoreTableAMProcessUtility; prevObjectAccessHook = object_access_hook; object_access_hook = CStoreTableAMObjectAccessHook; diff --git a/expected/am_trigger.out b/expected/am_trigger.out new file mode 100644 index 000000000..53b2c9d9e --- /dev/null +++ b/expected/am_trigger.out @@ -0,0 +1,65 @@ +create or replace function trs_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE STATEMENT %', TG_OP; + RETURN NULL; +END; +$$; +create or replace function trs_after() returns trigger language plpgsql as $$ +DECLARE + r RECORD; +BEGIN + RAISE NOTICE 'AFTER STATEMENT %', TG_OP; + IF (TG_OP = 'DELETE') THEN + FOR R IN select * from old_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + ELSE + FOR R IN select * from new_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + END IF; + RETURN NULL; +END; +$$; +create or replace function trr_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE ROW %: (%)', TG_OP, NEW.i; + 
RETURN NEW; +END; +$$; +create or replace function trr_after() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'AFTER ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; +create table test_tr(i int) using cstore_tableam; +create trigger tr_before_stmt before insert on test_tr + for each statement execute procedure trs_before(); +create trigger tr_after_stmt after insert on test_tr + referencing new table as new_table + for each statement execute procedure trs_after(); +create trigger tr_before_row before insert on test_tr + for each row execute procedure trr_before(); +-- after triggers require TIDs, which are not supported yet +create trigger tr_after_row after insert on test_tr + for each row execute procedure trr_after(); +ERROR: AFTER ROW triggers are not supported for columnstore access method +HINT: Consider an AFTER STATEMENT trigger instead. +insert into test_tr values(1); +NOTICE: BEFORE STATEMENT INSERT +NOTICE: BEFORE ROW INSERT: (1) +NOTICE: AFTER STATEMENT INSERT +NOTICE: (1) +insert into test_tr values(2),(3),(4); +NOTICE: BEFORE STATEMENT INSERT +NOTICE: BEFORE ROW INSERT: (2) +NOTICE: BEFORE ROW INSERT: (3) +NOTICE: BEFORE ROW INSERT: (4) +NOTICE: AFTER STATEMENT INSERT +NOTICE: (2) +NOTICE: (3) +NOTICE: (4) +drop table test_tr; diff --git a/sql/am_trigger.sql b/sql/am_trigger.sql new file mode 100644 index 000000000..b8a918cf4 --- /dev/null +++ b/sql/am_trigger.sql @@ -0,0 +1,61 @@ + +create or replace function trs_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE STATEMENT %', TG_OP; + RETURN NULL; +END; +$$; + +create or replace function trs_after() returns trigger language plpgsql as $$ +DECLARE + r RECORD; +BEGIN + RAISE NOTICE 'AFTER STATEMENT %', TG_OP; + IF (TG_OP = 'DELETE') THEN + FOR R IN select * from old_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + ELSE + FOR R IN select * from new_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + END IF; + RETURN NULL; +END; +$$; + +create or 
replace function trr_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; + +create or replace function trr_after() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'AFTER ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; + +create table test_tr(i int) using cstore_tableam; + +create trigger tr_before_stmt before insert on test_tr + for each statement execute procedure trs_before(); +create trigger tr_after_stmt after insert on test_tr + referencing new table as new_table + for each statement execute procedure trs_after(); + +create trigger tr_before_row before insert on test_tr + for each row execute procedure trr_before(); + +-- after triggers require TIDs, which are not supported yet +create trigger tr_after_row after insert on test_tr + for each row execute procedure trr_after(); + +insert into test_tr values(1); +insert into test_tr values(2),(3),(4); + +drop table test_tr; From c92ea1de9603ccbb1ffb4837f2995de09342694e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 13 Oct 2020 22:07:42 -0700 Subject: [PATCH 85/91] Implement concurrent writes --- Makefile | 2 +- cstore.h | 33 ++++++- cstore_fdw.c | 12 +-- cstore_metadata_tables.c | 103 +++++++++++++++++++++- cstore_tableam.c | 2 - cstore_writer.c | 128 ++++++--------------------- expected/am_vacuum.out | 26 +++--- expected/am_vacuum_vs_insert.out | 4 +- expected/am_write_concurrency.out | 142 ++++++++++++++++++++++++++++++ specs/am_write_concurrency.spec | 67 ++++++++++++++ 10 files changed, 388 insertions(+), 131 deletions(-) create mode 100644 expected/am_write_concurrency.out create mode 100644 specs/am_write_concurrency.spec diff --git a/Makefile b/Makefile index 0d581f145..6804bae42 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ ifeq ($(USE_TABLEAM),yes) REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate 
am_vacuum am_clean \ am_block_filtering am_join am_trigger - ISOLATION += am_vacuum_vs_insert + ISOLATION += am_write_concurrency am_vacuum_vs_insert endif ifeq ($(enable_coverage),yes) diff --git a/cstore.h b/cstore.h index f5e0590a8..7ff657e33 100644 --- a/cstore.h +++ b/cstore.h @@ -218,15 +218,12 @@ typedef struct TableReadState /* TableWriteState represents state of a cstore file write operation. */ typedef struct TableWriteState { - DataFileMetadata *datafileMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; - uint64 currentFileOffset; Relation relation; MemoryContext stripeWriteContext; - uint64 currentStripeId; StripeBuffers *stripeBuffers; StripeSkipList *stripeSkipList; uint32 stripeMaxRowCount; @@ -284,9 +281,11 @@ extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressio /* cstore_metadata_tables.c */ extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); -extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); extern uint64 GetHighestUsedAddress(Oid relfilenode); +extern StripeMetadata ReserveStripe(Relation rel, uint64 size, + uint64 rowCount, uint64 columnCount, + uint64 blockCount, uint64 blockRowCount); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); @@ -317,4 +316,30 @@ logical_to_smgr(uint64 logicalOffset) } +/* + * Map a physical page adnd offset address to a logical address. + */ +static inline uint64 +smgr_to_logical(SmgrAddr addr) +{ + uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; + return bytes_per_page * addr.blockno + addr.offset - SizeOfPageHeaderData; +} + + +/* + * Get the first usable address of next block. 
+ */ +static inline SmgrAddr +next_block_start(SmgrAddr addr) +{ + SmgrAddr result = { + .blockno = addr.blockno + 1, + .offset = SizeOfPageHeaderData + }; + + return result; +} + + #endif /* CSTORE_H */ diff --git a/cstore_fdw.c b/cstore_fdw.c index 2790efaca..221c97843 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -494,10 +494,10 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) Assert(copyStatement->relation != NULL); /* - * Open and lock the relation. We acquire ShareUpdateExclusiveLock to allow - * concurrent reads, but block concurrent writes. + * Open and lock the relation. We acquire RowExclusiveLock to allow + * concurrent reads and writes. */ - relation = cstore_fdw_openrv(copyStatement->relation, ShareUpdateExclusiveLock); + relation = cstore_fdw_openrv(copyStatement->relation, RowExclusiveLock); relationId = RelationGetRelid(relation); /* allocate column values and nulls arrays */ @@ -572,7 +572,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) /* end read/write sessions and close the relation */ EndCopyFrom(copyState); CStoreEndWrite(writeState); - heap_close(relation, ShareUpdateExclusiveLock); + heap_close(relation, RowExclusiveLock); return processedRowCount; } @@ -2015,7 +2015,7 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela Relation relation = NULL; foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); - relation = cstore_fdw_open(foreignTableOid, ShareUpdateExclusiveLock); + relation = cstore_fdw_open(foreignTableOid, RowExclusiveLock); cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); @@ -2086,7 +2086,7 @@ CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) Relation relation = writeState->relation; CStoreEndWrite(writeState); - heap_close(relation, ShareUpdateExclusiveLock); + heap_close(relation, RowExclusiveLock); } } diff --git 
a/cstore_metadata_tables.c b/cstore_metadata_tables.c index ced5900d6..1bfc4be49 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -31,6 +31,8 @@ #include "lib/stringinfo.h" #include "port.h" #include "storage/fd.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/memutils.h" @@ -43,6 +45,10 @@ typedef struct EState *estate; } ModifyState; +static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); +static void GetHighestUsedAddressAndId(Oid relfilenode, + uint64 *highestUsedAddress, + uint64 *highestUsedId); static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot); static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); @@ -311,7 +317,7 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, /* * InsertStripeMetadataRow adds a row to cstore_stripes. */ -void +static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) { bool nulls[Natts_cstore_stripes] = { 0 }; @@ -330,7 +336,9 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) Relation cstoreStripes = heap_open(cstoreStripesOid, RowExclusiveLock); ModifyState *modifyState = StartModifyRelation(cstoreStripes); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + FinishModifyRelation(modifyState); CommandCounterIncrement(); @@ -376,6 +384,23 @@ uint64 GetHighestUsedAddress(Oid relfilenode) { uint64 highestUsedAddress = 0; + uint64 highestUsedId = 0; + + GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId); + + return highestUsedAddress; +} + + +/* + * GetHighestUsedAddressAndId returns the highest used address and id for + * the given relfilenode across all active and inactive transactions. 
+ */ +static void +GetHighestUsedAddressAndId(Oid relfilenode, + uint64 *highestUsedAddress, + uint64 *highestUsedId) +{ ListCell *stripeMetadataCell = NULL; List *stripeMetadataList = NIL; @@ -384,14 +409,83 @@ GetHighestUsedAddress(Oid relfilenode) stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); + *highestUsedId = 0; + *highestUsedAddress = 0; + foreach(stripeMetadataCell, stripeMetadataList) { StripeMetadata *stripe = lfirst(stripeMetadataCell); uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; - highestUsedAddress = Max(highestUsedAddress, lastByte); + *highestUsedAddress = Max(*highestUsedAddress, lastByte); + *highestUsedId = Max(*highestUsedId, stripe->id); + } +} + + +/* + * ReserveStripe reserves and stripe of given size for the given relation, + * and inserts it into cstore_stripes. It is guaranteed that concurrent + * writes won't overwrite the returned stripe. + */ +StripeMetadata +ReserveStripe(Relation rel, uint64 sizeBytes, + uint64 rowCount, uint64 columnCount, + uint64 blockCount, uint64 blockRowCount) +{ + StripeMetadata stripe = { 0 }; + Oid relfilenode = InvalidOid; + uint64 currLogicalHigh = 0; + SmgrAddr currSmgrHigh; + uint64 nblocks = 0; + uint64 resLogicalStart = 0; + SmgrAddr resSmgrStart; + uint64 resLogicalEnd = 0; + SmgrAddr resSmgrEnd; + uint64 highestId = 0; + + /* + * We take ShareUpdateExclusiveLock here, so two space + * reservations conflict, space reservation <-> vacuum + * conflict, but space reservation doesn't conflict with + * reads & writes. 
+ */ + LockRelation(rel, ShareUpdateExclusiveLock); + + relfilenode = rel->rd_node.relNode; + GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId); + currSmgrHigh = logical_to_smgr(currLogicalHigh); + + resSmgrStart = next_block_start(currSmgrHigh); + resLogicalStart = smgr_to_logical(resSmgrStart); + + resLogicalEnd = resLogicalStart + sizeBytes - 1; + resSmgrEnd = logical_to_smgr(resLogicalEnd); + + RelationOpenSmgr(rel); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + + while (resSmgrEnd.blockno >= nblocks) + { + Buffer newBuffer = ReadBuffer(rel, P_NEW); + ReleaseBuffer(newBuffer); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); } - return highestUsedAddress; + RelationCloseSmgr(rel); + + stripe.fileOffset = resLogicalStart; + stripe.dataLength = sizeBytes; + stripe.blockCount = blockCount; + stripe.blockRowCount = blockRowCount; + stripe.columnCount = columnCount; + stripe.rowCount = rowCount; + stripe.id = highestId + 1; + + InsertStripeMetadataRow(relfilenode, &stripe); + + UnlockRelation(rel, ShareUpdateExclusiveLock); + + return stripe; } @@ -419,7 +513,7 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); tupleDescriptor = RelationGetDescr(cstoreStripes); - scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, + scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, snapshot, 1, scanKey); while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) @@ -593,6 +687,7 @@ InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls) #if PG_VERSION_NUM >= 120000 TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor, &TTSOpsHeapTuple); + ExecStoreHeapTuple(tuple, slot, false); #else TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor); diff --git a/cstore_tableam.c b/cstore_tableam.c index 09a65d75b..ae7799410 100644 --- a/cstore_tableam.c +++ 
b/cstore_tableam.c @@ -120,8 +120,6 @@ ResetCStoreMemoryContext() static void cstore_init_write_state(Relation relation) { - /*TODO: upgrade lock to serialize writes */ - if (CStoreWriteState != NULL) { /* TODO: consider whether it's possible for a new write to start */ diff --git a/cstore_writer.c b/cstore_writer.c index 2c0ca541e..3be14994b 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -33,7 +33,7 @@ static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount); -static StripeMetadata FlushStripe(TableWriteState *writeState); +static void FlushStripe(TableWriteState *writeState); static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, int datumTypeLength, @@ -45,8 +45,6 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, int columnTypeLength, Oid columnCollation, FmgrInfo *comparisonFunction); static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); -static void AppendStripeMetadata(DataFileMetadata *datafileMetadata, - StripeMetadata stripeMetadata); static StringInfo CopyStringInfo(StringInfo sourceString); @@ -64,34 +62,12 @@ CStoreBeginWrite(Relation relation, TupleDesc tupleDescriptor) { TableWriteState *writeState = NULL; - DataFileMetadata *datafileMetadata = NULL; FmgrInfo **comparisonFunctionArray = NULL; MemoryContext stripeWriteContext = NULL; - uint64 currentFileOffset = 0; uint32 columnCount = 0; uint32 columnIndex = 0; bool *columnMaskArray = NULL; BlockData *blockData = NULL; - uint64 currentStripeId = 0; - Oid relNode = relation->rd_node.relNode; - - datafileMetadata = ReadDataFileMetadata(relNode, false); - - /* - * If stripeMetadataList is not empty, jump to the position right after - * the last position. 
- */ - if (datafileMetadata->stripeMetadataList != NIL) - { - StripeMetadata *lastStripe = NULL; - uint64 lastStripeSize = 0; - - lastStripe = llast(datafileMetadata->stripeMetadataList); - lastStripeSize += lastStripe->dataLength; - - currentFileOffset = lastStripe->fileOffset + lastStripeSize; - currentStripeId = lastStripe->id + 1; - } /* get comparison function pointers for each of the columns */ columnCount = tupleDescriptor->natts; @@ -129,19 +105,16 @@ CStoreBeginWrite(Relation relation, writeState = palloc0(sizeof(TableWriteState)); writeState->relation = relation; - writeState->datafileMetadata = datafileMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; writeState->blockRowCount = blockRowCount; writeState->tupleDescriptor = tupleDescriptor; - writeState->currentFileOffset = currentFileOffset; writeState->comparisonFunctionArray = comparisonFunctionArray; writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; writeState->stripeWriteContext = stripeWriteContext; writeState->blockData = blockData; writeState->compressionBuffer = NULL; - writeState->currentStripeId = currentStripeId; return writeState; } @@ -164,7 +137,6 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; - DataFileMetadata *datafileMetadata = writeState->datafileMetadata; const uint32 blockRowCount = writeState->blockRowCount; BlockData *blockData = writeState->blockData; MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); @@ -238,28 +210,14 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul stripeBuffers->rowCount++; if (stripeBuffers->rowCount >= writeState->stripeMaxRowCount) { - StripeMetadata stripeMetadata = FlushStripe(writeState); - 
MemoryContextReset(writeState->stripeWriteContext); - - writeState->currentStripeId++; + FlushStripe(writeState); /* set stripe data and skip list to NULL so they are recreated next time */ writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; + } - /* - * Append stripeMetadata in old context so next MemoryContextReset - * doesn't free it. - */ - MemoryContextSwitchTo(oldContext); - InsertStripeMetadataRow(writeState->relation->rd_node.relNode, - &stripeMetadata); - AppendStripeMetadata(datafileMetadata, stripeMetadata); - } - else - { - MemoryContextSwitchTo(oldContext); - } + MemoryContextSwitchTo(oldContext); } @@ -278,17 +236,13 @@ CStoreEndWrite(TableWriteState *writeState) { MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); - StripeMetadata stripeMetadata = FlushStripe(writeState); + FlushStripe(writeState); MemoryContextReset(writeState->stripeWriteContext); MemoryContextSwitchTo(oldContext); - InsertStripeMetadataRow(writeState->relation->rd_node.relNode, - &stripeMetadata); - AppendStripeMetadata(writeState->datafileMetadata, stripeMetadata); } MemoryContextDelete(writeState->stripeWriteContext); - list_free_deep(writeState->datafileMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); FreeBlockData(writeState->blockData); pfree(writeState); @@ -366,11 +320,9 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, static void -WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) +WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) { - uint64 logicalOffset = writeState->currentFileOffset; uint64 remaining = dataLength; - Relation rel = writeState->relation; Buffer buffer; while (remaining > 0) @@ -383,14 +335,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) RelationOpenSmgr(rel); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); - - while (addr.blockno >= nblocks) - { - Buffer newBuffer = 
ReadBuffer(rel, P_NEW); - ReleaseBuffer(newBuffer); - nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); - } - + Assert(addr.blockno < nblocks); RelationCloseSmgr(rel); buffer = ReadBuffer(rel, addr.blockno); @@ -459,7 +404,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) * the function creates the skip list and footer buffers. Finally, the function * flushes the skip list, data, and footer buffers to the file. */ -static StripeMetadata +static void FlushStripe(TableWriteState *writeState) { StripeMetadata stripeMetadata = { 0 }; @@ -474,8 +419,9 @@ FlushStripe(TableWriteState *writeState) uint32 blockRowCount = writeState->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; - uint64 initialFileOffset = writeState->currentFileOffset; + uint64 currentFileOffset = 0; uint64 stripeSize = 0; + uint64 stripeRowCount = 0; /* * check if the last block needs serialization , the last block was not serialized @@ -520,6 +466,18 @@ FlushStripe(TableWriteState *writeState) } } + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + stripeRowCount += + stripeSkipList->blockSkipNodeArray[0][blockIndex].rowCount; + } + + stripeMetadata = ReserveStripe(writeState->relation, stripeSize, + stripeRowCount, columnCount, blockCount, + blockRowCount); + + currentFileOffset = stripeMetadata.fileOffset; + /* * Each stripe has only one section: * Data section, in which we store data for each column continuously. 
@@ -541,8 +499,9 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo existsBuffer = blockBuffers->existsBuffer; - WriteToSmgr(writeState, existsBuffer->data, existsBuffer->len); - writeState->currentFileOffset += existsBuffer->len; + WriteToSmgr(writeState->relation, currentFileOffset, + existsBuffer->data, existsBuffer->len); + currentFileOffset += existsBuffer->len; } for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) @@ -551,30 +510,16 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = blockBuffers->valueBuffer; - WriteToSmgr(writeState, valueBuffer->data, valueBuffer->len); - writeState->currentFileOffset += valueBuffer->len; + WriteToSmgr(writeState->relation, currentFileOffset, + valueBuffer->data, valueBuffer->len); + currentFileOffset += valueBuffer->len; } } /* create skip list and footer buffers */ SaveStripeSkipList(writeState->relation->rd_node.relNode, - writeState->currentStripeId, + stripeMetadata.id, stripeSkipList, tupleDescriptor); - - for (blockIndex = 0; blockIndex < blockCount; blockIndex++) - { - stripeMetadata.rowCount += - stripeSkipList->blockSkipNodeArray[0][blockIndex].rowCount; - } - - stripeMetadata.fileOffset = initialFileOffset; - stripeMetadata.dataLength = stripeSize; - stripeMetadata.id = writeState->currentStripeId; - stripeMetadata.blockCount = blockCount; - stripeMetadata.blockRowCount = writeState->blockRowCount; - stripeMetadata.columnCount = columnCount; - - return stripeMetadata; } @@ -797,21 +742,6 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) } -/* - * AppendStripeMetadata adds a copy of given stripeMetadata to the given - * table footer's stripeMetadataList. 
- */ -static void -AppendStripeMetadata(DataFileMetadata *datafileMetadata, StripeMetadata stripeMetadata) -{ - StripeMetadata *stripeMetadataCopy = palloc0(sizeof(StripeMetadata)); - memcpy(stripeMetadataCopy, &stripeMetadata, sizeof(StripeMetadata)); - - datafileMetadata->stripeMetadataList = lappend(datafileMetadata->stripeMetadataList, - stripeMetadataCopy); -} - - /* * CopyStringInfo creates a deep copy of given source string allocating only needed * amount of memory. diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 3db30a761..d1270a3d2 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -68,24 +68,24 @@ ALTER TABLE t DROP COLUMN a; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; stripe | attr | block | ?column? | ?column? --------+------+-------+----------+---------- - 0 | 1 | 0 | f | f - 0 | 2 | 0 | f | f 1 | 1 | 0 | f | f 1 | 2 | 0 | f | f 2 | 1 | 0 | f | f 2 | 2 | 0 | f | f + 3 | 1 | 0 | f | f + 3 | 2 | 0 | f | f (6 rows) VACUUM FULL t; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; stripe | attr | block | ?column? | ?column? 
--------+------+-------+----------+---------- - 0 | 1 | 0 | t | t - 0 | 2 | 0 | f | f 1 | 1 | 0 | t | t 1 | 2 | 0 | f | f 2 | 1 | 0 | t | t 2 | 2 | 0 | f | f + 3 | 1 | 0 | t | t + 3 | 2 | 0 | f | f (6 rows) -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands @@ -107,14 +107,14 @@ SELECT count(*) FROM t; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- - 16 kB + 32 kB (1 row) INSERT INTO t SELECT i FROM generate_series(1, 10000) i; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- - 56 kB + 112 kB (1 row) SELECT count(*) FROM t; @@ -129,23 +129,23 @@ ROLLBACK TO SAVEPOINT s1; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- - 56 kB + 112 kB (1 row) COMMIT; -- vacuum should truncate the relation to the usable space VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 57344, total data size: 10754 +total file size: 114688, total data size: 10754 total row count: 2530, stripe count: 3, average rows per stripe: 843 block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 -INFO: "t": truncated 7 to 2 pages +INFO: "t": truncated 14 to 4 pages DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- - 16 kB + 32 kB (1 row) SELECT count(*) FROM t; @@ -172,7 +172,7 @@ INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; COMMIT; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 24576, total data size: 18808 +total file size: 49152, total data size: 18808 total row count: 5530, stripe count: 5, average rows per stripe: 1106 block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2 @@ -188,7 +188,7 @@ INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; ALTER TABLE t DROP COLUMN c; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 32768, total data size: 31372 
+total file size: 65536, total data size: 31372 total row count: 7030, stripe count: 6, average rows per stripe: 1171 block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 @@ -199,7 +199,7 @@ SET cstore.compression TO "pglz"; VACUUM FULL t; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 16384, total data size: 15728 +total file size: 49152, total data size: 15728 total row count: 7030, stripe count: 4, average rows per stripe: 1757 block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index a3eb0fb89..d463bd076 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -11,7 +11,7 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: statistics for "test_vacuum_vs_insert": -total file size: 8192, total data size: 26 +total file size: 24576, total data size: 26 total row count: 3, stripe count: 1, average rows per stripe: 3 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 @@ -51,7 +51,7 @@ step s1-commit: COMMIT; s2: INFO: vacuuming "public.test_vacuum_vs_insert" -s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 1 pages +s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 3 pages DETAIL: 0 dead row versions cannot be removed yet. CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s. step s2-vacuum-full: <... 
completed> diff --git a/expected/am_write_concurrency.out b/expected/am_write_concurrency.out new file mode 100644 index 000000000..41c6ee7e6 --- /dev/null +++ b/expected/am_write_concurrency.out @@ -0,0 +1,142 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s2-begin s1-insert s2-insert s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +1 2 +2 4 +3 6 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +1 2 +2 4 +3 6 +4 8 +5 10 +6 12 + +starting permutation: s1-begin s2-begin s1-copy s2-insert s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s1-copy: + COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; + +step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +11 +12 +13 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +11 +12 +13 + +starting permutation: s1-begin s2-begin s2-insert s1-copy s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-copy: + COPY test_insert_concurrency(a) FROM 
PROGRAM 'seq 11 13'; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +11 +12 +13 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +11 +12 +13 diff --git a/specs/am_write_concurrency.spec b/specs/am_write_concurrency.spec new file mode 100644 index 000000000..7b5d90a4d --- /dev/null +++ b/specs/am_write_concurrency.spec @@ -0,0 +1,67 @@ +setup +{ + CREATE TABLE test_insert_concurrency (a int, b int) USING cstore_tableam; +} + +teardown +{ + DROP TABLE IF EXISTS test_insert_concurrency CASCADE; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-insert" +{ + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(1, 3) i; +} + +step "s1-copy" +{ + COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; +} + +step "s1-select" +{ + SELECT * FROM test_insert_concurrency ORDER BY a; +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-begin" +{ + BEGIN; +} + +step "s2-insert" +{ + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; +} + +step "s2-select" +{ + SELECT * FROM test_insert_concurrency ORDER BY a; +} + +step "s2-commit" +{ + COMMIT; +} + +# writes shouldn't block writes or reads +permutation "s1-begin" "s2-begin" "s1-insert" "s2-insert" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" + +# copy vs insert +permutation "s1-begin" "s2-begin" "s1-copy" "s2-insert" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" + +# insert vs copy +permutation "s1-begin" "s2-begin" "s2-insert" "s1-copy" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" From a3caa5ff0f41dafd83ed2677bdb063e4c035efca Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 30 Oct 2020 19:27:42 -0700 Subject: [PATCH 86/91] fix "make clean" --- Makefile | 10 ++++++---- 1 
file changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 6804bae42..5b65a08bb 100644 --- a/Makefile +++ b/Makefile @@ -36,10 +36,12 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs REGRESS = extension_create ISOLATION = create -EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ - sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ - sql/copyto.sql expected/block_filtering.out expected/create.out \ - expected/data_types.out expected/load.out expected/copyto.out +EXTRA_CLEAN = sql/fdw_block_filtering.sql sql/fdw_create.sql sql/fdw_data_types.sql sql/fdw_load.sql \ + sql/fdw_copyto.sql expected/fdw_block_filtering.out expected/fdw_create.out \ + expected/fdw_data_types.out expected/fdw_load.out expected/fdw_copyto.out \ + sql/am_block_filtering.sql sql/am_create.sql sql/am_data_types.sql sql/am_load.sql \ + sql/am_copyto.sql expected/am_block_filtering.out expected/am_create.out \ + expected/am_data_types.out expected/am_load.out expected/am_copyto.out ifeq ($(USE_FDW),yes) PG_CFLAGS += -DUSE_FDW From acd49b68aa20342b828adb08f19f4c8d34a5fcd5 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sun, 1 Nov 2020 16:57:31 -0800 Subject: [PATCH 87/91] Support for v13 --- cstore_fdw--1.7--1.8.sql | 2 +- cstore_fdw.c | 45 +++++++++++++++++++++++++++------------- cstore_tableam.c | 45 +++++++++++++++++++++++++++------------- cstore_version_compat.h | 28 +++++++++++++++---------- 4 files changed, 80 insertions(+), 40 deletions(-) diff --git a/cstore_fdw--1.7--1.8.sql b/cstore_fdw--1.7--1.8.sql index 8fe9416d1..cf6d510d5 100644 --- a/cstore_fdw--1.7--1.8.sql +++ b/cstore_fdw--1.7--1.8.sql @@ -3,7 +3,7 @@ DO $proc$ BEGIN -IF version() ~ '12' THEN +IF version() ~ '12' or version() ~ '13' THEN EXECUTE $$ CREATE FUNCTION cstore_tableam_handler(internal) RETURNS table_am_handler diff --git a/cstore_fdw.c b/cstore_fdw.c index 221c97843..328125535 100644 --- a/cstore_fdw.c +++ 
b/cstore_fdw.c @@ -20,7 +20,11 @@ #include "access/heapam.h" #include "access/reloptions.h" +#if PG_VERSION_NUM >= 130000 +#include "access/heaptoast.h" +#else #include "access/tuptoaster.h" +#endif #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/indexing.h" @@ -110,7 +114,14 @@ static const CStoreValidOption ValidOptionArray[] = static object_access_hook_type prevObjectAccessHook = NULL; /* local functions forward declarations */ -#if PG_VERSION_NUM >= 100000 +#if PG_VERSION_NUM >= 130000 +static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + QueryCompletion *queryCompletion); +#elif PG_VERSION_NUM >= 100000 static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, ProcessUtilityContext context, ParamListInfo paramListInfo, @@ -216,7 +227,8 @@ static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; void cstore_fdw_init() { - PreviousProcessUtilityHook = ProcessUtility_hook; + PreviousProcessUtilityHook = (ProcessUtility_hook != NULL) ? + ProcessUtility_hook : standard_ProcessUtility; ProcessUtility_hook = CStoreProcessUtility; prevObjectAccessHook = object_access_hook; object_access_hook = CStoreFdwObjectAccessHook; @@ -284,13 +296,20 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) * the previous utility hook or the standard utility command via macro * CALL_PREVIOUS_UTILITY. 
*/ -#if PG_VERSION_NUM >= 100000 +#if PG_VERSION_NUM >= 130000 static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, ProcessUtilityContext context, ParamListInfo paramListInfo, QueryEnvironment *queryEnvironment, - DestReceiver *destReceiver, char *completionTag) + DestReceiver *destReceiver, QueryCompletion *queryCompletion) +#elif PG_VERSION_NUM >= 100000 +static void +CStoreProcessUtility(PlannedStmt * plannedStatement, const char * queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment * queryEnvironment, + DestReceiver * destReceiver, char * completionTag) #else static void CStoreProcessUtility(Node * parseTree, const char * queryString, @@ -299,6 +318,9 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, DestReceiver * destReceiver, char * completionTag) #endif { +#if PG_VERSION_NUM >= 130000 + char *completionTag = NULL; +#endif #if PG_VERSION_NUM >= 100000 Node *parseTree = plannedStatement->utilityStmt; #endif @@ -313,8 +335,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else { - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); } } else if (nodeTag(parseTree) == T_TruncateStmt) @@ -330,8 +351,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, { truncateStatement->relations = otherTablesList; - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); /* restore the former relation list. 
Our * replacement could be freed but still needed @@ -352,21 +372,18 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, { AlterTableStmt *alterTable = (AlterTableStmt *) parseTree; CStoreProcessAlterTableCommand(alterTable); - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); } else if (nodeTag(parseTree) == T_DropdbStmt) { /* let postgres handle error checking and dropping of the database */ - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); } /* handle other utility statements */ else { - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); } } diff --git a/cstore_tableam.c b/cstore_tableam.c index ae7799410..c22ab7baf 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -10,7 +10,11 @@ #include "access/rewriteheap.h" #include "access/tableam.h" #include "access/tsmapi.h" +#if PG_VERSION_NUM >= 130000 +#include "access/heaptoast.h" +#else #include "access/tuptoaster.h" +#endif #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/index.h" @@ -41,6 +45,7 @@ #include "cstore.h" #include "cstore_customscan.h" #include "cstore_tableam.h" +#include "cstore_version_compat.h" #define CSTORE_TABLEAM_NAME "cstore_tableam" @@ -70,6 +75,15 @@ static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, int subId, void *arg); +#if PG_VERSION_NUM >= 130000 +static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + QueryCompletion *qc); +#else static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, const char *queryString, 
ProcessUtilityContext context, @@ -77,6 +91,8 @@ static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, QueryEnvironment *queryEnvironment, DestReceiver *destReceiver, char *completionTag); +#endif + static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); @@ -1035,6 +1051,7 @@ CStoreExecutorEnd(QueryDesc *queryDesc) } +#if PG_VERSION_NUM >= 130000 static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, const char *queryString, @@ -1042,7 +1059,17 @@ CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, ParamListInfo paramListInfo, QueryEnvironment *queryEnvironment, DestReceiver *destReceiver, - char *completionTag) + QueryCompletion *queryCompletion) +#else +static void +CStoreTableAMProcessUtility(PlannedStmt * plannedStatement, + const char * queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment * queryEnvironment, + DestReceiver * destReceiver, + char * completionTag) +#endif { Node *parseTree = plannedStatement->utilityStmt; @@ -1067,18 +1094,7 @@ CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, } } - if (PreviousProcessUtilityHook != NULL) - { - PreviousProcessUtilityHook(plannedStatement, queryString, context, - paramListInfo, queryEnvironment, - destReceiver, completionTag); - } - else - { - standard_ProcessUtility(plannedStatement, queryString, context, - paramListInfo, queryEnvironment, - destReceiver, completionTag); - } + CALL_PREVIOUS_UTILITY(); } @@ -1087,7 +1103,8 @@ cstore_tableam_init() { PreviousExecutorEndHook = ExecutorEnd_hook; ExecutorEnd_hook = CStoreExecutorEnd; - PreviousProcessUtilityHook = ProcessUtility_hook; + PreviousProcessUtilityHook = (ProcessUtility_hook != NULL) ? 
+ ProcessUtility_hook : standard_ProcessUtility; ProcessUtility_hook = CStoreTableAMProcessUtility; prevObjectAccessHook = object_access_hook; object_access_hook = CStoreTableAMObjectAccessHook; diff --git a/cstore_version_compat.h b/cstore_version_compat.h index 3d1a60f93..69eb9c9f3 100644 --- a/cstore_version_compat.h +++ b/cstore_version_compat.h @@ -32,18 +32,18 @@ ExplainPropertyInteger(qlabel, NULL, value, es) #endif -#define PREVIOUS_UTILITY (PreviousProcessUtilityHook != NULL \ - ? PreviousProcessUtilityHook : standard_ProcessUtility) -#if PG_VERSION_NUM >= 100000 -#define CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, \ - destReceiver, completionTag) \ - PREVIOUS_UTILITY(plannedStatement, queryString, context, paramListInfo, \ - queryEnvironment, destReceiver, completionTag) +#if PG_VERSION_NUM >= 130000 +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(plannedStatement, queryString, context, paramListInfo, \ + queryEnvironment, destReceiver, queryCompletion) +#elif PG_VERSION_NUM >= 100000 +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(plannedStatement, queryString, context, paramListInfo, \ + queryEnvironment, destReceiver, completionTag) #else -#define CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, \ - destReceiver, completionTag) \ - PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, \ - completionTag) +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(parseTree, queryString, context, paramListInfo, \ + destReceiver, completionTag) #endif #if PG_VERSION_NUM < 120000 @@ -56,4 +56,10 @@ #endif +#if PG_VERSION_NUM >= 130000 +#define heap_open table_open +#define heap_openrv table_openrv +#define heap_close table_close +#endif + #endif /* CSTORE_COMPAT_H */ From 288025d9eae42d60767d39ee84ffa5440aa044ea Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Mon, 2 Nov 2020 13:04:18 +0100 Subject: [PATCH 88/91] add pg13 on CI --- .circleci/config.yml 
| 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9f2532c1d..645211182 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -45,6 +45,20 @@ jobs: paths: - install-12.tar + build-13: + docker: + - image: 'citus/extbuilder:13.0' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=13 .circleci/build.sh + - persist_to_workspace: + root: . + paths: + - install-13.tar + test-11_checkinstall: docker: - image: 'citus/exttester:11.9' @@ -85,6 +99,26 @@ jobs: - codecov/upload: flags: 'test_12,installcheck' + test-13_checkinstall: + docker: + - image: 'citus/exttester:13.0' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_13,installcheck' + workflows: version: 2 build_and_test: @@ -94,8 +128,11 @@ workflows: - build-11 - build-12 + - build-13 - test-11_checkinstall: requires: [build-11] - test-12_checkinstall: requires: [build-12] + - test-13_checkinstall: + requires: [build-13] From 653dbc615a493d8da7f004b83533d450327fa596 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sat, 31 Oct 2020 13:34:26 -0700 Subject: [PATCH 89/91] Use -Werror --- Makefile | 4 ++-- cstore_customscan.c | 33 ++++++++++++++++++++------------- cstore_tableam.c | 2 +- cstore_writer.c | 1 + 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 5b65a08bb..04b9c12b2 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ else $(error version $(VER) is not supported) endif -PG_CFLAGS = -std=c11 -Wshadow +PG_CFLAGS = -std=c11 -Wshadow -Werror OBJS = cstore.o cstore_writer.o 
cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o @@ -40,7 +40,7 @@ EXTRA_CLEAN = sql/fdw_block_filtering.sql sql/fdw_create.sql sql/fdw_data_types. sql/fdw_copyto.sql expected/fdw_block_filtering.out expected/fdw_create.out \ expected/fdw_data_types.out expected/fdw_load.out expected/fdw_copyto.out \ sql/am_block_filtering.sql sql/am_create.sql sql/am_data_types.sql sql/am_load.sql \ - sql/am_copyto.sql expected/am_block_filtering.out expected/am_create.out \ + sql/am_copyto.sql expected/am_block_filtering.out \ expected/am_data_types.out expected/am_load.out expected/am_copyto.out ifeq ($(USE_FDW),yes) diff --git a/cstore_customscan.c b/cstore_customscan.c index d7e6eb667..7c163e5c9 100644 --- a/cstore_customscan.c +++ b/cstore_customscan.c @@ -133,6 +133,8 @@ static void CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + Relation relation; + /* call into previous hook if assigned */ if (PreviousSetRelPathlistHook) { @@ -156,13 +158,14 @@ CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, * If that is the case we want to insert an extra path that pushes down the projection * into the scan of the table to minimize the data read. 
*/ - Relation relation = RelationIdGetRelation(rte->relid); + relation = RelationIdGetRelation(rte->relid); if (relation->rd_tableam == GetCstoreTableAmRoutine()) { + Path *customPath = CreateCStoreScanPath(rel, rte); + ereport(DEBUG1, (errmsg("pathlist hook for cstore table am"))); /* we propose a new path that will be the only path for scanning this relation */ - Path *customPath = CreateCStoreScanPath(rel, rte); clear_paths(rel); add_path(rel, customPath); } @@ -175,17 +178,19 @@ CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte) { CStoreScanPath *cspath = (CStoreScanPath *) newNode(sizeof(CStoreScanPath), T_CustomPath); + CustomPath *cpath; + Path *path; /* * popuate custom path information */ - CustomPath *cpath = &cspath->custom_path; + cpath = &cspath->custom_path; cpath->methods = &CStoreScanPathMethods; /* * populate generic path information */ - Path *path = &cpath->path; + path = &cpath->path; path->pathtype = T_CustomScan; path->parent = rel; path->pathtarget = rel->reltarget; @@ -212,12 +217,13 @@ CStoreScanCost(RangeTblEntry *rte) { Relation rel = RelationIdGetRelation(rte->relid); DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); - RelationClose(rel); - rel = NULL; - uint32 maxColumnCount = 0; uint64 totalStripeSize = 0; ListCell *stripeMetadataCell = NULL; + + RelationClose(rel); + rel = NULL; + foreach(stripeMetadataCell, metadata->stripeMetadataList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); @@ -225,12 +231,13 @@ CStoreScanCost(RangeTblEntry *rte) maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount); } - Bitmapset *attr_needed = rte->selectedCols; - double numberOfColumnsRead = bms_num_members(attr_needed); - double selectionRatio = numberOfColumnsRead / (double) maxColumnCount; - Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio; - - return scanCost; + { + Bitmapset *attr_needed = rte->selectedCols; + double numberOfColumnsRead = 
bms_num_members(attr_needed); + double selectionRatio = numberOfColumnsRead / (double) maxColumnCount; + Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio; + return scanCost; + } } diff --git a/cstore_tableam.c b/cstore_tableam.c index c22ab7baf..b1624f59f 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -244,6 +244,7 @@ cstore_beginscan_extended(Relation relation, Snapshot snapshot, List *columnList = NIL; List *neededColumnList = NIL; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); + ListCell *columnCell = NULL; scan->cs_base.rs_rd = relation; scan->cs_base.rs_snapshot = snapshot; @@ -255,7 +256,6 @@ cstore_beginscan_extended(Relation relation, Snapshot snapshot, columnList = RelationColumnList(relation); /* only collect columns that we need for the scan */ - ListCell *columnCell = NULL; foreach(columnCell, columnList) { Var *var = castNode(Var, lfirst(columnCell)); diff --git a/cstore_writer.c b/cstore_writer.c index 3be14994b..9ca8c806e 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -336,6 +336,7 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) RelationOpenSmgr(rel); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); Assert(addr.blockno < nblocks); + (void) nblocks; /* keep compiler quiet */ RelationCloseSmgr(rel); buffer = ReadBuffer(rel, addr.blockno); From d03e9ca8611f9f805bb3a491233dd6256cf560ec Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 3 Nov 2020 13:39:46 +0100 Subject: [PATCH 90/91] Feature: cstore table options (#25) DESCRIPTION: Add UDF's to maintain cstore table options This PR adds two UDF's and a view to interact and maintain the cstore table options. - ``alter_cstore_table_set(relid REGCLASS, [ options ... ])`` - ``alter_cstore_table_reset(relid REGCLASS, [ options ... ])`` - ``cstore.cstore_options`` The `set` function takes options and their specific types. When specified it will change the option associated with the table to the provided value. 
When omitted no action is taken. The `reset` function takes options as booleans. When set to `true` the value of the option associated with the table will be reset to the current default as specified by the associated GUC's. The options view containes a record for every cstore table with its associated settings as columns. --- Makefile | 2 +- cstore.h | 8 +- cstore_compression.c | 24 ++++ cstore_fdw--1.7--1.8.sql | 18 +++ cstore_fdw--1.7.sql | 12 ++ cstore_fdw.c | 6 +- cstore_metadata_tables.c | 133 ++++++++++++++++++++-- cstore_tableam.c | 209 ++++++++++++++++++++++++++++++----- expected/am_tableoptions.out | 179 ++++++++++++++++++++++++++++++ expected/am_vacuum.out | 33 +++++- sql/am_tableoptions.sql | 102 +++++++++++++++++ sql/am_vacuum.sql | 13 ++- 12 files changed, 687 insertions(+), 52 deletions(-) create mode 100644 expected/am_tableoptions.out create mode 100644 sql/am_tableoptions.sql diff --git a/Makefile b/Makefile index 04b9c12b2..6be7bbd45 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ ifeq ($(USE_TABLEAM),yes) OBJS += cstore_tableam.o cstore_customscan.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean \ - am_block_filtering am_join am_trigger + am_block_filtering am_join am_trigger am_tableoptions ISOLATION += am_write_concurrency am_vacuum_vs_insert endif diff --git a/cstore.h b/cstore.h index 7ff657e33..35598cd41 100644 --- a/cstore.h +++ b/cstore.h @@ -93,6 +93,8 @@ typedef struct DataFileMetadata { List *stripeMetadataList; uint64 blockRowCount; + uint64 stripeRowCount; + CompressionType compression; } DataFileMetadata; @@ -277,10 +279,14 @@ extern uint64 CStoreTableRowCount(Relation relation); extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); +extern char * 
CompressionTypeStr(CompressionType type); /* cstore_metadata_tables.c */ extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); -extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); +extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int + stripeRowCount, CompressionType compression); +extern void UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int + stripeRowCount, CompressionType compression); extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); extern uint64 GetHighestUsedAddress(Oid relfilenode); extern StripeMetadata ReserveStripe(Relation rel, uint64 size, diff --git a/cstore_compression.c b/cstore_compression.c index f6122614a..f36d8dd04 100644 --- a/cstore_compression.c +++ b/cstore_compression.c @@ -170,3 +170,27 @@ DecompressBuffer(StringInfo buffer, CompressionType compressionType) return decompressedBuffer; } + + +/* + * CompressionTypeStr returns string representation of a compression type. 
+ */ +char * +CompressionTypeStr(CompressionType type) +{ + switch (type) + { + case COMPRESSION_NONE: + { + return "none"; + } + + case COMPRESSION_PG_LZ: + { + return "pglz"; + } + + default: + return "unknown"; + } +} diff --git a/cstore_fdw--1.7--1.8.sql b/cstore_fdw--1.7--1.8.sql index cf6d510d5..81cbadfb4 100644 --- a/cstore_fdw--1.7--1.8.sql +++ b/cstore_fdw--1.7--1.8.sql @@ -12,6 +12,24 @@ IF version() ~ '12' or version() ~ '13' THEN CREATE ACCESS METHOD cstore_tableam TYPE TABLE HANDLER cstore_tableam_handler; + + CREATE FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int DEFAULT NULL, + stripe_row_count int DEFAULT NULL, + compression name DEFAULT null) + RETURNS void + LANGUAGE C + AS 'MODULE_PATHNAME', 'alter_cstore_table_set'; + + CREATE FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool DEFAULT false, + stripe_row_count bool DEFAULT false, + compression bool DEFAULT false) + RETURNS void + LANGUAGE C + AS 'MODULE_PATHNAME', 'alter_cstore_table_reset'; $$; END IF; END$proc$; diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index c19bb1449..1f874ce60 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -34,6 +34,8 @@ LANGUAGE C STRICT; CREATE TABLE cstore_data_files ( relfilenode oid NOT NULL, block_row_count int NOT NULL, + stripe_row_count int NOT NULL, + compression name NOT NULL, version_major bigint NOT NULL, version_minor bigint NOT NULL, PRIMARY KEY (relfilenode) @@ -74,3 +76,13 @@ CREATE TABLE cstore_skipnodes ( ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata'; + +CREATE VIEW cstore_options AS +SELECT c.oid::regclass regclass, + d.block_row_count, + d.stripe_row_count, + d.compression +FROM pg_class c +JOIN cstore.cstore_data_files d USING(relfilenode); + +COMMENT ON VIEW cstore_options IS 'CStore per table settings'; diff --git a/cstore_fdw.c b/cstore_fdw.c index 328125535..c2497fd27 100644 --- 
a/cstore_fdw.c +++ b/cstore_fdw.c @@ -280,7 +280,8 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) AccessShareLock, false); Relation relation = cstore_fdw_open(relationId, AccessExclusiveLock); CStoreOptions *options = CStoreGetOptions(relationId); - InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount); + InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount, + options->stripeRowCount, options->compressionType); heap_close(relation, AccessExclusiveLock); } } @@ -797,7 +798,8 @@ TruncateCStoreTables(List *cstoreRelationList) Assert(IsCStoreFdwTable(relationId)); FdwNewRelFileNode(relation); - InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount); + InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount, + options->stripeRowCount, options->compressionType); } } diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 1bfc4be49..793f3dd7f 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -57,7 +57,7 @@ static Oid CStoreDataFilesIndexRelationId(void); static Oid CStoreSkipNodesRelationId(void); static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static bool ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount); +static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata); static ModifyState * StartModifyRelation(Relation rel); static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls); @@ -68,11 +68,31 @@ static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_table */ -#define Natts_cstore_data_files 4 +#define Natts_cstore_data_files 6 #define Anum_cstore_data_files_relfilenode 1 #define Anum_cstore_data_files_block_row_count 2 -#define Anum_cstore_data_files_version_major 3 -#define Anum_cstore_data_files_version_minor 4 +#define 
Anum_cstore_data_files_stripe_row_count 3 +#define Anum_cstore_data_files_compression 4 +#define Anum_cstore_data_files_version_major 5 +#define Anum_cstore_data_files_version_minor 6 + +/* ---------------- + * cstore.cstore_data_files definition. + * ---------------- + */ +typedef struct FormData_cstore_data_files +{ + Oid relfilenode; + int32 block_row_count; + int32 stripe_row_count; + NameData compression; + int64 version_major; + int64 version_minor; + +#ifdef CATALOG_VARLEN /* variable-length fields start here */ +#endif +} FormData_cstore_data_files; +typedef FormData_cstore_data_files *Form_cstore_data_files; /* constants for cstore_stripe */ #define Natts_cstore_stripes 8 @@ -106,16 +126,22 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); * in cstore_data_files. */ void -InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount) +InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount, + CompressionType compression) { Oid cstoreDataFilesOid = InvalidOid; Relation cstoreDataFiles = NULL; ModifyState *modifyState = NULL; + NameData compressionName = { 0 }; + + namestrcpy(&compressionName, CompressionTypeStr(compression)); bool nulls[Natts_cstore_data_files] = { 0 }; Datum values[Natts_cstore_data_files] = { ObjectIdGetDatum(relfilenode), Int32GetDatum(blockRowCount), + Int32GetDatum(stripeRowCount), + NameGetDatum(&compressionName), Int32GetDatum(CSTORE_VERSION_MAJOR), Int32GetDatum(CSTORE_VERSION_MINOR) }; @@ -135,6 +161,84 @@ InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount) } +void +UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount, + CompressionType compression) +{ + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + bool indexOK = true; + SysScanDesc scanDescriptor = NULL; + Form_cstore_data_files metadata = NULL; + HeapTuple heapTuple = NULL; + Datum values[Natts_cstore_data_files] = { 0 }; + bool isnull[Natts_cstore_data_files] = { 0 }; + bool 
replace[Natts_cstore_data_files] = { 0 }; + + Relation cstoreDataFiles = heap_open(CStoreDataFilesRelationId(), RowExclusiveLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles); + + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, + F_INT8EQ, ObjectIdGetDatum(relfilenode)); + + scanDescriptor = systable_beginscan(cstoreDataFiles, + CStoreDataFilesIndexRelationId(), + indexOK, + NULL, scanKeyCount, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (heapTuple == NULL) + { + ereport(ERROR, (errmsg("relfilenode %d doesn't belong to a cstore table", + relfilenode))); + } + + metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple); + + bool changed = false; + if (metadata->block_row_count != blockRowCount) + { + values[Anum_cstore_data_files_block_row_count - 1] = Int32GetDatum(blockRowCount); + isnull[Anum_cstore_data_files_block_row_count - 1] = false; + replace[Anum_cstore_data_files_block_row_count - 1] = true; + changed = true; + } + + if (metadata->stripe_row_count != stripeRowCount) + { + values[Anum_cstore_data_files_stripe_row_count - 1] = Int32GetDatum( + stripeRowCount); + isnull[Anum_cstore_data_files_stripe_row_count - 1] = false; + replace[Anum_cstore_data_files_stripe_row_count - 1] = true; + changed = true; + } + + if (ParseCompressionType(NameStr(metadata->compression)) != compression) + { + Name compressionName = palloc0(sizeof(NameData)); + namestrcpy(compressionName, CompressionTypeStr(compression)); + values[Anum_cstore_data_files_compression - 1] = NameGetDatum(compressionName); + isnull[Anum_cstore_data_files_compression - 1] = false; + replace[Anum_cstore_data_files_compression - 1] = true; + changed = true; + } + + if (changed) + { + heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, + replace); + + CatalogTupleUpdate(cstoreDataFiles, &heapTuple->t_self, heapTuple); + + CommandCounterIncrement(); + } + + systable_endscan(scanDescriptor); + + 
heap_close(cstoreDataFiles, NoLock); +} + + /* * SaveStripeSkipList saves StripeSkipList for a given stripe as rows * of cstore_skipnodes. @@ -355,7 +459,7 @@ DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk) { DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); - bool found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); + bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata); if (!found) { if (!missingOk) @@ -555,7 +659,7 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) * false if table was not found in cstore_data_files. */ static bool -ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount) +ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) { bool found = false; Oid cstoreDataFilesOid = InvalidOid; @@ -599,8 +703,19 @@ ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount) Datum datumArray[Natts_cstore_data_files]; bool isNullArray[Natts_cstore_data_files]; heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - *blockRowCount = DatumGetInt32(datumArray[Anum_cstore_data_files_block_row_count - - 1]); + + if (metadata) + { + Name compressionName = NULL; + + metadata->blockRowCount = DatumGetInt32( + datumArray[Anum_cstore_data_files_block_row_count - 1]); + metadata->stripeRowCount = DatumGetInt32( + datumArray[Anum_cstore_data_files_stripe_row_count - 1]); + compressionName = DatumGetName( + datumArray[Anum_cstore_data_files_compression - 1]); + metadata->compression = ParseCompressionType(NameStr(*compressionName)); + } found = true; } diff --git a/cstore_tableam.c b/cstore_tableam.c index b1624f59f..ce7d7de97 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -97,11 +97,15 @@ static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); static void LogRelationStats(Relation rel, int elevel); -static char * 
CompressionTypeStr(CompressionType type); static void TruncateCStore(Relation rel, int elevel); + +/* + * CStoreTableAMDefaultOptions returns the default options for a cstore table am table. + * These options are based on the GUC's controlling the defaults. + */ static CStoreOptions * -CStoreTableAMGetOptions(void) +CStoreTableAMDefaultOptions() { CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); cstoreOptions->compressionType = cstore_compression; @@ -111,6 +115,27 @@ CStoreTableAMGetOptions(void) } +/* + * CStoreTableAMGetOptions returns the options based on a relation. It is advised the + * relation is a cstore table am table, if not it will raise an error + */ +static CStoreOptions * +CStoreTableAMGetOptions(Relation rel) +{ + CStoreOptions *cstoreOptions = NULL; + DataFileMetadata *metadata = NULL; + + Assert(rel != NULL); + + cstoreOptions = palloc0(sizeof(CStoreOptions)); + metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + cstoreOptions->compressionType = metadata->compression; + cstoreOptions->stripeRowCount = metadata->stripeRowCount; + cstoreOptions->blockRowCount = metadata->blockRowCount; + return cstoreOptions; +} + + static MemoryContext GetCStoreMemoryContext() { @@ -145,7 +170,7 @@ cstore_init_write_state(Relation relation) if (CStoreWriteState == NULL) { - CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(); + CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relation); TupleDesc tupdesc = RelationGetDescr(relation); elog(LOG, "initializing write state for relation %d", relation->rd_id); @@ -534,17 +559,23 @@ cstore_relation_set_new_filenode(Relation rel, SMgrRelation srel; DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); uint64 blockRowCount = 0; + uint64 stripeRowCount = 0; + CompressionType compression = 0; if (metadata != NULL) { /* existing table (e.g. 
TRUNCATE), use existing blockRowCount */ blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; } else { /* new table, use options */ - CStoreOptions *options = CStoreTableAMGetOptions(); + CStoreOptions *options = CStoreTableAMDefaultOptions(); blockRowCount = options->blockRowCount; + stripeRowCount = options->stripeRowCount; + compression = options->compressionType; } /* delete old relfilenode metadata */ @@ -554,7 +585,8 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount); + InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount, stripeRowCount, + compression); smgrclose(srel); } @@ -575,7 +607,8 @@ cstore_relation_nontransactional_truncate(Relation rel) /* Delete old relfilenode metadata and recreate it */ DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); - InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount); + InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount, + metadata->stripeRowCount, metadata->compression); } @@ -623,7 +656,19 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, */ Assert(sourceDesc->natts == targetDesc->natts); - cstoreOptions = CStoreTableAMGetOptions(); + /* + * Since we are copying into a new relation we need to copy the settings from the old + * relation first. 
+ */ + + cstoreOptions = CStoreTableAMGetOptions(OldHeap); + + UpdateCStoreDataFileMetadata(NewHeap->rd_node.relNode, + cstoreOptions->blockRowCount, + cstoreOptions->stripeRowCount, + cstoreOptions->compressionType); + + cstoreOptions = CStoreTableAMGetOptions(NewHeap); writeState = CStoreBeginWrite(NewHeap, cstoreOptions->compressionType, @@ -756,26 +801,6 @@ LogRelationStats(Relation rel, int elevel) } -/* - * CompressionTypeStr returns string representation of a compression type. - */ -static char * -CompressionTypeStr(CompressionType type) -{ - switch (type) - { - case COMPRESSION_NONE: - return "none"; - - case COMPRESSION_PG_LZ: - return "pglz"; - - default: - return "unknown"; - } -} - - /* * TruncateCStore truncates the unused space at the end of main fork for * a cstore table. This unused space can be created by aborted transactions. @@ -1262,3 +1287,133 @@ cstore_tableam_handler(PG_FUNCTION_ARGS) { PG_RETURN_POINTER(&cstore_am_methods); } + + +/* + * alter_cstore_table_set is a UDF exposed in postgres to change settings on a columnar + * table. Calling this function on a non-columnar table gives an error. + * + * sql syntax: + * pg_catalog.alter_cstore_table_set( + * table_name regclass, + * block_row_count int DEFAULT NULL, + * stripe_row_count int DEFAULT NULL, + * compression name DEFAULT null) + * + * All arguments except the table name are optional. The UDF is supposed to be called + * like: + * SELECT alter_cstore_table_set('table', compression => 'pglz'); + * + * This will only update the compression of the table, keeping all other settings the + * same. Multiple settings can be changed at the same time by providing multiple + * arguments. Calling the argument with the NULL value will be interperted as not having + * provided the argument. 
+ */ +PG_FUNCTION_INFO_V1(alter_cstore_table_set); +Datum +alter_cstore_table_set(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + int blockRowCount = 0; + int stripeRowCount = 0; + CompressionType compression = COMPRESSION_TYPE_INVALID; + + Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */ + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + if (!metadata) + { + ereport(ERROR, (errmsg("table %s is not a cstore table", + quote_identifier(RelationGetRelationName(rel))))); + } + + blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; + + /* block_row_count => not null */ + if (!PG_ARGISNULL(1)) + { + blockRowCount = PG_GETARG_INT32(1); + ereport(DEBUG1, (errmsg("updating block row count to %d", blockRowCount))); + } + + /* stripe_row_count => not null */ + if (!PG_ARGISNULL(2)) + { + stripeRowCount = PG_GETARG_INT32(2); + ereport(DEBUG1, (errmsg("updating stripe row count to %d", stripeRowCount))); + } + + /* compression => not null */ + if (!PG_ARGISNULL(3)) + { + Name compressionName = PG_GETARG_NAME(3); + compression = ParseCompressionType(NameStr(*compressionName)); + if (compression == COMPRESSION_TYPE_INVALID) + { + ereport(ERROR, (errmsg("unknown compression type for cstore table: %s", + quote_identifier(NameStr(*compressionName))))); + } + ereport(DEBUG1, (errmsg("updating compression to %s", + CompressionTypeStr(compression)))); + } + + UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount, + compression); + + table_close(rel, NoLock); + + PG_RETURN_VOID(); +} + + +PG_FUNCTION_INFO_V1(alter_cstore_table_reset); +Datum +alter_cstore_table_reset(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + int blockRowCount = 0; + int stripeRowCount = 0; + CompressionType compression = COMPRESSION_TYPE_INVALID; + + Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK 
*/ + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + if (!metadata) + { + ereport(ERROR, (errmsg("table %s is not a cstore table", + quote_identifier(RelationGetRelationName(rel))))); + } + + blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; + + /* block_row_count => true */ + if (!PG_ARGISNULL(1) && PG_GETARG_BOOL(1)) + { + blockRowCount = cstore_block_row_count; + ereport(DEBUG1, (errmsg("resetting block row count to %d", blockRowCount))); + } + + /* stripe_row_count => true */ + if (!PG_ARGISNULL(2) && PG_GETARG_BOOL(2)) + { + stripeRowCount = cstore_stripe_row_count; + ereport(DEBUG1, (errmsg("resetting stripe row count to %d", stripeRowCount))); + } + + /* compression => true */ + if (!PG_ARGISNULL(3) && PG_GETARG_BOOL(3)) + { + compression = cstore_compression; + ereport(DEBUG1, (errmsg("resetting compression to %s", + CompressionTypeStr(compression)))); + } + + UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount, + compression); + + table_close(rel, NoLock); + + PG_RETURN_VOID(); +} diff --git a/expected/am_tableoptions.out b/expected/am_tableoptions.out new file mode 100644 index 000000000..e5e0f9a4f --- /dev/null +++ b/expected/am_tableoptions.out @@ -0,0 +1,179 @@ +CREATE SCHEMA am_tableoptions; +SET search_path TO am_tableoptions; +CREATE TABLE table_options (a int) USING cstore_tableam; +INSERT INTO table_options SELECT generate_series(1,100); +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 150000 | none +(1 row) + +-- test changing the compression +SELECT alter_cstore_table_set('table_options', compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show 
table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 150000 | pglz +(1 row) + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', block_row_count => 10); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 150000 | pglz +(1 row) + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', stripe_row_count => 100); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 100 | pglz +(1 row) + +-- VACUUM FULL creates a new table, make sure it copies settings from the table you are vacuuming +VACUUM FULL table_options; +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 100 | pglz +(1 row) + +-- set all settings at the same time +SELECT alter_cstore_table_set('table_options', stripe_row_count => 1000, block_row_count => 100, compression => 'none'); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count 
| stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 100 | 1000 | none +(1 row) + +-- reset settings one by one to the version of the GUC's +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 10000; +SET cstore.compression TO 'pglz'; +-- verify setting the GUC's didn't change the settings +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 100 | 1000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', block_row_count => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 1000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', stripe_row_count => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', compression => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | pglz +(1 row) + +-- verify resetting all 
settings at once work +SET cstore.block_row_count TO 10000; +SET cstore.stripe_row_count TO 100000; +SET cstore.compression TO 'none'; +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | pglz +(1 row) + +SELECT alter_cstore_table_reset( + 'table_options', + block_row_count => true, + stripe_row_count => true, + compression => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 100000 | none +(1 row) + +-- verify edge cases +-- first start with a table that is not a cstore table +CREATE TABLE not_a_cstore_table (a int); +SELECT alter_cstore_table_set('not_a_cstore_table', compression => 'pglz'); +ERROR: table not_a_cstore_table is not a cstore table +SELECT alter_cstore_table_reset('not_a_cstore_table', compression => true); +ERROR: table not_a_cstore_table is not a cstore table +-- verify you can't use a compression that is not known +SELECT alter_cstore_table_set('table_options', compression => 'foobar'); +ERROR: unknown compression type for cstore table: foobar +SET client_min_messages TO warning; +DROP SCHEMA am_tableoptions CASCADE; diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index d1270a3d2..3975be12b 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -36,7 +36,12 @@ SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.r (1 row) -- test the case when all data cannot fit into a single stripe -SET cstore.stripe_row_count TO 1000; +SELECT alter_cstore_table_set('t', stripe_row_count => 1000); + 
alter_cstore_table_set +------------------------ + +(1 row) + INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; SELECT sum(a), sum(b) FROM t; sum | sum @@ -157,14 +162,25 @@ SELECT count(*) FROM t; -- add some stripes with different compression types and create some gaps, -- then vacuum to print stats BEGIN; -SET cstore.block_row_count TO 1000; -SET cstore.stripe_row_count TO 2000; -SET cstore.compression TO "pglz"; +SELECT alter_cstore_table_set('t', + block_row_count => 1000, + stripe_row_count => 2000, + compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + SAVEPOINT s1; INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s1; INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; -SET cstore.compression TO "none"; +SELECT alter_cstore_table_set('t', compression => 'none'); + alter_cstore_table_set +------------------------ + +(1 row) + SAVEPOINT s2; INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s2; @@ -195,7 +211,12 @@ block count: 11, containing data for dropped columns: 2, none compressed: 9, pgl -- vacuum full should remove blocks for dropped columns -- note that, a block will be stored in non-compressed for if compression -- doesn't reduce its size. 
-SET cstore.compression TO "pglz"; +SELECT alter_cstore_table_set('t', compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + VACUUM FULL t; VACUUM VERBOSE t; INFO: statistics for "t": diff --git a/sql/am_tableoptions.sql b/sql/am_tableoptions.sql new file mode 100644 index 000000000..33f26ec76 --- /dev/null +++ b/sql/am_tableoptions.sql @@ -0,0 +1,102 @@ +CREATE SCHEMA am_tableoptions; +SET search_path TO am_tableoptions; + +CREATE TABLE table_options (a int) USING cstore_tableam; +INSERT INTO table_options SELECT generate_series(1,100); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the compression +SELECT alter_cstore_table_set('table_options', compression => 'pglz'); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', block_row_count => 10); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', stripe_row_count => 100); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- VACUUM FULL creates a new table, make sure it copies settings from the table you are vacuuming +VACUUM FULL table_options; + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- set all settings at the same time +SELECT alter_cstore_table_set('table_options', stripe_row_count => 1000, block_row_count => 100, compression => 'none'); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- reset settings one by one to the version of the GUC's +SET cstore.block_row_count TO 1000; 
+SET cstore.stripe_row_count TO 10000; +SET cstore.compression TO 'pglz'; + +-- verify setting the GUC's didn't change the settings +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', block_row_count => true); +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', stripe_row_count => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', compression => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- verify resetting all settings at once work +SET cstore.block_row_count TO 10000; +SET cstore.stripe_row_count TO 100000; +SET cstore.compression TO 'none'; + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset( + 'table_options', + block_row_count => true, + stripe_row_count => true, + compression => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- verify edge cases +-- first start with a table that is not a cstore table +CREATE TABLE not_a_cstore_table (a int); +SELECT alter_cstore_table_set('not_a_cstore_table', compression => 'pglz'); +SELECT alter_cstore_table_reset('not_a_cstore_table', compression => true); + +-- verify you can't use a compression that is not known +SELECT alter_cstore_table_set('table_options', compression => 'foobar'); + +SET client_min_messages TO warning; +DROP SCHEMA am_tableoptions CASCADE; diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index f7f9d77bd..6d248a147 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -18,7 +18,7 @@ SELECT 
sum(a), sum(b) FROM t; SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; -- test the case when all data cannot fit into a single stripe -SET cstore.stripe_row_count TO 1000; +SELECT alter_cstore_table_set('t', stripe_row_count => 1000); INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; SELECT sum(a), sum(b) FROM t; @@ -65,14 +65,15 @@ SELECT count(*) FROM t; -- then vacuum to print stats BEGIN; -SET cstore.block_row_count TO 1000; -SET cstore.stripe_row_count TO 2000; -SET cstore.compression TO "pglz"; +SELECT alter_cstore_table_set('t', + block_row_count => 1000, + stripe_row_count => 2000, + compression => 'pglz'); SAVEPOINT s1; INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s1; INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; -SET cstore.compression TO "none"; +SELECT alter_cstore_table_set('t', compression => 'none'); SAVEPOINT s2; INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s2; @@ -93,7 +94,7 @@ VACUUM VERBOSE t; -- vacuum full should remove blocks for dropped columns -- note that, a block will be stored in non-compressed for if compression -- doesn't reduce its size. -SET cstore.compression TO "pglz"; +SELECT alter_cstore_table_set('t', compression => 'pglz'); VACUUM FULL t; VACUUM VERBOSE t; From 630e579912d4725d6dc034a3bd4c18fd2355c096 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sun, 1 Nov 2020 20:25:06 -0800 Subject: [PATCH 91/91] Handle case of partially-present metadata. 
--- cstore_metadata_tables.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 793f3dd7f..a2eab1940 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -740,6 +740,15 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode) SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; + /* + * During a restore for binary upgrade, metadata tables and indexes may or + * may not exist. + */ + if (IsBinaryUpgrade) + { + return; + } + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));